2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Qu Pengfei <Pengfei.Qu@intel.com>
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
43 #include "intel_media.h"
/*
 * HRD (hypothetical reference decoder) compliance status codes consumed by
 * the BRC (bit-rate control) logic.
 * NOTE(review): this chunk is corrupted -- the enumerators for values 1/2
 * and the closing "} ...;" of the typedef are missing, and original file
 * line numbers are fused into the text. Restore from upstream before use.
 */
45 typedef enum _gen6_brc_status {
46 BRC_NO_HRD_VIOLATION = 0,
49 BRC_UNDERFLOW_WITH_MAX_QP = 3,
50 BRC_OVERFLOW_WITH_MIN_QP = 4,
/*
 * BRC tuning constants and batch-buffer emission helper macros.
 * NOTE(review): several continuation lines of the multi-line macros below
 * (do/while wrappers, dri_bo_alloc arguments, OUT_BCS_RELOC64 arguments)
 * are missing from this extraction, and original line numbers are fused
 * into the text. Do not compile as-is; restore from upstream.
 */
/* Clamp x into [min, max] in place (statement macro). */
54 #define BRC_CLIP(x, min, max) \
56 x = ((x > (max)) ? (max) : ((x < (min)) ? (min) : x)); \
/* Fixed QP offsets between slice types used by the rate control. */
59 #define BRC_P_B_QP_DIFF 4
60 #define BRC_I_P_QP_DIFF 2
61 #define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF)
63 #define BRC_PWEIGHT 0.6 /* weight if P slice with comparison to I slice */
64 #define BRC_BWEIGHT 0.25 /* weight if B slice with comparison to I slice */
66 #define BRC_QP_MAX_CHANGE 5 /* maximum qp modification */
67 #define BRC_CY 0.1 /* weight for */
68 #define BRC_CX_UNDERFLOW 5.
69 #define BRC_CX_OVERFLOW -4.
/* pi/2 constant used by the BRC QP-adjustment curve. */
71 #define BRC_PI_0_5 1.5707963267948966192313216916398
73 /* intel buffer write */
/* (Re)allocate a named GEM buffer object for an encoder scratch surface;
 * releases any previous bo first. Assumes "i965" is in scope at use site. */
74 #define ALLOC_ENCODER_BUFFER(gen_buffer, string, size) do { \
75 dri_bo_unreference(gen_buffer->bo); \
76 gen_buffer->bo = dri_bo_alloc(i965->intel.bufmgr, \
80 assert(gen_buffer->bo); \
/* Emit a 64-bit buffer address (reloc or zero DWs when bo is NULL) into the
 * BCS batch; "ma" presumably controls the trailing memory-address-attributes
 * DW -- TODO confirm against the complete upstream macro. */
84 #define OUT_BUFFER_X(buf_bo, is_target, ma) do { \
86 OUT_BCS_RELOC64(batch, \
88 I915_GEM_DOMAIN_INSTRUCTION, \
89 is_target ? I915_GEM_DOMAIN_INSTRUCTION : 0, \
92 OUT_BCS_BATCH(batch, 0); \
93 OUT_BCS_BATCH(batch, 0); \
96 OUT_BCS_BATCH(batch, i965->intel.mocs_state); \
/* Convenience wrappers: MA = with memory-address-attributes DW, NMA = without;
 * TARGET = written by HW, REFERENCE = read-only. */
99 #define OUT_BUFFER_MA_TARGET(buf_bo) OUT_BUFFER_X(buf_bo, 1, 1)
100 #define OUT_BUFFER_MA_REFERENCE(buf_bo) OUT_BUFFER_X(buf_bo, 0, 1)
101 #define OUT_BUFFER_NMA_TARGET(buf_bo) OUT_BUFFER_X(buf_bo, 1, 0)
102 #define OUT_BUFFER_NMA_REFERENCE(buf_bo) OUT_BUFFER_X(buf_bo, 0, 0)
/* Media-kernel surface-state / binding-table layout helpers (gen8+ sizes). */
105 #define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8
106 #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
107 #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
109 #define HCP_SOFTWARE_SKYLAKE 1
111 #define NUM_HCPE_KERNEL 2
/* VME inter-prediction mode/shape masks and MV-count encodings. */
113 #define INTER_MODE_MASK 0x03
114 #define INTER_8X8 0x03
115 #define INTER_16X8 0x01
116 #define INTER_8X16 0x02
117 #define SUBMB_SHAPE_MASK 0x00FF00
119 #define INTER_MV8 (4 << 20)
120 #define INTER_MV32 (6 << 20)
/*
 * Fill an HEVC quantization-matrix buffer with the "flat" default: every
 * scaling-list entry (4x4/8x8/16x16/32x32 plus the 16x16/32x32 DC values)
 * is set to 16, i.e. no frequency-dependent scaling.
 * NOTE(review): braces and some lines are missing from this extraction and
 * original line numbers are fused into the text; restore from upstream.
 */
127 hevc_gen_default_iq_matrix_encoder(VAQMatrixBufferHEVC *iq_matrix)
130 memset(&iq_matrix->scaling_lists_4x4, 16, sizeof(iq_matrix->scaling_lists_4x4));
133 memset(&iq_matrix->scaling_lists_8x8, 16, sizeof(iq_matrix->scaling_lists_8x8));
136 memset(&iq_matrix->scaling_lists_16x16, 16, sizeof(iq_matrix->scaling_lists_16x16));
139 memset(&iq_matrix->scaling_lists_32x32, 16, sizeof(iq_matrix->scaling_lists_32x32));
141 /* Flat_16x16_dc_16 */
142 memset(&iq_matrix->scaling_list_dc_16x16, 16, sizeof(iq_matrix->scaling_list_dc_16x16));
144 /* Flat_32x32_dc_16 */
145 memset(&iq_matrix->scaling_list_dc_32x32, 16, sizeof(iq_matrix->scaling_list_dc_32x32));
148 /* HEVC picture and slice state related */
/*
 * Emit HCP_PIPE_MODE_SELECT configuring the HCP pipe for HEVC encode
 * (codec select = encode, status/error reporting disabled). KBL uses the
 * 6-DW form of the command; earlier gen9 parts use the 4-DW form.
 * NOTE(review): lines are missing here (parameter list, braces, some DWs)
 * and original line numbers are fused into the text; restore from upstream.
 */
151 gen9_hcpe_pipe_mode_select(VADriverContextP ctx,
153 struct intel_encoder_context *encoder_context)
155 struct i965_driver_data *i965 = i965_driver_data(ctx);
156 struct intel_batchbuffer *batch = encoder_context->base.batch;
158 assert(standard_select == HCP_CODEC_HEVC);
160 if(IS_KBL(i965->intel.device_info))
162 BEGIN_BCS_BATCH(batch, 6);
164 OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
168 BEGIN_BCS_BATCH(batch, 4);
170 OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (4 - 2));
174 (standard_select << 5) |
175 (0 << 3) | /* disable Pic Status / Error Report */
176 HCP_CODEC_SELECT_ENCODE);
177 OUT_BCS_BATCH(batch, 0);
178 OUT_BCS_BATCH(batch, 0);
/* KBL-only: two extra reserved DWs to pad the 6-DW command form. */
180 if(IS_KBL(i965->intel.device_info))
182 OUT_BCS_BATCH(batch, 0);
183 OUT_BCS_BATCH(batch, 0);
186 ADVANCE_BCS_BATCH(batch);
/*
 * Emit two HCP_SURFACE_STATE commands -- one for the reconstructed surface
 * (id 1) and one for the source surface (id 0) -- sharing the same pitch.
 * Surface format is planar 4:2:0 8-bit by default, upgraded to P010 when
 * the sequence uses >8-bit luma or chroma (asserts the fourcc matches).
 * NOTE(review): lines are missing (braces, some DW payloads, use of
 * y_cb_offset) and original line numbers are fused into the text; restore
 * from upstream before relying on the exact DW layout.
 */
190 gen9_hcpe_surface_state(VADriverContextP ctx, struct encode_state *encode_state,
191 struct intel_encoder_context *encoder_context)
193 struct intel_batchbuffer *batch = encoder_context->base.batch;
194 struct object_surface *obj_surface = encode_state->reconstructed_object;
195 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
196 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
197 unsigned int surface_format = SURFACE_FORMAT_PLANAR_420_8;
200 unsigned int y_cb_offset;
/* 10-bit content requires a P010 reconstructed surface. */
204 if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
205 || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
207 assert(obj_surface->fourcc == VA_FOURCC_P010);
208 surface_format = SURFACE_FORMAT_P010;
211 y_cb_offset = obj_surface->y_cb_offset;
/* Surface id 1: reconstructed (decoded) picture. */
213 BEGIN_BCS_BATCH(batch, 3);
214 OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
216 (1 << 28) | /* surface id */
217 (mfc_context->surface_state.w_pitch - 1)); /* pitch - 1 */
219 surface_format << 28 |
221 ADVANCE_BCS_BATCH(batch);
/* Surface id 0: uncompressed source picture. */
223 BEGIN_BCS_BATCH(batch, 3);
224 OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
226 (0 << 28) | /* surface id */
227 (mfc_context->surface_state.w_pitch - 1)); /* pitch - 1 */
229 surface_format << 28 |
231 ADVANCE_BCS_BATCH(batch);
/*
 * Emit HCP_PIPE_BUF_ADDR_STATE: programs every buffer address the HCP pipe
 * needs for encode -- reconstructed picture, deblocking/metadata/SAO
 * line+tile scratch buffers, the current MV temporal buffer, up to 8
 * reference pictures, the uncompressed source, and collocated MV buffers.
 * KBL uses the 104-DW command form; earlier gen9 parts the 95-DW form.
 * NOTE(review): lines are missing (locals i/bo declarations, loop bodies,
 * braces) and original line numbers are fused into the text; the DW
 * comments reflect the intended layout -- restore from upstream.
 */
235 gen9_hcpe_pipe_buf_addr_state(VADriverContextP ctx, struct encode_state *encode_state,
236 struct intel_encoder_context *encoder_context)
238 struct i965_driver_data *i965 = i965_driver_data(ctx);
239 struct intel_batchbuffer *batch = encoder_context->base.batch;
240 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
241 struct object_surface *obj_surface;
242 GenHevcSurface *hcpe_hevc_surface;
246 if(IS_KBL(i965->intel.device_info))
248 BEGIN_BCS_BATCH(batch, 104);
250 OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (104 - 2));
254 BEGIN_BCS_BATCH(batch, 95);
256 OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (95 - 2));
259 obj_surface = encode_state->reconstructed_object;
260 assert(obj_surface && obj_surface->bo);
261 hcpe_hevc_surface = obj_surface->private_data;
262 assert(hcpe_hevc_surface && hcpe_hevc_surface->motion_vector_temporal_bo);
264 OUT_BUFFER_MA_TARGET(obj_surface->bo); /* DW 1..3 */
265 OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_line_buffer.bo);/* DW 4..6 */
266 OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_tile_line_buffer.bo); /* DW 7..9 */
267 OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_tile_column_buffer.bo); /* DW 10..12 */
268 OUT_BUFFER_MA_TARGET(mfc_context->metadata_line_buffer.bo); /* DW 13..15 */
269 OUT_BUFFER_MA_TARGET(mfc_context->metadata_tile_line_buffer.bo); /* DW 16..18 */
270 OUT_BUFFER_MA_TARGET(mfc_context->metadata_tile_column_buffer.bo); /* DW 19..21 */
271 OUT_BUFFER_MA_TARGET(mfc_context->sao_line_buffer.bo); /* DW 22..24 */
272 OUT_BUFFER_MA_TARGET(mfc_context->sao_tile_line_buffer.bo); /* DW 25..27 */
273 OUT_BUFFER_MA_TARGET(mfc_context->sao_tile_column_buffer.bo); /* DW 28..30 */
274 OUT_BUFFER_MA_TARGET(hcpe_hevc_surface->motion_vector_temporal_bo); /* DW 31..33 */
275 OUT_BUFFER_MA_TARGET(NULL); /* DW 34..36, reserved */
277 /* here only max 8 reference allowed */
278 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
279 bo = mfc_context->reference_surfaces[i].bo;
282 OUT_BUFFER_NMA_REFERENCE(bo);
284 OUT_BUFFER_NMA_REFERENCE(NULL);
286 OUT_BCS_BATCH(batch, 0); /* DW 53, memory address attributes */
288 OUT_BUFFER_MA_TARGET(mfc_context->uncompressed_picture_source.bo); /* DW 54..56, uncompressed picture source */
289 OUT_BUFFER_MA_TARGET(NULL); /* DW 57..59, ignore */
290 OUT_BUFFER_MA_TARGET(NULL); /* DW 60..62, ignore */
291 OUT_BUFFER_MA_TARGET(NULL); /* DW 63..65, ignore */
293 for (i = 0; i < ARRAY_ELEMS(mfc_context->current_collocated_mv_temporal_buffer) - 1; i++) {
294 bo = mfc_context->current_collocated_mv_temporal_buffer[i].bo;
297 OUT_BUFFER_NMA_REFERENCE(bo);
299 OUT_BUFFER_NMA_REFERENCE(NULL);
301 OUT_BCS_BATCH(batch, 0); /* DW 82, memory address attributes */
303 OUT_BUFFER_MA_TARGET(NULL); /* DW 83..85, ignore for HEVC */
304 OUT_BUFFER_MA_TARGET(NULL); /* DW 86..88, ignore for HEVC */
305 OUT_BUFFER_MA_TARGET(NULL); /* DW 89..91, ignore for HEVC */
306 OUT_BUFFER_MA_TARGET(NULL); /* DW 92..94, ignore for HEVC */
/* KBL-only: extra trailing DWs for the longer command form. */
308 if(IS_KBL(i965->intel.device_info))
311 OUT_BCS_BATCH(batch, 0);
314 ADVANCE_BCS_BATCH(batch);
/*
 * Emit HCP_IND_OBJ_BASE_ADDR_STATE (14 DWs): points the HCP pipe at the
 * indirect CU object buffer and at the PAK-BSE output buffer (start offset
 * and upper-bound/end offset, each as a 64-bit reloc plus MOCS DW).
 * The bitstream-decode indirect object (DW 1..5) is not used for encode.
 * NOTE(review): braces and some lines are missing from this extraction and
 * original line numbers are fused into the text; restore from upstream.
 */
318 gen9_hcpe_ind_obj_base_addr_state(VADriverContextP ctx,
319 struct intel_encoder_context *encoder_context)
321 struct i965_driver_data *i965 = i965_driver_data(ctx);
322 struct intel_batchbuffer *batch = encoder_context->base.batch;
323 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
326 BEGIN_BCS_BATCH(batch, 14);
328 OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (14 - 2));
329 OUT_BUFFER_MA_REFERENCE(NULL); /* DW 1..3 igonre for encoder*/
330 OUT_BUFFER_NMA_REFERENCE(NULL); /* DW 4..5, Upper Bound */
331 OUT_BUFFER_MA_TARGET(mfc_context->hcp_indirect_cu_object.bo); /* DW 6..8, CU */
332 /* DW 9..11, PAK-BSE */
333 OUT_BCS_RELOC64(batch,
334 mfc_context->hcp_indirect_pak_bse_object.bo,
335 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
336 mfc_context->hcp_indirect_pak_bse_object.offset);
337 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/* DW 12..13: PAK-BSE upper bound (end offset within the same bo). */
338 OUT_BCS_RELOC64(batch,
339 mfc_context->hcp_indirect_pak_bse_object.bo,
340 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
341 mfc_context->hcp_indirect_pak_bse_object.end_offset);
343 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one HCP_FQM_STATE command (34 DWs): header, a control DW encoding
 * size id / color component / intra-inter selection, then a fixed 32-DW
 * forward-quantizer matrix payload (zero-padded past fqm_length).
 * NOTE(review): the parameter list (size_id, color_component, fqm,
 * fqm_length, ...), braces and some control-DW lines are missing from this
 * extraction, and original line numbers are fused into the text; restore
 * from upstream.
 */
347 gen9_hcpe_fqm_state(VADriverContextP ctx,
354 struct intel_encoder_context *encoder_context)
356 struct intel_batchbuffer *batch = encoder_context->base.batch;
357 unsigned int fqm_buffer[32];
359 assert(fqm_length <= 32);
360 assert(sizeof(*fqm) == 4);
361 memset(fqm_buffer, 0, sizeof(fqm_buffer));
362 memcpy(fqm_buffer, fqm, fqm_length * 4);
364 BEGIN_BCS_BATCH(batch, 34);
366 OUT_BCS_BATCH(batch, HCP_FQM_STATE | (34 - 2));
369 color_component << 3 |
372 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
374 ADVANCE_BCS_BATCH(batch);
/*
 * Program the full set of flat forward-quantizer matrices for HEVC encode:
 * eight gen9_hcpe_fqm_state() calls (presumably covering intra/inter for
 * each transform size and DC -- the call arguments are missing from this
 * extraction, TODO confirm against upstream). 0x1000 packed per 16-bit
 * entry corresponds to the flat reciprocal of the flat QM value 16.
 * NOTE(review): braces, the closing of the qm initializer and every call's
 * argument list are missing, and original line numbers are fused into the
 * text; restore from upstream.
 */
379 gen9_hcpe_hevc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
381 unsigned int qm[32] = {
382 0x10001000, 0x10001000, 0x10001000, 0x10001000,
383 0x10001000, 0x10001000, 0x10001000, 0x10001000,
384 0x10001000, 0x10001000, 0x10001000, 0x10001000,
385 0x10001000, 0x10001000, 0x10001000, 0x10001000,
386 0x10001000, 0x10001000, 0x10001000, 0x10001000,
387 0x10001000, 0x10001000, 0x10001000, 0x10001000,
388 0x10001000, 0x10001000, 0x10001000, 0x10001000,
389 0x10001000, 0x10001000, 0x10001000, 0x10001000
392 gen9_hcpe_fqm_state(ctx,
396 gen9_hcpe_fqm_state(ctx,
400 gen9_hcpe_fqm_state(ctx,
404 gen9_hcpe_fqm_state(ctx,
408 gen9_hcpe_fqm_state(ctx,
412 gen9_hcpe_fqm_state(ctx,
416 gen9_hcpe_fqm_state(ctx,
420 gen9_hcpe_fqm_state(ctx,
/*
 * Emit one HCP_QM_STATE command (18 DWs): header, a control DW encoding
 * size id / color component / intra-inter selection, then a fixed 16-DW
 * quantizer-matrix payload (zero-padded past qm_length).
 * NOTE(review): the parameter list (size_id, color_component, qm,
 * qm_length, ...), braces and some control-DW lines are missing from this
 * extraction, and original line numbers are fused into the text; restore
 * from upstream.
 */
427 gen9_hcpe_qm_state(VADriverContextP ctx,
434 struct intel_encoder_context *encoder_context)
436 struct intel_batchbuffer *batch = encoder_context->base.batch;
437 unsigned int qm_buffer[16];
439 assert(qm_length <= 16);
440 assert(sizeof(*qm) == 4);
441 memset(qm_buffer, 0, sizeof(qm_buffer));
442 memcpy(qm_buffer, qm, qm_length * 4);
444 BEGIN_BCS_BATCH(batch, 18);
446 OUT_BCS_BATCH(batch, HCP_QM_STATE | (18 - 2));
449 color_component << 3 |
452 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
454 ADVANCE_BCS_BATCH(batch);
/*
 * Program flat quantizer matrices (all entries 16, packed 4 bytes per DW)
 * for HEVC encode: 6+6+6 matrices presumably covering the intra/inter
 * combinations of three transform sizes, plus 2 for the largest size --
 * the per-call arguments are missing from this extraction, TODO confirm
 * against upstream.
 * NOTE(review): braces, the qm initializer's closing, the declaration of
 * i, and each call's argument list are missing, and original line numbers
 * are fused into the text; restore from upstream.
 */
458 gen9_hcpe_hevc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
463 unsigned int qm[16] = {
464 0x10101010, 0x10101010, 0x10101010, 0x10101010,
465 0x10101010, 0x10101010, 0x10101010, 0x10101010,
466 0x10101010, 0x10101010, 0x10101010, 0x10101010,
467 0x10101010, 0x10101010, 0x10101010, 0x10101010
470 for (i = 0; i < 6; i++) {
471 gen9_hcpe_qm_state(ctx,
477 for (i = 0; i < 6; i++) {
478 gen9_hcpe_qm_state(ctx,
484 for (i = 0; i < 6; i++) {
485 gen9_hcpe_qm_state(ctx,
491 for (i = 0; i < 2; i++) {
492 gen9_hcpe_qm_state(ctx,
/*
 * Emit HCP_PIC_STATE for HEVC encode: picture dimensions in min CBs,
 * CB/TB size ranges, PCM parameters, the large DW4/DW5 flag words built
 * from the VA-API sequence/picture parameters, the per-LCU max bit budget,
 * and the frame-level min/max bitrate and delta-QP clamping tables used by
 * the PAK rate control. KBL uses the 31-DW command form (and a trailing
 * run of 12 zero DWs); earlier gen9 parts use the 19-DW form.
 * NOTE(review): this extraction is missing lines (locals, braces, several
 * OUT_BCS_BATCH( headers before the continuation expressions, parts of
 * DW4/DW17) and original line numbers are fused into the text; restore
 * from upstream before trusting the exact DW layout.
 */
500 gen9_hcpe_hevc_pic_state(VADriverContextP ctx, struct encode_state *encode_state,
501 struct intel_encoder_context *encoder_context)
503 struct i965_driver_data *i965 = i965_driver_data(ctx);
504 struct intel_batchbuffer *batch = encoder_context->base.batch;
505 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
506 VAEncPictureParameterBufferHEVC *pic_param ;
507 VAEncSequenceParameterBufferHEVC *seq_param ;
509 int max_pcm_size_minus3 = 0, min_pcm_size_minus3 = 0;
510 int pcm_sample_bit_depth_luma_minus1 = 7, pcm_sample_bit_depth_chroma_minus1 = 7;
514 * When not present, the value of loop_filter_across_tiles_enabled_flag
515 * is inferred to be equal to 1.
517 int loop_filter_across_tiles_enabled_flag = 0;
518 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
519 seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
/* Derive CTB geometry and the raw/max per-CTB bit budget. */
521 int log2_cu_size = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
522 int log2_ctb_size = seq_param->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
523 int ctb_size = 1 << log2_ctb_size;
524 double rawctubits = 8 * 3 * ctb_size * ctb_size / 2.0;
525 int maxctubits = (int)(5 * rawctubits / 3) ;
526 double bitrate = (double)encoder_context->brc.bits_per_second[0];
527 double framebitrate = bitrate / 32 / 8; //32 byte unit
528 int minframebitrate = 0;//(int) (framebitrate * 3 / 10);
529 int maxframebitrate = (int)(framebitrate * 10 / 10);
530 int maxdeltaframebitrate = 0x1c5c; //(int) (framebitrate * 1/ 10);
531 int mindeltaframebitrate = 0; //(int) (framebitrate * 1/ 10);
532 int minframesize = 0;//(int)(rawframebits * 1/50);
534 if (seq_param->seq_fields.bits.pcm_enabled_flag) {
535 max_pcm_size_minus3 = seq_param->log2_max_pcm_luma_coding_block_size_minus3;
536 min_pcm_size_minus3 = seq_param->log2_min_pcm_luma_coding_block_size_minus3;
537 pcm_sample_bit_depth_luma_minus1 = (seq_param->pcm_sample_bit_depth_luma_minus1 & 0x0f);
538 pcm_sample_bit_depth_chroma_minus1 = (seq_param->pcm_sample_bit_depth_chroma_minus1 & 0x0f);
540 max_pcm_size_minus3 = MIN(seq_param->log2_min_luma_coding_block_size_minus3 + seq_param->log2_diff_max_min_luma_coding_block_size, 2);
543 if (pic_param->pic_fields.bits.tiles_enabled_flag)
544 loop_filter_across_tiles_enabled_flag = pic_param->pic_fields.bits.loop_filter_across_tiles_enabled_flag;
546 /* set zero for encoder */
547 loop_filter_across_tiles_enabled_flag = 0;
549 if(IS_KBL(i965->intel.device_info))
551 BEGIN_BCS_BATCH(batch, 31);
553 OUT_BCS_BATCH(batch, HCP_PIC_STATE | (31 - 2));
557 BEGIN_BCS_BATCH(batch, 19);
559 OUT_BCS_BATCH(batch, HCP_PIC_STATE | (19 - 2));
/* DW 1: frame size in units of min coding blocks. */
563 mfc_context->pic_size.picture_height_in_min_cb_minus1 << 16 |
565 mfc_context->pic_size.picture_width_in_min_cb_minus1);
/* DW 2: PCM size range and CB/TB log2 size ranges. */
567 max_pcm_size_minus3 << 10 |
568 min_pcm_size_minus3 << 8 |
569 (seq_param->log2_min_transform_block_size_minus2 +
570 seq_param->log2_diff_max_min_transform_block_size) << 6 |
571 seq_param->log2_min_transform_block_size_minus2 << 4 |
572 (seq_param->log2_min_luma_coding_block_size_minus3 +
573 seq_param->log2_diff_max_min_luma_coding_block_size) << 2 |
574 seq_param->log2_min_luma_coding_block_size_minus3);
575 OUT_BCS_BATCH(batch, 0); /* DW 3, ignored */
/* DW 4: per-picture coding tool flags (several forced to 0 for encode). */
577 (IS_KBL(i965->intel.device_info)? 1 : 0) << 27 | /* CU packet structure is 0 for SKL */
578 seq_param->seq_fields.bits.strong_intra_smoothing_enabled_flag << 26 |
579 pic_param->pic_fields.bits.transquant_bypass_enabled_flag << 25 |
580 seq_param->seq_fields.bits.amp_enabled_flag << 23 |
581 pic_param->pic_fields.bits.transform_skip_enabled_flag << 22 |
582 0 << 21 | /* 0 for encoder !(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_BOTTOM_FIELD)*/
583 0 << 20 | /* 0 for encoder !!(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_FIELD_PIC)*/
584 pic_param->pic_fields.bits.weighted_pred_flag << 19 |
585 pic_param->pic_fields.bits.weighted_bipred_flag << 18 |
586 pic_param->pic_fields.bits.tiles_enabled_flag << 17 | /* 0 for encoder */
587 pic_param->pic_fields.bits.entropy_coding_sync_enabled_flag << 16 |
588 loop_filter_across_tiles_enabled_flag << 15 |
589 pic_param->pic_fields.bits.sign_data_hiding_enabled_flag << 13 | /* 0 for encoder */
590 pic_param->log2_parallel_merge_level_minus2 << 10 | /* 0 for encoder */
591 pic_param->pic_fields.bits.constrained_intra_pred_flag << 9 | /* 0 for encoder */
592 seq_param->seq_fields.bits.pcm_loop_filter_disabled_flag << 8 |
593 (pic_param->diff_cu_qp_delta_depth & 0x03) << 6 | /* 0 for encoder */
594 pic_param->pic_fields.bits.cu_qp_delta_enabled_flag << 5 | /* 0 for encoder */
595 seq_param->seq_fields.bits.pcm_enabled_flag << 4 |
596 seq_param->seq_fields.bits.sample_adaptive_offset_enabled_flag << 3 | /* 0 for encoder */
/* DW 5: bit depths, PCM sample depths, transform hierarchy, QP offsets. */
599 seq_param->seq_fields.bits.bit_depth_luma_minus8 << 27 | /* 10 bit for KBL+*/
600 seq_param->seq_fields.bits.bit_depth_chroma_minus8 << 24 | /* 10 bit for KBL+ */
601 pcm_sample_bit_depth_luma_minus1 << 20 |
602 pcm_sample_bit_depth_chroma_minus1 << 16 |
603 seq_param->max_transform_hierarchy_depth_inter << 13 | /* for encoder */
604 seq_param->max_transform_hierarchy_depth_intra << 10 | /* for encoder */
605 (pic_param->pps_cr_qp_offset & 0x1f) << 5 |
606 (pic_param->pps_cb_qp_offset & 0x1f));
608 0 << 29 | /* must be 0 for encoder */
609 maxctubits); /* DW 6, max LCU bit size allowed for encoder */
611 0 << 31 | /* frame bitrate max unit */
612 maxframebitrate); /* DW 7, frame bitrate max 0:13 */
614 0 << 31 | /* frame bitrate min unit */
615 minframebitrate); /* DW 8, frame bitrate min 0:13 */
617 maxdeltaframebitrate << 16 | /* frame bitrate max delta ,help to select deltaQP of slice*/
618 mindeltaframebitrate); /* DW 9,(0,14) frame bitrate min delta ,help to select deltaQP of slice*/
619 OUT_BCS_BATCH(batch, 0x07050402); /* DW 10, frame delta qp max */
620 OUT_BCS_BATCH(batch, 0x0d0b0908);
621 OUT_BCS_BATCH(batch, 0); /* DW 12, frame delta qp min */
622 OUT_BCS_BATCH(batch, 0);
623 OUT_BCS_BATCH(batch, 0x04030200); /* DW 14, frame delta qp max range */
624 OUT_BCS_BATCH(batch, 0x100c0806); /* DW 15 */
625 OUT_BCS_BATCH(batch, 0x04030200); /* DW 16, frame delta qp min range */
626 OUT_BCS_BATCH(batch, 0x100c0806);
629 minframesize); /* DW 18, min frame size units */
/* KBL-only: 12 trailing zero DWs for the 31-DW command form. */
631 if(IS_KBL(i965->intel.device_info))
635 for(i = 0;i < 12;i++)
636 OUT_BCS_BATCH(batch, 0);
639 ADVANCE_BCS_BATCH(batch);
/*
 * Emit an HCP_INSERT_PAK_OBJECT carrying an inline header/tail payload of
 * lenght_in_dws DWs [sic]: DW1 encodes bit count in the last DW, bytes of
 * emulation-prevention skip, emulation enable, last-header and
 * end-of-slice flags; the payload follows inline. A zero
 * data_bits_in_last_dw is normalized to a full 32-bit final DW. Falls back
 * to the encoder context's batch when batch is NULL (the guarding if-line
 * is missing from this extraction).
 * NOTE(review): braces and some lines are missing and original line
 * numbers are fused into the text; restore from upstream.
 */
644 gen9_hcpe_hevc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
645 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
646 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
647 struct intel_batchbuffer *batch)
650 batch = encoder_context->base.batch;
652 if (data_bits_in_last_dw == 0)
653 data_bits_in_last_dw = 32;
655 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
657 OUT_BCS_BATCH(batch, HCP_INSERT_PAK_OBJECT | (lenght_in_dws + 2 - 2));
659 (0 << 31) | /* inline payload */
660 (0 << 16) | /* always start at offset 0 */
661 (0 << 15) | /* HeaderLengthExcludeFrmSize */
662 (data_bits_in_last_dw << 8) |
663 (skip_emul_byte_count << 4) |
664 (!!emulation_flag << 3) |
665 ((!!is_last_header) << 2) |
666 ((!!is_end_of_slice) << 1) |
667 (0 << 0)); /* Reserved */
668 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
670 ADVANCE_BCS_BATCH(batch);
/*
 * Pack one reference-picture entry for HCP_REF_IDX_STATE from a
 * VAPictureHEVC: bit 6 = long-term flag, bit 5 = frame (not field) flag
 * (top XOR bottom XOR 1), bits 4:1 = frame store id, bit 0 = bottom-field
 * flag (set only when bottom and not top).
 * NOTE(review): braces are missing from this extraction and original line
 * numbers are fused into the text; restore from upstream.
 */
675 intel_get_ref_idx_state_1(VAPictureHEVC *va_pic, unsigned int frame_store_id)
677 unsigned int is_long_term =
678 !!(va_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE);
/* Top field is assumed whenever the bottom-field flag is absent. */
679 unsigned int is_top_field =
680 !!!(va_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD);
681 unsigned int is_bottom_field =
682 !!(va_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD);
684 return ((is_long_term << 6) |
685 ((is_top_field ^ is_bottom_field ^ 1) << 5) |
686 (frame_store_id << 1) |
687 ((is_top_field ^ 1) & is_bottom_field));
/*
 * Emit HCP_REF_IDX_STATE (18 DWs) for one reference list (list 0 or 1):
 * looks up the VME's chosen reference object in the DPB to find its frame
 * index, then writes one entry per active reference (POC delta clamped to
 * [-128,127], long/short-term and field flags) and zero for the rest of
 * the 16 slots. Warns (WARN_ONCE) when ref_idx_l0 exceeds 3 or the
 * reference is not found in the DPB.
 * NOTE(review): lines are missing (the "list" parameter declaration,
 * locals i/frame_index, braces, parts of the loop) and original line
 * numbers are fused into the text; restore from upstream.
 */
691 gen9_hcpe_ref_idx_state_1(struct intel_batchbuffer *batch,
693 struct intel_encoder_context *encoder_context,
694 struct encode_state *encode_state)
697 VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
698 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
699 uint8_t num_ref_minus1 = (list ? slice_param->num_ref_idx_l1_active_minus1 : slice_param->num_ref_idx_l0_active_minus1);
700 VAPictureHEVC *ref_list = (list ? slice_param->ref_pic_list1 : slice_param->ref_pic_list0);
701 struct gen6_vme_context *vme_context = encoder_context->vme_context;
702 struct object_surface *obj_surface;
705 int ref_idx_l0 = (vme_context->ref_index_in_mb[list] & 0xff);
707 if (ref_idx_l0 > 3) {
708 WARN_ONCE("ref_idx_l0 is out of range\n");
/* Locate the VME-selected reference surface in the DPB. */
712 obj_surface = vme_context->used_reference_objects[list];
714 for (i = 0; i < 16; i++) {
716 obj_surface == encode_state->reference_objects[i]) {
721 if (frame_index == -1) {
722 WARN_ONCE("RefPicList 0 or 1 is not found in DPB!\n");
725 BEGIN_BCS_BATCH(batch, 18);
727 OUT_BCS_BATCH(batch, HCP_REF_IDX_STATE | (18 - 2));
729 num_ref_minus1 << 1 |
/* One DW per reference slot; unused slots are zeroed. */
732 for (i = 0; i < 16; i++) {
733 if (i < MIN((num_ref_minus1 + 1), 15)) {
734 VAPictureHEVC *ref_pic = &ref_list[i];
735 VAPictureHEVC *curr_pic = &pic_param->decoded_curr_pic;
738 1 << 15 | /* bottom_field_flag 0 */
739 0 << 14 | /* field_pic_flag 0 */
740 !!(ref_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE) << 13 | /* short term is 1 */
741 0 << 12 | /* disable WP */
742 0 << 11 | /* disable WP */
744 (CLAMP(-128, 127, curr_pic->pic_order_cnt - ref_pic->pic_order_cnt) & 0xff));
746 OUT_BCS_BATCH(batch, 0);
750 ADVANCE_BCS_BATCH(batch);
/*
 * Dispatch reference-index programming per slice type: I slices emit no
 * ref lists (early return), P/B emit list 0, and B additionally emits
 * list 1 (the P-slice early return between the two calls is missing from
 * this extraction).
 * NOTE(review): braces and several lines are missing and original line
 * numbers are fused into the text; restore from upstream.
 */
754 intel_hcpe_hevc_ref_idx_state(VADriverContextP ctx,
755 struct encode_state *encode_state,
756 struct intel_encoder_context *encoder_context
759 struct intel_batchbuffer *batch = encoder_context->base.batch;
760 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
762 if (slice_param->slice_type == HEVC_SLICE_I)
765 gen9_hcpe_ref_idx_state_1(batch, 0, encoder_context, encode_state);
767 if (slice_param->slice_type == HEVC_SLICE_P)
770 gen9_hcpe_ref_idx_state_1(batch, 1, encoder_context, encode_state);
/*
 * Emit HCP_SLICE_STATE for one slice: current and next slice CTB
 * positions, QP and chroma QP offsets, merge/weight/SAO/deblocking flags,
 * and PAK control bits (header/tail insertion, emulation prevention,
 * cabac-zero-word insertion). Slices must start at a CTB row boundary
 * (asserted). For the last slice of the picture the "next slice" position
 * wraps to (0, height) for a single-slice frame or (0,0) otherwise.
 * KBL uses the 11-DW command form; earlier gen9 parts the 9-DW form.
 * NOTE(review): lines are missing (braces, DW headers before several
 * continuation expressions, parts of DW4/DW6) and original line numbers
 * are fused into the text; restore from upstream.
 */
774 gen9_hcpe_hevc_slice_state(VADriverContextP ctx,
775 VAEncPictureParameterBufferHEVC *pic_param,
776 VAEncSliceParameterBufferHEVC *slice_param,
777 struct encode_state *encode_state,
778 struct intel_encoder_context *encoder_context,
779 struct intel_batchbuffer *batch)
781 struct i965_driver_data *i965 = i965_driver_data(ctx);
782 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
783 int slice_type = slice_param->slice_type;
785 int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
786 int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
787 int ctb_size = 1 << log2_ctb_size;
788 int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
789 int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
790 int last_slice = (((slice_param->slice_segment_address + slice_param->num_ctu_in_slice) == (width_in_ctb * height_in_ctb)) ? 1 : 0);
792 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
794 slice_hor_pos = slice_param->slice_segment_address % width_in_ctb;
795 slice_ver_pos = slice_param->slice_segment_address / width_in_ctb;
797 next_slice_hor_pos = (slice_param->slice_segment_address + slice_param->num_ctu_in_slice) % width_in_ctb;
798 next_slice_ver_pos = (slice_param->slice_segment_address + slice_param->num_ctu_in_slice) / width_in_ctb;
800 /* only support multi slice begin from row start address */
801 assert((slice_param->slice_segment_address % width_in_ctb) == 0);
803 if (last_slice == 1) {
804 if (slice_param->slice_segment_address == 0) {
805 next_slice_hor_pos = 0;
806 next_slice_ver_pos = height_in_ctb;
808 next_slice_hor_pos = 0;
809 next_slice_ver_pos = 0;
813 if(IS_KBL(i965->intel.device_info))
815 BEGIN_BCS_BATCH(batch, 11);
817 OUT_BCS_BATCH(batch, HCP_SLICE_STATE | (11 - 2));
821 BEGIN_BCS_BATCH(batch, 9);
823 OUT_BCS_BATCH(batch, HCP_SLICE_STATE | (9 - 2));
/* DW 1: current slice start CTB position; DW 2: next slice position. */
827 slice_ver_pos << 16 |
830 next_slice_ver_pos << 16 |
/* DW 3: slice QP, chroma QP offsets, TMVP/dependent-slice flags. */
833 (slice_param->slice_cr_qp_offset & 0x1f) << 17 |
834 (slice_param->slice_cb_qp_offset & 0x1f) << 12 |
835 (pic_param->pic_init_qp + slice_param->slice_qp_delta) << 6 |
836 slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag << 5 |
837 slice_param->slice_fields.bits.dependent_slice_segment_flag << 4 |
/* DW 4: merge candidates, CABAC init, weighted-pred denominators,
 * SAO/deblocking controls. */
842 (slice_param->max_num_merge_cand - 1) << 23 |
843 slice_param->slice_fields.bits.cabac_init_flag << 22 |
844 slice_param->luma_log2_weight_denom << 19 |
845 (slice_param->luma_log2_weight_denom + slice_param->delta_chroma_log2_weight_denom) << 16 |
846 slice_param->slice_fields.bits.collocated_from_l0_flag << 15 |
847 (slice_type != HEVC_SLICE_B) << 14 |
848 slice_param->slice_fields.bits.mvd_l1_zero_flag << 13 |
849 slice_param->slice_fields.bits.slice_sao_luma_flag << 12 |
850 slice_param->slice_fields.bits.slice_sao_chroma_flag << 11 |
851 slice_param->slice_fields.bits.slice_loop_filter_across_slices_enabled_flag << 10 |
852 (slice_param->slice_beta_offset_div2 & 0xf) << 5 |
853 (slice_param->slice_tc_offset_div2 & 0xf) << 1 |
854 slice_param->slice_fields.bits.slice_deblocking_filter_disabled_flag);
855 OUT_BCS_BATCH(batch, 0); /* DW 5 ,ignore for encoder.*/
/* DW 6: PAK bitstream-output controls. */
861 1 << 10 | /* header insertion enable */
862 1 << 9 | /* slice data enable */
863 1 << 8 | /* tail insertion enable, must at end of frame, not slice */
864 1 << 2 | /* RBSP or EBSP, EmulationByteSliceInsertEnable */
865 1 << 1 | /* cabacZeroWordInsertionEnable */
866 0); /* Ignored for decoding */
867 OUT_BCS_BATCH(batch, 0); /* PAK-BSE data start offset */
/* KBL-only: two extra trailing DWs for the 11-DW command form. */
869 if(IS_KBL(i965->intel.device_info))
871 OUT_BCS_BATCH(batch, 0);
872 OUT_BCS_BATCH(batch, 0);
875 ADVANCE_BCS_BATCH(batch);
878 /* HEVC pipe line related */
/*
 * Program all per-picture HCP state in the required order: pipe mode
 * select, surface state, buffer addresses, indirect object bases, QM/FQM
 * tables, picture state, and reference index lists. Most steps go through
 * the gen9_hcpe_context function pointers so platform variants can
 * override them; pipe_buf_addr and ref_idx are called directly.
 * NOTE(review): braces are missing from this extraction and original line
 * numbers are fused into the text; restore from upstream.
 */
879 static void gen9_hcpe_hevc_pipeline_picture_programing(VADriverContextP ctx,
880 struct encode_state *encode_state,
881 struct intel_encoder_context *encoder_context)
883 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
885 mfc_context->pipe_mode_select(ctx, HCP_CODEC_HEVC, encoder_context);
886 mfc_context->set_surface_state(ctx, encode_state, encoder_context);
887 gen9_hcpe_pipe_buf_addr_state(ctx, encode_state, encoder_context);
888 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
890 mfc_context->qm_state(ctx, encoder_context);
891 mfc_context->fqm_state(ctx, encoder_context);
892 mfc_context->pic_state(ctx, encode_state, encoder_context);
893 intel_hcpe_hevc_ref_idx_state(ctx, encode_state, encoder_context);
896 static void gen9_hcpe_init(VADriverContextP ctx,
897 struct encode_state *encode_state,
898 struct intel_encoder_context *encoder_context)
901 struct i965_driver_data *i965 = i965_driver_data(ctx);
902 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
903 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
904 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
907 int slice_batchbuffer_size;
908 int slice_type = slice_param->slice_type;
909 int is_inter = (slice_type != HEVC_SLICE_I);
911 int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
912 int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
913 int ctb_size = 1 << log2_ctb_size;
914 int cu_size = 1 << log2_cu_size;
916 int width_in_ctb = ALIGN(pSequenceParameter->pic_width_in_luma_samples , ctb_size) / ctb_size;
917 int height_in_ctb = ALIGN(pSequenceParameter->pic_height_in_luma_samples, ctb_size) / ctb_size;
918 int width_in_cu = ALIGN(pSequenceParameter->pic_width_in_luma_samples , cu_size) / cu_size;
919 int height_in_cu = ALIGN(pSequenceParameter->pic_height_in_luma_samples, cu_size) / cu_size;
920 int width_in_mb = ALIGN(pSequenceParameter->pic_width_in_luma_samples , 16) / 16;
921 int height_in_mb = ALIGN(pSequenceParameter->pic_height_in_luma_samples, 16) / 16;
923 int num_cu_record = 64;
926 if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
927 || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
930 if (log2_ctb_size == 5) num_cu_record = 16;
931 else if (log2_ctb_size == 4) num_cu_record = 4;
932 else if (log2_ctb_size == 6) num_cu_record = 64;
934 /* frame size in samples, cu,ctu, mb */
935 mfc_context->pic_size.picture_width_in_samples = pSequenceParameter->pic_width_in_luma_samples;
936 mfc_context->pic_size.picture_height_in_samples = pSequenceParameter->pic_height_in_luma_samples;
937 mfc_context->pic_size.ctb_size = ctb_size;
938 mfc_context->pic_size.picture_width_in_ctbs = width_in_ctb;
939 mfc_context->pic_size.picture_height_in_ctbs = height_in_ctb;
940 mfc_context->pic_size.min_cb_size = cu_size;
941 mfc_context->pic_size.picture_width_in_min_cb_minus1 = width_in_cu - 1;
942 mfc_context->pic_size.picture_height_in_min_cb_minus1 = height_in_cu - 1;
943 mfc_context->pic_size.picture_width_in_mbs = width_in_mb;
944 mfc_context->pic_size.picture_height_in_mbs = height_in_mb;
946 slice_batchbuffer_size = 64 * width_in_ctb * width_in_ctb + 4096 +
947 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
949 /*Encode common setup for HCP*/
951 dri_bo_unreference(mfc_context->deblocking_filter_line_buffer.bo);
952 mfc_context->deblocking_filter_line_buffer.bo = NULL;
954 dri_bo_unreference(mfc_context->deblocking_filter_tile_line_buffer.bo);
955 mfc_context->deblocking_filter_tile_line_buffer.bo = NULL;
957 dri_bo_unreference(mfc_context->deblocking_filter_tile_column_buffer.bo);
958 mfc_context->deblocking_filter_tile_column_buffer.bo = NULL;
961 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
962 mfc_context->uncompressed_picture_source.bo = NULL;
965 dri_bo_unreference(mfc_context->metadata_line_buffer.bo);
966 mfc_context->metadata_line_buffer.bo = NULL;
968 dri_bo_unreference(mfc_context->metadata_tile_line_buffer.bo);
969 mfc_context->metadata_tile_line_buffer.bo = NULL;
971 dri_bo_unreference(mfc_context->metadata_tile_column_buffer.bo);
972 mfc_context->metadata_tile_column_buffer.bo = NULL;
975 dri_bo_unreference(mfc_context->sao_line_buffer.bo);
976 mfc_context->sao_line_buffer.bo = NULL;
978 dri_bo_unreference(mfc_context->sao_tile_line_buffer.bo);
979 mfc_context->sao_tile_line_buffer.bo = NULL;
981 dri_bo_unreference(mfc_context->sao_tile_column_buffer.bo);
982 mfc_context->sao_tile_column_buffer.bo = NULL;
984 /* mv temporal buffer */
985 for (i = 0; i < NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS; i++) {
986 if (mfc_context->current_collocated_mv_temporal_buffer[i].bo != NULL)
987 dri_bo_unreference(mfc_context->current_collocated_mv_temporal_buffer[i].bo);
988 mfc_context->current_collocated_mv_temporal_buffer[i].bo = NULL;
992 for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
993 if (mfc_context->reference_surfaces[i].bo != NULL)
994 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
995 mfc_context->reference_surfaces[i].bo = NULL;
998 /* indirect data CU recording */
999 dri_bo_unreference(mfc_context->hcp_indirect_cu_object.bo);
1000 mfc_context->hcp_indirect_cu_object.bo = NULL;
1002 dri_bo_unreference(mfc_context->hcp_indirect_pak_bse_object.bo);
1003 mfc_context->hcp_indirect_pak_bse_object.bo = NULL;
1005 /* Current internal buffer for HCP */
1007 size = ALIGN(pSequenceParameter->pic_width_in_luma_samples, 32) >> size_shift;
1009 ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_line_buffer), "line buffer", size);
1010 ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_tile_line_buffer), "tile line buffer", size);
1012 size = ALIGN(pSequenceParameter->pic_height_in_luma_samples + 6 * width_in_ctb, 32) >> size_shift;
1014 ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_tile_column_buffer), "tile column buffer", size);
1017 size = (((pSequenceParameter->pic_width_in_luma_samples + 15) >> 4) * 188 + 9 * width_in_ctb + 1023) >> 9;
1019 ALLOC_ENCODER_BUFFER((&mfc_context->metadata_line_buffer), "metadata line buffer", size);
1021 size = (((pSequenceParameter->pic_width_in_luma_samples + 15) >> 4) * 172 + 9 * width_in_ctb + 1023) >> 9;
1023 ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_line_buffer), "metadata tile line buffer", size);
1025 size = (((pSequenceParameter->pic_height_in_luma_samples + 15) >> 4) * 176 + 89 * width_in_ctb + 1023) >> 9;
1027 ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_column_buffer), "metadata tile column buffer", size);
1029 size = (pSequenceParameter->pic_width_in_luma_samples + 8 * width_in_ctb + 1023) >> 9;
1031 ALLOC_ENCODER_BUFFER((&mfc_context->metadata_line_buffer), "metadata line buffer", size);
1033 size = (pSequenceParameter->pic_width_in_luma_samples + 16 * width_in_ctb + 1023) >> 9;
1035 ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_line_buffer), "metadata tile line buffer", size);
1037 size = (pSequenceParameter->pic_height_in_luma_samples + 8 * height_in_ctb + 1023) >> 9;
1039 ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_column_buffer), "metadata tile column buffer", size);
1042 size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 3 * width_in_ctb), 16) >> size_shift;
1044 ALLOC_ENCODER_BUFFER((&mfc_context->sao_line_buffer), "sao line buffer", size);
1046 size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 6 * width_in_ctb), 16) >> size_shift;
1048 ALLOC_ENCODER_BUFFER((&mfc_context->sao_tile_line_buffer), "sao tile line buffer", size);
1050 size = ALIGN(((pSequenceParameter->pic_height_in_luma_samples >> 1) + 6 * height_in_ctb), 16) >> size_shift;
1052 ALLOC_ENCODER_BUFFER((&mfc_context->sao_tile_column_buffer), "sao tile column buffer", size);
1054 /////////////////////
1055 dri_bo_unreference(mfc_context->hcp_indirect_cu_object.bo);
1056 bo = dri_bo_alloc(i965->intel.bufmgr,
1057 "Indirect data CU Buffer",
1058 width_in_ctb * height_in_ctb * num_cu_record * 16 * 4,
1061 mfc_context->hcp_indirect_cu_object.bo = bo;
1063 /* to do pak bse object buffer */
1064 /* to do current collocated mv temporal buffer */
1066 dri_bo_unreference(mfc_context->hcp_batchbuffer_surface.bo);
1067 mfc_context->hcp_batchbuffer_surface.bo = NULL;
1069 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
1070 mfc_context->aux_batchbuffer_surface.bo = NULL;
1072 if (mfc_context->aux_batchbuffer)
1073 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
1075 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
1076 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
1077 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
1078 mfc_context->aux_batchbuffer_surface.pitch = 16;
1079 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
1080 mfc_context->aux_batchbuffer_surface.size_block = 16;
/* Submit the fully-built HCP/PAK command stream to the kernel: flushing
 * encoder_context->base.batch executes everything queued so far.
 * Always reports VA_STATUS_SUCCESS; flush errors are not propagated here. */
1083 static VAStatus gen9_hcpe_run(VADriverContextP ctx,
1084 struct encode_state *encode_state,
1085 struct intel_encoder_context *encoder_context)
1087 struct intel_batchbuffer *batch = encoder_context->base.batch;
1089 intel_batchbuffer_flush(batch); //run the pipeline
1091 return VA_STATUS_SUCCESS;
1096 gen9_hcpe_stop(VADriverContextP ctx,
1097 struct encode_state *encode_state,
1098 struct intel_encoder_context *encoder_context,
1099 int *encoded_bits_size)
1101 VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
1102 VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1103 VACodedBufferSegment *coded_buffer_segment;
1105 vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
1106 assert(vaStatus == VA_STATUS_SUCCESS);
1107 *encoded_bits_size = coded_buffer_segment->size * 8;
1108 i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
1110 return VA_STATUS_SUCCESS;
/* Scan a packed-header bitstream for the Annex-B start prefix (000001 or
 * 00000001) and compute how many leading bytes the HW should skip before
 * emulation-prevention processing starts (leading zeros + start code + NAL
 * header).
 * NOTE(review): several declarations and the return paths of this function
 * (i, skip_cnt, zero_byte, nal_unit_type, ...) fall outside the visible
 * excerpt; comments below cover only what is shown. */
1114 int intel_hevc_find_skipemulcnt(unsigned char *buf, int bits_length)
1118 int leading_zero_cnt, byte_length, zero_byte;
/* nal_unit_type occupies bits [6:1] of the first NAL header byte in HEVC */
1122 #define NAL_UNIT_TYPE_MASK 0x7e
/* HW limit on skippable leading bytes */
1123 #define HW_MAX_SKIP_LENGTH 15
/* bits -> bytes after rounding the bit count up to a dword boundary */
1125 byte_length = ALIGN(bits_length, 32) >> 3;
1128 leading_zero_cnt = 0;
/* search for a 3- or 4-byte start prefix */
1130 for (i = 0; i < byte_length - 4; i++) {
1131 if (((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1)) ||
1132 ((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 0) && (buf[i + 3] == 1))) {
1139 /* warning message is complained. But anyway it will be inserted. */
1140 WARN_ONCE("Invalid packed header data. "
1141 "Can't find the 000001 start_prefix code\n");
1144 i = leading_zero_cnt;
/* distinguish the 3-byte prefix from the 4-byte one (extra zero_byte) */
1147 if (!((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1)))
/* bytes to skip: leading zeros + optional zero byte + 3-byte start code */
1150 skip_cnt = leading_zero_cnt + zero_byte + 3;
1152 /* the unit header byte is accounted */
1153 nal_unit_type = (buf[skip_cnt]) & NAL_UNIT_TYPE_MASK;
1155 skip_cnt += 1; /* two bytes length of nal headers in hevc */
1157 if (nal_unit_type == 14 || nal_unit_type == 20 || nal_unit_type == 21) {
1158 /* more unit header bytes are accounted for MVC/SVC */
1161 if (skip_cnt > HW_MAX_SKIP_LENGTH) {
1162 WARN_ONCE("Too many leading zeros are padded for packed data. "
1163 "It is beyond the HW range.!!!\n");
1168 #ifdef HCP_SOFTWARE_SKYLAKE
/* Emit one HCP_PAK_OBJECT command for a single CTB/LCU into the BCS batch
 * and return the command length in dwords (3 on SKL; the KBL branches below
 * add two extra dwords — NOTE(review): the KBL len_in_dwords adjustment
 * itself is in an elided line). */
1171 gen9_hcpe_hevc_pak_object(VADriverContextP ctx, int lcu_x, int lcu_y, int isLast_ctb,
1172 struct intel_encoder_context *encoder_context,
1173 int cu_count_in_lcu, unsigned int split_coding_unit_flag,
1174 struct intel_batchbuffer *batch)
1176 struct i965_driver_data *i965 = i965_driver_data(ctx);
1177 int len_in_dwords = 3;
1179 if(IS_KBL(i965->intel.device_info))
/* NOTE(review): an elided guard (likely `if (batch == NULL)`) appears to
 * precede this fallback to the encoder's own batch. */
1183 batch = encoder_context->base.batch;
1185 BEGIN_BCS_BATCH(batch, len_in_dwords);
/* DW0: opcode; the embedded length field is total dwords minus 2 */
1187 OUT_BCS_BATCH(batch, HCP_PAK_OBJECT | (len_in_dwords - 2));
1188 OUT_BCS_BATCH(batch,
1189 (((isLast_ctb > 0) ? 1 : 0) << 31) | /* last ctb?*/
1190 ((cu_count_in_lcu - 1) << 24) | /* No motion vector */
1191 split_coding_unit_flag);
/* DW2: CTB coordinates, y in the high halfword, x in the low */
1193 OUT_BCS_BATCH(batch, (lcu_y << 16) | lcu_x); /* LCU for Y*/
1195 if(IS_KBL(i965->intel.device_info))
1197 OUT_BCS_BATCH(batch, 0);
1198 OUT_BCS_BATCH(batch, 0);
1201 ADVANCE_BCS_BATCH(batch);
1203 return len_in_dwords;
/* Dword offsets and bit masks for extracting fields from the AVC VME
 * write-back message that the HEVC PAK path below reuses (1 MB == 1 CU). */
1206 #define AVC_INTRA_RDO_OFFSET 4
1207 #define AVC_INTER_RDO_OFFSET 10
1208 #define AVC_INTER_MSG_OFFSET 8
1209 #define AVC_INTER_MV_OFFSET 48
1210 #define AVC_RDO_MASK 0xFFFF
1212 #define AVC_INTRA_MODE_MASK 0x30
1213 #define AVC_INTRA_16X16 0x00
1214 #define AVC_INTRA_8X8 0x01
1215 #define AVC_INTRA_4X4 0x02
1217 #define AVC_INTER_MODE_MASK 0x03
1218 #define AVC_INTER_8X8 0x03
1219 #define AVC_INTER_8X16 0x02
1220 #define AVC_INTER_16X8 0x01
1221 #define AVC_INTER_16X16 0x00
/* sub-MB shape bits of the VME message (single authoritative definition;
 * a byte-identical duplicate that used to follow the next macro was removed) */
1222 #define AVC_SUBMB_SHAPE_MASK 0x00FF00
1224 /* VME output message, write back message */
1225 #define AVC_INTER_SUBMB_PRE_MODE_MASK 0x00ff0000
1228 /* here 1 MB = 1CU = 16x16 */
/* Translate one intra macroblock of AVC VME output (`msg`) into a 16-dword
 * HCP CU record inside hcp_indirect_cu_object, remapping AVC intra
 * prediction modes to their HEVC equivalents.
 * NOTE(review): several locals used below (is_inter, zero, cu_size, tu_size,
 * tu_count, intraMode[]) are declared in elided lines; the bo is assumed to
 * be mapped by the caller before this runs. */
1230 gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP ctx,
1231 struct encode_state *encode_state,
1232 struct intel_encoder_context *encoder_context,
1233 int qp, unsigned int *msg,
1234 int ctb_x, int ctb_y,
1236 int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type,int cu_index,int index)
1238 /* here cu == mb, so we use mb address as the cu address */
1239 /* to fill the indirect cu by the vme out */
/* AVC intra 8x8/4x4 and 16x16 prediction modes mapped to HEVC intra modes */
1240 static int intra_mode_8x8_avc2hevc[9] = {26, 10, 1, 34, 18, 24, 13, 28, 8};
1241 static int intra_mode_16x16_avc2hevc[4] = {26, 10, 1, 34};
1242 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1243 unsigned char * cu_record_ptr = NULL;
1244 unsigned int * cu_msg = NULL;
/* each CTB owns num_cu_record CU slots; each CU record is 16 dwords (64 B) */
1245 int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
1246 int mb_address_in_ctb = 0;
1247 int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
1250 int intraMbMode = 0;
1251 int cu_part_mode = 0;
1253 int inerpred_idc = 0;
1254 int intra_chroma_mode = 5;
/* AVC chroma mode index -> HCP intra_chroma_mode encoding */
1258 int chroma_mode_remap[4]={5,4,3,2};
/* intra CUs carry an all-ones interpred_idc */
1260 if (!is_inter) inerpred_idc = 0xff;
1262 intraMbMode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4;
1264 intra_chroma_mode = (msg[3] & 0x3);
1265 intra_chroma_mode = chroma_mode_remap[intra_chroma_mode];
1266 if (intraMbMode == AVC_INTRA_16X16) {
1267 cu_part_mode = 0; //2Nx2N
/* 16x16: a single mode replicated into all four entries */
1271 intraMode[0] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
1272 intraMode[1] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
1273 intraMode[2] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
1274 intraMode[3] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
1275 } else if (intraMbMode == AVC_INTRA_8X8) {
1276 cu_part_mode = 0; //2Nx2N
/* 8x8: pick the 4-bit mode nibble selected by `index` */
1280 intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1281 intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1282 intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1283 intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
1285 } else { // for 4x4 to use 8x8 replace
1286 cu_part_mode = 3; //NxN
/* 4x4: four distinct mode nibbles from the `index`-th group of 16 bits */
1290 intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 0) & 0xf];
1291 intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 4) & 0xf];
1292 intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 8) & 0xf];
1293 intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 12) & 0xf];
1297 cu_record_ptr = (unsigned char *)mfc_context->hcp_indirect_cu_object.bo->virtual;
1298 /* get the mb info from the vme out */
1299 cu_msg = (unsigned int *)(cu_record_ptr + cu_address);
/* CU record DW0: prediction/QP/partition control */
1301 cu_msg[0] = (inerpred_idc << 24 | /* interpred_idc[3:0][1:0] */
1302 zero << 23 | /* reserved */
1303 qp << 16 | /* CU_qp */
1304 zero << 11 | /* reserved */
1305 intra_chroma_mode << 8 | /* intra_chroma_mode */
1306 zero << 7 | /* IPCM_enable , reserved for SKL*/
1307 cu_part_mode << 4 | /* cu_part_mode */
1308 zero << 3 | /* cu_transquant_bypass_flag */
1309 is_inter << 2 | /* cu_pred_mode: set (1) for inter, clear (0) for intra */
1310 cu_size /* cu_size */
/* DW1: the four HEVC intra modes, one per byte */
1312 cu_msg[1] = (zero << 30 | /* reserved */
1313 intraMode[3] << 24 | /* intra_mode */
1314 zero << 22 | /* reserved */
1315 intraMode[2] << 16 | /* intra_mode */
1316 zero << 14 | /* reserved */
1317 intraMode[1] << 8 | /* intra_mode */
1318 zero << 6 | /* reserved */
1319 intraMode[0] /* intra_mode */
1321 /* l0: 4 MV (x,y); l1; 4 MV (x,y) */
/* DW2..DW9: all motion vectors zeroed for intra CUs */
1322 cu_msg[2] = (zero << 16 | /* mvx_l0[1] */
1323 zero /* mvx_l0[0] */
1325 cu_msg[3] = (zero << 16 | /* mvx_l0[3] */
1326 zero /* mvx_l0[2] */
1328 cu_msg[4] = (zero << 16 | /* mvy_l0[1] */
1329 zero /* mvy_l0[0] */
1331 cu_msg[5] = (zero << 16 | /* mvy_l0[3] */
1332 zero /* mvy_l0[2] */
1335 cu_msg[6] = (zero << 16 | /* mvx_l1[1] */
1336 zero /* mvx_l1[0] */
1338 cu_msg[7] = (zero << 16 | /* mvx_l1[3] */
1339 zero /* mvx_l1[2] */
1341 cu_msg[8] = (zero << 16 | /* mvy_l1[1] */
1342 zero /* mvy_l1[0] */
1344 cu_msg[9] = (zero << 16 | /* mvy_l1[3] */
1345 zero /* mvy_l1[2] */
/* DW10: reference indices, unused for intra */
1348 cu_msg[10] = (zero << 28 | /* ref_idx_l1[3] */
1349 zero << 24 | /* ref_idx_l1[2] */
1350 zero << 20 | /* ref_idx_l1[1] */
1351 zero << 16 | /* ref_idx_l1[0] */
1352 zero << 12 | /* ref_idx_l0[3] */
1353 zero << 8 | /* ref_idx_l0[2] */
1354 zero << 4 | /* ref_idx_l0[1] */
1355 zero /* ref_idx_l0[0] */
/* DW11..DW13: transform-unit layout and skip flags */
1358 cu_msg[11] = tu_size; /* tu_size 00000000 00000000 00000000 10101010 or 0x0*/
1359 cu_msg[12] = ((tu_count - 1) << 28 | /* tu count - 1 */
1360 zero << 16 | /* reserved */
1361 zero /* tu_xform_Yskip[15:0] */
1363 cu_msg[13] = (zero << 16 | /* tu_xform_Vskip[15:0] */
1364 zero /* tu_xform_Uskip[15:0] */
1370 /* here 1 MB = 1CU = 16x16 */
/* Translate one inter macroblock of AVC VME output (`msg`) into a 16-dword
 * HCP CU record: reshuffle the VME motion vectors into the fixed 4-MV
 * layout the CU record expects, then pack MVs and reference indices.
 * NOTE(review): locals used below (is_inter, zero, cu_size, tu_size,
 * tu_count, inter_mode, mb_x/mb_y parameters) are partly in elided lines;
 * the indirect-CU bo is assumed mapped by the caller. */
1372 gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP ctx,
1373 struct encode_state *encode_state,
1374 struct intel_encoder_context *encoder_context,
1375 int qp, unsigned int *msg,
1376 int ctb_x, int ctb_y,
1378 int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type, int cu_index,int index)
1380 /* here cu == mb, so we use mb address as the cu address */
1381 /* to fill the indirect cu by the vme out */
1382 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1383 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1384 unsigned char * cu_record_ptr = NULL;
1385 unsigned int * cu_msg = NULL;
/* each CTB owns num_cu_record CU slots; each CU record is 16 dwords (64 B) */
1386 int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
1387 int mb_address_in_ctb = 0;
1388 int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
1390 int cu_part_mode = 0;
1391 int submb_pre_mode = 0;
1398 unsigned int *mv_ptr;
1400 inter_mode = (msg[0] & AVC_INTER_MODE_MASK);
1401 submb_pre_mode = (msg[1] & AVC_INTER_SUBMB_PRE_MODE_MASK) >> 16;
1402 #define MSG_MV_OFFSET 4
1403 mv_ptr = msg + MSG_MV_OFFSET;
1404 /* MV of VME output is based on 16 sub-blocks. So it is necessary
1405 * to convert them to be compatible with the format of AVC_PAK
1408 /* 0/2/4/6/8... : l0, 1/3/5/7...: l1 ; now it only support 16x16,16x8,8x16,8x8*/
/* 16x16: one MV pair replicated across all four partitions */
1410 if (inter_mode == AVC_INTER_16X16) {
1411 mv_ptr[4] = mv_ptr[0];
1412 mv_ptr[5] = mv_ptr[1];
1413 mv_ptr[2] = mv_ptr[0];
1414 mv_ptr[3] = mv_ptr[1];
1415 mv_ptr[6] = mv_ptr[0];
1416 mv_ptr[7] = mv_ptr[1];
/* 8x16: left half keeps MV[0..1], right half takes MV[8..9] */
1421 } else if (inter_mode == AVC_INTER_8X16) {
1422 mv_ptr[4] = mv_ptr[0];
1423 mv_ptr[5] = mv_ptr[1];
1424 mv_ptr[2] = mv_ptr[8];
1425 mv_ptr[3] = mv_ptr[9];
1426 mv_ptr[6] = mv_ptr[8];
1427 mv_ptr[7] = mv_ptr[9];
/* 16x8: top half keeps MV[0..1], bottom rows come from MV[16..17]/[24..25] */
1432 } else if (inter_mode == AVC_INTER_16X8) {
1433 mv_ptr[2] = mv_ptr[0];
1434 mv_ptr[3] = mv_ptr[1];
1435 mv_ptr[4] = mv_ptr[16];
1436 mv_ptr[5] = mv_ptr[17];
1437 mv_ptr[6] = mv_ptr[24];
1438 mv_ptr[7] = mv_ptr[25];
/* 8x8: replicate the `index`-th sub-block MV pair into all four slots */
1443 }else if(inter_mode == AVC_INTER_8X8) {
1444 mv_ptr[0] = mv_ptr[index * 8 + 0 ];
1445 mv_ptr[1] = mv_ptr[index * 8 + 1 ];
1446 mv_ptr[2] = mv_ptr[index * 8 + 0 ];
1447 mv_ptr[3] = mv_ptr[index * 8 + 1 ];
1448 mv_ptr[4] = mv_ptr[index * 8 + 0 ];
1449 mv_ptr[5] = mv_ptr[index * 8 + 1 ];
1450 mv_ptr[6] = mv_ptr[index * 8 + 0 ];
1451 mv_ptr[7] = mv_ptr[index * 8 + 1 ];
/* fallback (elided branch header): treat as a single-MV partition */
1459 mv_ptr[4] = mv_ptr[0];
1460 mv_ptr[5] = mv_ptr[1];
1461 mv_ptr[2] = mv_ptr[0];
1462 mv_ptr[3] = mv_ptr[1];
1463 mv_ptr[6] = mv_ptr[0];
1464 mv_ptr[7] = mv_ptr[1];
1473 cu_record_ptr = (unsigned char *)mfc_context->hcp_indirect_cu_object.bo->virtual;
1474 /* get the mb info from the vme out */
1475 cu_msg = (unsigned int *)(cu_record_ptr + cu_address);
/* CU record DW0: prediction/QP/partition control */
1477 cu_msg[0] = (submb_pre_mode << 24 | /* interpred_idc[3:0][1:0] */
1478 zero << 23 | /* reserved */
1479 qp << 16 | /* CU_qp */
1480 zero << 11 | /* reserved */
1481 5 << 8 | /* intra_chroma_mode */
1482 zero << 7 | /* IPCM_enable , reserved for SKL*/
1483 cu_part_mode << 4 | /* cu_part_mode */
1484 zero << 3 | /* cu_transquant_bypass_flag */
1485 is_inter << 2 | /* cu_pred_mode: set (1) for inter, clear (0) for intra */
1486 cu_size /* cu_size */
/* DW1: intra modes unused for inter CUs */
1488 cu_msg[1] = (zero << 30 | /* reserved */
1489 zero << 24 | /* intra_mode */
1490 zero << 22 | /* reserved */
1491 zero << 16 | /* intra_mode */
1492 zero << 14 | /* reserved */
1493 zero << 8 | /* intra_mode */
1494 zero << 6 | /* reserved */
1495 zero /* intra_mode */
1497 /* l0: 4 MV (x,y); l1; 4 MV (x,y) */
/* DW2..DW9: repack the 32-bit (y:x) VME MVs into separate x/y halfwords;
 * even mv_ptr entries are list 0, odd entries list 1 */
1498 cu_msg[2] = ((mv_ptr[2] & 0xffff) << 16 | /* mvx_l0[1] */
1499 (mv_ptr[0] & 0xffff) /* mvx_l0[0] */
1501 cu_msg[3] = ((mv_ptr[6] & 0xffff) << 16 | /* mvx_l0[3] */
1502 (mv_ptr[4] & 0xffff) /* mvx_l0[2] */
1504 cu_msg[4] = ((mv_ptr[2] & 0xffff0000) | /* mvy_l0[1] */
1505 (mv_ptr[0] & 0xffff0000) >> 16 /* mvy_l0[0] */
1507 cu_msg[5] = ((mv_ptr[6] & 0xffff0000) | /* mvy_l0[3] */
1508 (mv_ptr[4] & 0xffff0000) >> 16 /* mvy_l0[2] */
1511 cu_msg[6] = ((mv_ptr[3] & 0xffff) << 16 | /* mvx_l1[1] */
1512 (mv_ptr[1] & 0xffff) /* mvx_l1[0] */
1514 cu_msg[7] = ((mv_ptr[7] & 0xffff) << 16 | /* mvx_l1[3] */
1515 (mv_ptr[5] & 0xffff) /* mvx_l1[2] */
1517 cu_msg[8] = ((mv_ptr[3] & 0xffff0000) | /* mvy_l1[1] */
1518 (mv_ptr[1] & 0xffff0000) >> 16 /* mvy_l1[0] */
1520 cu_msg[9] = ((mv_ptr[7] & 0xffff0000) | /* mvy_l1[3] */
1521 (mv_ptr[5] & 0xffff0000) >> 16 /* mvy_l1[2] */
/* DW10: per-partition reference indices from the VME context (4 bits each) */
1524 cu_msg[10] = (((vme_context->ref_index_in_mb[1] >> 24) & 0xf) << 28 | /* ref_idx_l1[3] */
1525 ((vme_context->ref_index_in_mb[1] >> 16) & 0xf) << 24 | /* ref_idx_l1[2] */
1526 ((vme_context->ref_index_in_mb[1] >> 8) & 0xf) << 20 | /* ref_idx_l1[1] */
1527 ((vme_context->ref_index_in_mb[1] >> 0) & 0xf) << 16 | /* ref_idx_l1[0] */
1528 ((vme_context->ref_index_in_mb[0] >> 24) & 0xf) << 12 | /* ref_idx_l0[3] */
1529 ((vme_context->ref_index_in_mb[0] >> 16) & 0xf) << 8 | /* ref_idx_l0[2] */
1530 ((vme_context->ref_index_in_mb[0] >> 8) & 0xf) << 4 | /* ref_idx_l0[1] */
1531 ((vme_context->ref_index_in_mb[0] >> 0) & 0xf) /* ref_idx_l0[0] */
/* DW11..DW13: transform-unit layout and skip flags */
1534 cu_msg[11] = tu_size; /* tu_size 00000000 00000000 00000000 10101010 or 0x0*/
1535 cu_msg[12] = ((tu_count - 1) << 28 | /* tu count - 1 */
1536 zero << 16 | /* reserved */
1537 zero /* tu_xform_Yskip[15:0] */
1539 cu_msg[13] = (zero << 16 | /* tu_xform_Vskip[15:0] */
1540 zero /* tu_xform_Uskip[15:0] */
/* Per-CTB split_coding_unit_flag templates for HCP_PAK_OBJECT DW1.
 * NOTE(review): judging from the usage below, bit 20 appears to be the
 * top-level split bit and bits 19:16 the next-level splits; confirm against
 * the Gen9 PRM. Also note _8_8 carries the same value as _32_32 (only bit
 * 20 set) — looks deliberate for the 4x4->8x8 replacement path, but worth
 * verifying. */
1546 #define HEVC_SPLIT_CU_FLAG_64_64 ((0x1<<20)|(0xf<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1547 #define HEVC_SPLIT_CU_FLAG_32_32 ((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1548 #define HEVC_SPLIT_CU_FLAG_16_16 ((0x0<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1549 #define HEVC_SPLIT_CU_FLAG_8_8 ((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
/* Insert the application's packed header data for one slice into the slice
 * batch: first every non-slice-header packed buffer, then the slice header
 * itself — either driver-built (when the app supplied none) or the app's
 * packed slice header.
 * NOTE(review): several insert_object() arguments are in elided lines, so
 * the exact flag values passed per call are not fully visible here. */
1553 intel_hevc_slice_insert_packed_data(VADriverContextP ctx,
1554 struct encode_state *encode_state,
1555 struct intel_encoder_context *encoder_context,
1557 struct intel_batchbuffer *slice_batch)
1559 int count, i, start_index;
1560 unsigned int length_in_bits;
1561 VAEncPackedHeaderParameterBuffer *param = NULL;
1562 unsigned int *header_data = NULL;
1563 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1564 int slice_header_index;
/* slice_header_index == 0 means the app provided no packed slice header */
1566 if (encode_state->slice_header_index[slice_index] == 0)
1567 slice_header_index = -1;
1569 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1571 count = encode_state->slice_rawdata_count[slice_index];
1572 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
/* pass 1: emit all raw packed buffers attached to this slice */
1574 for (i = 0; i < count; i++) {
1575 unsigned int skip_emul_byte_cnt;
1577 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1579 param = (VAEncPackedHeaderParameterBuffer *)
1580 (encode_state->packed_header_params_ext[start_index + i]->buffer);
1582 /* skip the slice header packed data type as it is lastly inserted */
1583 if (param->type == VAEncPackedHeaderSlice)
1586 length_in_bits = param->bit_length;
1588 skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1590 /* as the slice header is still required, the last header flag is set to
1593 mfc_context->insert_object(ctx,
1596 ALIGN(length_in_bits, 32) >> 5,
1597 length_in_bits & 0x1f,
1601 !param->has_emulation_bytes,
/* pass 2a: no app-provided slice header — build one from the parameter
 * buffers and insert it as the last header */
1605 if (slice_header_index == -1) {
1606 unsigned char *slice_header = NULL;
1607 int slice_header_length_in_bits = 0;
1608 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1609 VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1610 VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
1612 /* For the Normal HEVC */
1613 slice_header_length_in_bits = build_hevc_slice_header(pSequenceParameter,
1618 mfc_context->insert_object(ctx, encoder_context,
1619 (unsigned int *)slice_header,
1620 ALIGN(slice_header_length_in_bits, 32) >> 5,
1621 slice_header_length_in_bits & 0x1f,
1622 5, /* first 6 bytes are start code + nal unit type */
1623 1, 0, 1, slice_batch);
/* pass 2b: app supplied a packed slice header — emit it last */
1626 unsigned int skip_emul_byte_cnt;
1628 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1630 param = (VAEncPackedHeaderParameterBuffer *)
1631 (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1632 length_in_bits = param->bit_length;
1634 /* as the slice header is the last header data for one slice,
1635 * the last header flag is set to one.
1637 skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1639 mfc_context->insert_object(ctx,
1642 ALIGN(length_in_bits, 32) >> 5,
1643 length_in_bits & 0x1f,
1647 !param->has_emulation_bytes,
/* Program one HEVC slice: compute QP (with CBR adjustment), emit the slice
 * headers, then walk every CTB in the slice, convert the AVC VME output
 * into HCP CU records (intra or inter, whichever RDO cost is lower) and
 * emit an HCP_PAK_OBJECT per CTB, finishing with the slice tail data.
 * Fix applied below: the mode-field rewrites used logical NOT
 * (`!AVC_*_MODE_MASK` == 0), which zeroed the ENTIRE VME message dword
 * instead of clearing only the 2-bit mode field; changed to bitwise `~`. */
1655 gen9_hcpe_hevc_pipeline_slice_programing(VADriverContextP ctx,
1656 struct encode_state *encode_state,
1657 struct intel_encoder_context *encoder_context,
1659 struct intel_batchbuffer *slice_batch)
1661 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1662 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1663 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1664 VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1665 VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
1666 int qp_slice = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1667 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1668 //unsigned char *slice_header = NULL; // for future use
1669 //int slice_header_length_in_bits = 0;
1670 unsigned int tail_data[] = { 0x0, 0x0 };
1671 int slice_type = pSliceParameter->slice_type;
/* derive CTB/CU geometry from the sequence parameters */
1673 int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
1674 int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
1675 int ctb_size = 1 << log2_ctb_size;
1676 int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
1677 int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
1678 int last_slice = (pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice) == (width_in_ctb * height_in_ctb);
1679 int ctb_width_in_mb = (ctb_size + 15) / 16;
1680 int i_ctb, ctb_x, ctb_y;
1681 unsigned int split_coding_unit_flag = 0;
1682 int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15) / 16;
/* padding flags: set when the frame size is not CTB-aligned */
1683 int row_pad_flag = (pSequenceParameter->pic_height_in_luma_samples % ctb_size)> 0 ? 1:0;
1684 int col_pad_flag = (pSequenceParameter->pic_width_in_luma_samples % ctb_size)> 0 ? 1:0;
1686 int is_intra = (slice_type == HEVC_SLICE_I);
1687 unsigned int *msg = NULL;
1688 unsigned char *msg_ptr = NULL;
1689 int macroblock_address = 0;
1690 int num_cu_record = 64;
1692 int tmp_mb_mode = 0;
1693 int mb_x = 0, mb_y = 0;
1696 int inter_rdo, intra_rdo;
1698 int drop_cu_row_in_last_mb = 0;
1699 int drop_cu_column_in_last_mb = 0;
/* CU records per CTB scale with CTB size (16x16 CU granularity) */
1701 if (log2_ctb_size == 5) num_cu_record = 16;
1702 else if (log2_ctb_size == 4) num_cu_record = 4;
1703 else if (log2_ctb_size == 6) num_cu_record = 64;
/* CBR: override the slice QP with the BRC-computed one; B slices reuse the
 * P-slice QP when there is no real B hierarchy */
1706 if (rate_control_mode == VA_RC_CBR) {
1707 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1708 if(slice_type == HEVC_SLICE_B) {
1709 if(pSequenceParameter->ip_period == 1)
1711 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
1713 }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
1714 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
/* keep the app-visible slice_qp_delta consistent with the BRC QP when the
 * driver builds the slice header itself */
1717 if (encode_state->slice_header_index[slice_index] == 0) {
1718 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1722 /* only support for 8-bit pixel bit-depth */
1723 assert(pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 >= 0 && pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 <= 2);
1724 assert(pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 >= 0 && pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 <= 2);
1725 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1726 assert(qp >= 0 && qp < 52);
1729 gen9_hcpe_hevc_slice_state(ctx,
1732 encode_state, encoder_context,
/* frame-level headers (VPS/SPS/PPS etc.) go in front of the first slice */
1735 if (slice_index == 0)
1736 intel_hcpe_hevc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1738 intel_hevc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1741 slice_header_length_in_bits = build_hevc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header, slice_index);
1742 int skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)slice_header, slice_header_length_in_bits);
1744 mfc_context->insert_object(ctx, encoder_context,
1745 (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1747 1, 0, 1, slice_batch);
1754 split_coding_unit_flag = (ctb_width_in_mb == 4) ? HEVC_SPLIT_CU_FLAG_64_64 : ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
/* map both the VME output and the CU-record bo for CPU-side conversion */
1756 dri_bo_map(vme_context->vme_output.bo , 1);
1757 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1758 dri_bo_map(mfc_context->hcp_indirect_cu_object.bo , 1);
1760 for (i_ctb = pSliceParameter->slice_segment_address;i_ctb < pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice; i_ctb++) {
1761 int last_ctb = (i_ctb == (pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice - 1));
1762 int ctb_height_in_mb_internal = ctb_width_in_mb;
1763 int ctb_width_in_mb_internal = ctb_width_in_mb;
1764 int max_cu_num_in_mb = 4;
1766 ctb_x = i_ctb % width_in_ctb;
1767 ctb_y = i_ctb / width_in_ctb;
1769 drop_cu_row_in_last_mb = 0;
1770 drop_cu_column_in_last_mb = 0;
/* shrink the effective MB grid for partially-covered edge CTBs */
1772 if(ctb_y == (height_in_ctb - 1) && row_pad_flag)
1774 ctb_height_in_mb_internal = (pSequenceParameter->pic_height_in_luma_samples - (ctb_y * ctb_size) + 15)/16;
1776 if((log2_cu_size == 3) && (pSequenceParameter->pic_height_in_luma_samples % 16))
1777 drop_cu_row_in_last_mb = (16 - (pSequenceParameter->pic_height_in_luma_samples % 16))>>log2_cu_size;
1780 if(ctb_x == (width_in_ctb - 1) && col_pad_flag)
1782 ctb_width_in_mb_internal = (pSequenceParameter->pic_width_in_luma_samples - (ctb_x * ctb_size) + 15) / 16;
1784 if((log2_cu_size == 3) && (pSequenceParameter->pic_width_in_luma_samples % 16))
1785 drop_cu_column_in_last_mb = (16 - (pSequenceParameter->pic_width_in_luma_samples % 16))>>log2_cu_size;
1790 macroblock_address = ctb_y * width_in_mbs * ctb_width_in_mb + ctb_x * ctb_width_in_mb;
1791 split_coding_unit_flag = ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
/* walk each 16x16 MB inside the CTB and emit its CU record(s) */
1796 for (mb_y = 0; mb_y < ctb_height_in_mb_internal; mb_y++)
1798 mb_addr = macroblock_address + mb_y * width_in_mbs ;
1799 for (mb_x = 0; mb_x < ctb_width_in_mb_internal; mb_x++)
1801 max_cu_num_in_mb = 4;
1802 if(drop_cu_row_in_last_mb && (mb_y == ctb_height_in_mb_internal - 1))
1803 max_cu_num_in_mb /= 2;
1805 if(drop_cu_column_in_last_mb && (mb_x == ctb_width_in_mb_internal - 1))
1806 max_cu_num_in_mb /= 2;
1808 /* get the mb info from the vme out */
1809 msg = (unsigned int *)(msg_ptr + mb_addr * vme_context->vme_output.size_block);
1811 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1812 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1813 /*fill to indirect cu */
/* choose intra when forced (I slice) or when intra RDO cost wins */
1815 if (is_intra || intra_rdo < inter_rdo) {
1817 tmp_mb_mode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4;
1818 if(max_cu_num_in_mb < 4){
1819 if(tmp_mb_mode == AVC_INTRA_16X16)
/* BUGFIX: was `!AVC_INTRA_MODE_MASK` (logical NOT -> 0), which cleared the
 * whole dword; bitwise ~ clears only the 2-bit intra-mode field */
1821 msg[0] = (msg[0] & ~AVC_INTRA_MODE_MASK) | (AVC_INTRA_8X8<<4);
1822 tmp_mb_mode = AVC_INTRA_8X8;
1825 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1826 if(--max_cu_num_in_mb > 0)
1827 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,2);
1829 if(ctb_width_in_mb == 2)
1830 split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1831 else if(ctb_width_in_mb == 1)
1832 split_coding_unit_flag |= 0x1 << 20;
1834 else if(tmp_mb_mode == AVC_INTRA_16X16) {
1835 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1836 } else { // for 4x4 to use 8x8 replace
1837 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1838 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,1);
1839 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,2);
1840 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,3);
1841 if(ctb_width_in_mb == 2)
1842 split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1843 else if(ctb_width_in_mb == 1)
1844 split_coding_unit_flag |= 0x1 << 20;
/* inter path: the inter part of the VME message starts at a fixed offset */
1847 msg += AVC_INTER_MSG_OFFSET;
1849 tmp_mb_mode = msg[0] & AVC_INTER_MODE_MASK;
1850 if(max_cu_num_in_mb < 4)
1852 if(tmp_mb_mode != AVC_INTER_8X8)
/* BUGFIX: was `!AVC_INTER_MODE_MASK` (logical NOT -> 0), which cleared the
 * whole dword; bitwise ~ clears only the 2-bit inter-mode field */
1854 msg[0] = (msg[0] & ~AVC_INTER_MODE_MASK) | AVC_INTER_8X8;
1855 tmp_mb_mode = AVC_INTER_8X8;
1857 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1858 if(--max_cu_num_in_mb > 0)
1859 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,1);
1861 if(ctb_width_in_mb == 2)
1862 split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1863 else if(ctb_width_in_mb == 1)
1864 split_coding_unit_flag |= 0x1 << 20;
1866 else if (tmp_mb_mode == AVC_INTER_8X8){
1867 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1868 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,1);
1869 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,2);
1870 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,3);
1871 if(ctb_width_in_mb == 2)
1872 split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1873 else if(ctb_width_in_mb == 1)
1874 split_coding_unit_flag |= 0x1 << 20;
1876 }else if(tmp_mb_mode == AVC_INTER_16X16 ||
1877 tmp_mb_mode == AVC_INTER_8X16 ||
1878 tmp_mb_mode == AVC_INTER_16X8) {
1879 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
1886 cu_count = cu_index;
1887 // PAK object fill accordingly.
1888 gen9_hcpe_hevc_pak_object(ctx, ctb_x, ctb_y, last_ctb, encoder_context, cu_count, split_coding_unit_flag, slice_batch);
1891 dri_bo_unmap(mfc_context->hcp_indirect_cu_object.bo);
1892 dri_bo_unmap(vme_context->vme_output.bo);
/* slice tail padding: 2 dwords for the last slice, 1 otherwise */
1895 mfc_context->insert_object(ctx, encoder_context,
1897 2, 1, 1, 0, slice_batch);
1899 mfc_context->insert_object(ctx, encoder_context,
1901 1, 1, 1, 0, slice_batch);
/* Build the per-slice HCP/PAK commands for the whole frame into the
 * context's auxiliary (software) batch buffer and hand its bo to the
 * caller.  NOTE(review): the return statement and some declarations
 * fall outside this view; presumably batch_bo is returned with the
 * extra reference taken below — confirm against the full file. */
1906 gen9_hcpe_hevc_software_batchbuffer(VADriverContextP ctx,
1907 struct encode_state *encode_state,
1908 struct intel_encoder_context *encoder_context)
1910 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1911 struct intel_batchbuffer *batch;
/* Slice commands accumulate in the auxiliary batch buffer. */
1915 batch = mfc_context->aux_batchbuffer;
1916 batch_bo = batch->buffer;
/* Program every slice in submission order. */
1918 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1919 gen9_hcpe_hevc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
/* QWORD-align, then terminate the second-level batch. */
1922 intel_batchbuffer_align(batch, 8);
1924 BEGIN_BCS_BATCH(batch, 2);
1925 OUT_BCS_BATCH(batch, 0);
1926 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1927 ADVANCE_BCS_BATCH(batch);
/* Keep the bo alive for the caller, then free the wrapper; the
 * context no longer owns an aux batchbuffer after this point. */
1929 dri_bo_reference(batch_bo);
1930 intel_batchbuffer_free(batch);
1931 mfc_context->aux_batchbuffer = NULL;
/* Program the BCS ring for one HEVC frame: build the slice-level
 * commands into a secondary batch (software or hardware path), emit
 * picture-level state into the primary batch, then chain into the
 * slice batch via MI_BATCH_BUFFER_START. */
1941 gen9_hcpe_hevc_pipeline_programing(VADriverContextP ctx,
1942 struct encode_state *encode_state,
1943 struct intel_encoder_context *encoder_context)
1945 struct i965_driver_data *i965 = i965_driver_data(ctx);
1946 struct intel_batchbuffer *batch = encoder_context->base.batch;
1947 dri_bo *slice_batch_bo;
/* Compile-time choice of how the slice batch is produced. */
1949 #ifdef HCP_SOFTWARE_SKYLAKE
1950 slice_batch_bo = gen9_hcpe_hevc_software_batchbuffer(ctx, encode_state, encoder_context);
1952 slice_batch_bo = gen9_hcpe_hevc_hardware_batchbuffer(ctx, encode_state, encoder_context);
/* With two BSD rings present, pin this frame's commands to ring 0. */
1956 if (i965->intel.has_bsd2)
1957 intel_batchbuffer_start_atomic_bcs_override(batch, 0x4000, BSD_RING0);
1959 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1960 intel_batchbuffer_emit_mi_flush(batch);
1962 // picture level programing
1963 gen9_hcpe_hevc_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* Chain to the second-level slice batch; the relocation target is
 * slice_batch_bo (reloc arguments partly outside this view). */
1965 BEGIN_BCS_BATCH(batch, 3);
1966 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1967 OUT_BCS_RELOC64(batch,
1969 I915_GEM_DOMAIN_COMMAND, 0,
1971 ADVANCE_BCS_BATCH(batch);
1974 intel_batchbuffer_end_atomic(batch);
/* Drop the reference taken when the slice batch was created. */
1976 dri_bo_unreference(slice_batch_bo);
/* Insert the application-supplied packed headers (VPS, SPS, PPS and
 * optionally SEI) into the slice batch via the context's
 * insert_object() hook.  Each section follows the same pattern:
 * look up the packed-header buffer, read its bit length, compute how
 * many leading bytes to exclude from emulation-prevention insertion,
 * and emit the payload. */
1979 void intel_hcpe_hevc_pipeline_header_programing(VADriverContextP ctx,
1980 struct encode_state *encode_state,
1981 struct intel_encoder_context *encoder_context,
1982 struct intel_batchbuffer *slice_batch)
1984 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1985 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_VPS);
1986 unsigned int skip_emul_byte_cnt;
/* --- VPS --- */
1988 if (encode_state->packed_header_data[idx]) {
1989 VAEncPackedHeaderParameterBuffer *param = NULL;
1990 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1991 unsigned int length_in_bits;
1993 assert(encode_state->packed_header_param[idx]);
1994 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1995 length_in_bits = param->bit_length;
1997 skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1998 mfc_context->insert_object(ctx,
/* length in DWORDs, then the count of bits in the last DWORD */
2001 ALIGN(length_in_bits, 32) >> 5,
2002 length_in_bits & 0x1f,
/* emulation bytes are inserted by HW only if the app did not */
2006 !param->has_emulation_bytes,
/* --- SPS: stored at the slot right after VPS --- */
2010 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_VPS) + 1; // index to SPS
2012 if (encode_state->packed_header_data[idx]) {
2013 VAEncPackedHeaderParameterBuffer *param = NULL;
2014 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2015 unsigned int length_in_bits;
2017 assert(encode_state->packed_header_param[idx]);
2018 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2019 length_in_bits = param->bit_length;
2021 skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2022 mfc_context->insert_object(ctx,
2025 ALIGN(length_in_bits, 32) >> 5,
2026 length_in_bits & 0x1f,
2030 !param->has_emulation_bytes,
/* --- PPS --- */
2034 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_PPS);
2036 if (encode_state->packed_header_data[idx]) {
2037 VAEncPackedHeaderParameterBuffer *param = NULL;
2038 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2039 unsigned int length_in_bits;
2041 assert(encode_state->packed_header_param[idx]);
2042 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2043 length_in_bits = param->bit_length;
2045 skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2047 mfc_context->insert_object(ctx,
2050 ALIGN(length_in_bits, 32) >> 5,
2051 length_in_bits & 0x1f,
2055 !param->has_emulation_bytes,
/* --- SEI (optional) --- */
2059 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_SEI);
2061 if (encode_state->packed_header_data[idx]) {
2062 VAEncPackedHeaderParameterBuffer *param = NULL;
2063 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2064 unsigned int length_in_bits;
2066 assert(encode_state->packed_header_param[idx]);
2067 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2068 length_in_bits = param->bit_length;
2070 skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2071 mfc_context->insert_object(ctx,
2074 ALIGN(length_in_bits, 32) >> 5,
2075 length_in_bits & 0x1f,
2079 !param->has_emulation_bytes,
/* Bind all per-frame input/output buffers into the HCP encoder
 * context: reconstructed surface + its collocated MV buffer,
 * reference surfaces, the raw input YUV, and the coded (PAK/BSE)
 * output buffer.  Every bo stored into mfc_context takes an extra
 * dri_bo reference; release happens in gen9_hcpe_context_destroy(). */
2084 VAStatus intel_hcpe_hevc_prepare(VADriverContextP ctx,
2085 struct encode_state *encode_state,
2086 struct intel_encoder_context *encoder_context)
2088 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2089 struct object_surface *obj_surface;
2090 struct object_buffer *obj_buffer;
2091 GenHevcSurface *hevc_encoder_surface;
2093 VAStatus vaStatus = VA_STATUS_SUCCESS;
2095 struct i965_coded_buffer_segment *coded_buffer_segment;
2097 /* Setup all the input & output objects */
2099 /* Setup current frame and current direct mv buffer */
2100 obj_surface = encode_state->reconstructed_object;
2102 hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2103 assert(hevc_encoder_surface);
2105 if (hevc_encoder_surface) {
/* force P010->NV12 conversion state and frame-store slot to be redone */
2106 hevc_encoder_surface->has_p010_to_nv12_done=0;
2107 hevc_encoder_surface->base.frame_store_id = -1;
/* The current frame's MV temporal buffer lives in the last slot. */
2108 mfc_context->current_collocated_mv_temporal_buffer[NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS - 1].bo = hevc_encoder_surface->motion_vector_temporal_bo;
2109 dri_bo_reference(hevc_encoder_surface->motion_vector_temporal_bo);
/* width/height are the requested sizes; w_pitch/h_pitch the padded ones */
2112 mfc_context->surface_state.width = obj_surface->orig_width;
2113 mfc_context->surface_state.height = obj_surface->orig_height;
2114 mfc_context->surface_state.w_pitch = obj_surface->width;
2115 mfc_context->surface_state.h_pitch = obj_surface->height;
2117 /* Setup reference frames and direct mv buffers */
2118 for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
2119 obj_surface = encode_state->reference_objects[i];
2121 if (obj_surface && obj_surface->bo) {
2122 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
2123 dri_bo_reference(obj_surface->bo);
2125 /* Check MV temporal buffer */
2126 hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2127 assert(hevc_encoder_surface);
2129 if (hevc_encoder_surface) {
2130 hevc_encoder_surface->base.frame_store_id = -1;
2131 /* Setup MV temporal buffer */
2132 mfc_context->current_collocated_mv_temporal_buffer[i].bo = hevc_encoder_surface->motion_vector_temporal_bo;
2133 dri_bo_reference(hevc_encoder_surface->motion_vector_temporal_bo);
/* Raw input picture. */
2141 mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
2142 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* Coded output: bitstream starts after the coded-buffer header;
 * end offset leaves a 0x1000-byte tail, aligned down to 0x1000. */
2144 obj_buffer = encode_state->coded_buf_object;
2145 bo = obj_buffer->buffer_store->bo;
2146 mfc_context->hcp_indirect_pak_bse_object.bo = bo;
2147 mfc_context->hcp_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2148 mfc_context->hcp_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2149 dri_bo_reference(mfc_context->hcp_indirect_pak_bse_object.bo);
/* Reset the coded-buffer segment header (bo mapping done outside this view). */
2152 coded_buffer_segment = (struct i965_coded_buffer_segment *)(bo->virtual);
2153 coded_buffer_segment->mapped = 0;
2154 coded_buffer_segment->codec = encoder_context->codec;
2160 /* HEVC BRC related */
/* Seed the per-slice-type (I/P/B) bit-rate-control contexts from the
 * sequence parameters and the requested bitrate/framerate: target
 * sizes per CTB ("mb" here means CTB-sized block) and per frame, the
 * initial QP, and the QP grow/shrink correction tables. */
2163 intel_hcpe_bit_rate_control_context_init(struct encode_state *encode_state,
2164 struct intel_encoder_context *encoder_context)
2166 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2167 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2169 int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
2170 int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
2172 double fps = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
2173 double bitrate = encoder_context->brc.bits_per_second[0];
/* heuristic budget per block; intra blocks get 5x the inter budget */
2174 int inter_mb_size = bitrate * 1.0 / (fps + 4.0) / width_in_mbs / height_in_mbs;
2175 int intra_mb_size = inter_mb_size * 5.0;
2178 mfc_context->bit_rate_control_context[HEVC_SLICE_I].target_mb_size = intra_mb_size;
2179 mfc_context->bit_rate_control_context[HEVC_SLICE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
2180 mfc_context->bit_rate_control_context[HEVC_SLICE_P].target_mb_size = inter_mb_size;
2181 mfc_context->bit_rate_control_context[HEVC_SLICE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
2182 mfc_context->bit_rate_control_context[HEVC_SLICE_B].target_mb_size = inter_mb_size;
2183 mfc_context->bit_rate_control_context[HEVC_SLICE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
/* Common defaults for all three slice types. */
2185 for (i = 0 ; i < 3; i++) {
2186 mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
2187 mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
2188 mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
2189 mfc_context->bit_rate_control_context[i].GrowInit = 6;
2190 mfc_context->bit_rate_control_context[i].GrowResistance = 4;
2191 mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
2192 mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
/* QP correction steps, indexed by how far the frame missed target. */
2194 mfc_context->bit_rate_control_context[i].Correct[0] = 8;
2195 mfc_context->bit_rate_control_context[i].Correct[1] = 4;
2196 mfc_context->bit_rate_control_context[i].Correct[2] = 2;
2197 mfc_context->bit_rate_control_context[i].Correct[3] = 2;
2198 mfc_context->bit_rate_control_context[i].Correct[4] = 4;
2199 mfc_context->bit_rate_control_context[i].Correct[5] = 8;
/* Targets in 16-bit words; allow 1.5x the target as hard max. */
2202 mfc_context->bit_rate_control_context[HEVC_SLICE_I].TargetSizeInWord = (intra_mb_size + 16) / 16;
2203 mfc_context->bit_rate_control_context[HEVC_SLICE_P].TargetSizeInWord = (inter_mb_size + 16) / 16;
2204 mfc_context->bit_rate_control_context[HEVC_SLICE_B].TargetSizeInWord = (inter_mb_size + 16) / 16;
2206 mfc_context->bit_rate_control_context[HEVC_SLICE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_I].TargetSizeInWord * 1.5;
2207 mfc_context->bit_rate_control_context[HEVC_SLICE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_P].TargetSizeInWord * 1.5;
2208 mfc_context->bit_rate_control_context[HEVC_SLICE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_B].TargetSizeInWord * 1.5;
/* Initialize global BRC state for a GOP: per-slice-type frame-size
 * targets weighted by BRC_PWEIGHT/BRC_BWEIGHT, the HRD buffer model
 * (size and initial fullness), and a bits-per-frame derived starting
 * QP clipped into a safe range. */
2211 static void intel_hcpe_brc_init(struct encode_state *encode_state,
2212 struct intel_encoder_context* encoder_context)
2214 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2215 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2217 double bitrate = (double)encoder_context->brc.bits_per_second[0];
2218 double framerate = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
2219 int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
2220 int intra_period = pSequenceParameter->intra_period;
2221 int ip_period = pSequenceParameter->ip_period;
/* Rough frame sizes at QP=1 and QP=51 for 4:2:0 8-bit content. */
2222 double qp1_size = 0.1 * 8 * 3 * pSequenceParameter->pic_width_in_luma_samples * pSequenceParameter->pic_height_in_luma_samples / 2;
2223 double qp51_size = 0.001 * 8 * 3 * pSequenceParameter->pic_width_in_luma_samples * pSequenceParameter->pic_height_in_luma_samples / 2;
2228 double buffer_size = 0;
/* Scale the size estimates for 10-bit (or deeper) content. */
2231 if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0) ||
2232 (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
2235 qp1_size = qp1_size * bpp;
2236 qp51_size = qp51_size * bpp;
/* Derive GOP composition from intra_period / ip_period. */
2238 if (pSequenceParameter->ip_period) {
2239 pnum = (intra_period + ip_period - 1) / ip_period - 1;
2240 bnum = intra_period - inum - pnum;
2243 mfc_context->brc.mode = encoder_context->rate_control_mode;
/* Split the GOP bit budget among I/P/B by their relative weights. */
2245 mfc_context->brc.target_frame_size[HEVC_SLICE_I] = (int)((double)((bitrate * intra_period) / framerate) /
2246 (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
2247 mfc_context->brc.target_frame_size[HEVC_SLICE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[HEVC_SLICE_I];
2248 mfc_context->brc.target_frame_size[HEVC_SLICE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[HEVC_SLICE_I];
2250 mfc_context->brc.gop_nums[HEVC_SLICE_I] = inum;
2251 mfc_context->brc.gop_nums[HEVC_SLICE_P] = pnum;
2252 mfc_context->brc.gop_nums[HEVC_SLICE_B] = bnum;
2254 bpf = mfc_context->brc.bits_per_frame = bitrate / framerate;
/* HRD model: app-provided buffer size if any, else bitrate * ratio;
 * initial fullness defaults to half the buffer. */
2256 if (!encoder_context->brc.hrd_buffer_size)
2258 mfc_context->hrd.buffer_size = bitrate * ratio;
2259 mfc_context->hrd.current_buffer_fullness =
2260 (double)(bitrate * ratio/2 < mfc_context->hrd.buffer_size) ?
2261 bitrate * ratio/2 : mfc_context->hrd.buffer_size / 2.;
2264 buffer_size = (double)encoder_context->brc.hrd_buffer_size;
/* clamp the app's buffer size into [bitrate*ratio_min, bitrate*ratio_max] */
2265 if(buffer_size < bitrate * ratio_min)
2267 buffer_size = bitrate * ratio_min;
2268 }else if (buffer_size > bitrate * ratio_max)
2270 buffer_size = bitrate * ratio_max ;
2272 mfc_context->hrd.buffer_size =buffer_size;
2273 if(encoder_context->brc.hrd_initial_buffer_fullness)
2275 mfc_context->hrd.current_buffer_fullness =
2276 (double)(encoder_context->brc.hrd_initial_buffer_fullness < mfc_context->hrd.buffer_size) ?
2277 encoder_context->brc.hrd_initial_buffer_fullness : mfc_context->hrd.buffer_size / 2.;
2280 mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size / 2.;
2285 mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size / 2.;
2286 mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size / qp1_size;
2287 mfc_context->hrd.violation_noted = 0;
/* Starting QP: linear interpolation between QP=51 and QP=1 frame
 * sizes as a function of bits-per-frame, then cloned to I/B. */
2289 if ((bpf > qp51_size) && (bpf < qp1_size)) {
2290 mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 51 - 50 * (bpf - qp51_size) / (qp1_size - qp51_size);
2291 } else if (bpf >= qp1_size)
2292 mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 1;
2293 else if (bpf <= qp51_size)
2294 mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 51;
2296 mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2297 mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY = mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY;
/* Keep the first-frame QP conservative (tighter than the 1..51 range). */
2299 BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY, 1, 36);
2300 BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY, 1, 40);
2301 BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY, 1, 45);
/* Advance the HRD buffer model by one frame: drain frame_bits, refill
 * with bits_per_frame, and report BRC_UNDERFLOW / BRC_OVERFLOW /
 * BRC_NO_HRD_VIOLATION.  On a violation the previous fullness is
 * restored so the caller can retry with an adjusted QP. */
2304 int intel_hcpe_update_hrd(struct encode_state *encode_state,
2305 struct gen9_hcpe_context *mfc_context,
2308 double prev_bf = mfc_context->hrd.current_buffer_fullness;
2310 mfc_context->hrd.current_buffer_fullness -= frame_bits;
/* buffer_size == 0 means no HRD constraint is enforced */
2312 if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
2313 mfc_context->hrd.current_buffer_fullness = prev_bf;
2314 return BRC_UNDERFLOW;
2317 mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
2318 if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
/* VBR tolerates overflow by saturating at the buffer size. */
2319 if (mfc_context->brc.mode == VA_RC_VBR)
2320 mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
2322 mfc_context->hrd.current_buffer_fullness = prev_bf;
2323 return BRC_OVERFLOW;
2326 return BRC_NO_HRD_VIOLATION;
/* Post-encode BRC step: given the actual size of the just-encoded
 * frame (frame_bits), predict the next QP for the current slice type,
 * nudge the QPs of the other slice types to keep their usual offsets,
 * and check HRD compliance.  Returns a gen6_brc_status value; the
 * caller re-encodes or reports on overflow/underflow. */
2329 int intel_hcpe_brc_postpack(struct encode_state *encode_state,
2330 struct gen9_hcpe_context *mfc_context,
2333 gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
2334 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2335 VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2336 int slicetype = pSliceParameter->slice_type;
2337 int qpi = mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY;
2338 int qpp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2339 int qpb = mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY;
2340 int qp; // quantizer of previously encoded slice of current type
2341 int qpn; // predicted quantizer for next frame of current type in integer format
2342 double qpf; // predicted quantizer for next frame of current type in float format
2343 double delta_qp; // QP correction
2344 int target_frame_size, frame_size_next;
2346 * x - how far we are from HRD buffer borders
2347 * y - how far we are from target HRD buffer fullness
2350 double frame_size_alpha;
/* B slices that actually act as P (ip_period==1, or first in the
 * mini-GOP) are rate-controlled with the P-slice context. */
2352 if(slicetype == HEVC_SLICE_B) {
2353 if(pSequenceParameter->ip_period == 1)
2355 slicetype = HEVC_SLICE_P;
2356 }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
2357 slicetype = HEVC_SLICE_P;
2361 qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;
/* Predict the next frame size, damped by how many frames of this
 * type occur per GOP (alpha), capped at 30. */
2363 target_frame_size = mfc_context->brc.target_frame_size[slicetype];
2364 if (mfc_context->hrd.buffer_capacity < 5)
2365 frame_size_alpha = 0;
2367 frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
2368 if (frame_size_alpha > 30) frame_size_alpha = 30;
2369 frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
2370 (double)(frame_size_alpha + 1.);
2372 /* frame_size_next: avoiding negative number and too small value */
2373 if ((double)frame_size_next < (double)(target_frame_size * 0.25))
2374 frame_size_next = (int)((double)target_frame_size * 0.25);
/* QP scales roughly inversely with the predicted frame size. */
2376 qpf = (double)qp * target_frame_size / frame_size_next;
2377 qpn = (int)(qpf + 0.5);
2380 /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
2381 mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
2382 if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
2384 mfc_context->brc.qpf_rounding_accumulator = 0.;
2385 } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
2387 mfc_context->brc.qpf_rounding_accumulator = 0.;
2390 /* making sure that QP is not changing too fast */
2391 if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
2392 else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
2393 /* making sure that with QP predictions we do not leave the QP range */
2394 BRC_CLIP(qpn, 1, 51);
2396 /* checking whether HRD compliance is still met */
2397 sts = intel_hcpe_update_hrd(encode_state, mfc_context, frame_bits);
2399 /* calculating QP delta as some function*/
2400 x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
2402 x /= mfc_context->hrd.target_buffer_fullness;
2403 y = mfc_context->hrd.current_buffer_fullness;
2405 x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
2406 y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
2408 if (y < 0.01) y = 0.01;
2410 else if (x < -1) x = -1;
/* smooth correction: exp term damps near-empty margins, sin term
 * maps the normalized distance x in [-1,1] onto the QP delta */
2412 delta_qp = BRC_QP_MAX_CHANGE * exp(-1 / y) * sin(BRC_PI_0_5 * x);
2413 qpn = (int)(qpn + delta_qp + 0.5);
2415 /* making sure that with QP predictions we do not leave the QP range */
2416 BRC_CLIP(qpn, 1, 51);
2418 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
2419 /* correcting QPs of slices of other types */
2420 if (slicetype == HEVC_SLICE_P) {
2421 if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
2422 mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
2423 if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
2424 mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
2425 } else if (slicetype == HEVC_SLICE_I) {
2426 if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
2427 mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
2428 if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
2429 mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
2430 } else { // HEVC_SLICE_B
2431 if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
2432 mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
2433 if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
2434 mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
2436 BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY, 1, 51);
2437 BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY, 1, 51);
2438 BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY, 1, 51);
2439 } else if (sts == BRC_UNDERFLOW) { // underflow
/* Frame was too big: force QP upward; at QP 51 give up. */
2440 if (qpn <= qp) qpn = qp + 1;
2443 sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
2445 } else if (sts == BRC_OVERFLOW) {
2446 if (qpn >= qp) qpn = qp - 1;
2447 if (qpn < 1) { // < 0 (?) overflow with minQP
2449 sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
2453 mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
/* Initialize the VUI/HRD timing fields used for SEI buffering-period
 * messages.  Only CBR is supported; values are expressed in the
 * scaled units HRD expects (rates >> 10, delays in 90 kHz ticks). */
2458 static void intel_hcpe_hrd_context_init(struct encode_state *encode_state,
2459 struct intel_encoder_context *encoder_context)
2461 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2462 unsigned int rate_control_mode = encoder_context->rate_control_mode;
2463 unsigned int target_bit_rate = encoder_context->brc.bits_per_second[0];
2465 // currently we only support CBR mode.
2466 if (rate_control_mode == VA_RC_CBR) {
2467 mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
/* CPB sized at 8x the per-1024 bitrate units */
2468 mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
/* initial removal delay: half the CPB, converted to 90 kHz clock */
2469 mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
2470 mfc_context->vui_hrd.i_cpb_removal_delay = 2;
2471 mfc_context->vui_hrd.i_frame_number = 0;
2473 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
2474 mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
2475 mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
/* Per-frame HRD bookkeeping: advance the encoded-frame counter. */
2481 intel_hcpe_hrd_context_update(struct encode_state *encode_state,
2482 struct gen9_hcpe_context *mfc_context)
2484 mfc_context->vui_hrd.i_frame_number++;
/* Sanity-check that the slices submitted for this frame cover the
 * whole picture: sum num_ctu_in_slice over all slices and compare to
 * the frame's CTB count.  (Return statements fall outside this view;
 * presumably 0 on full coverage — confirm against the full file.) */
2487 int intel_hcpe_interlace_check(VADriverContextP ctx,
2488 struct encode_state *encode_state,
2489 struct intel_encoder_context *encoder_context)
2491 VAEncSliceParameterBufferHEVC *pSliceParameter;
2492 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
/* CTB geometry derived from the SPS log2 sizes. */
2493 int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
2494 int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
2495 int ctb_size = 1 << log2_ctb_size;
2496 int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
2497 int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
2501 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2502 pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[i]->buffer;
2503 ctbCount += pSliceParameter->num_ctu_in_slice;
2506 if (ctbCount == (width_in_ctb * height_in_ctb))
/* Lazily (re)initialize BRC and HRD state before encoding: runs the
 * init paths on the first CBR frame or whenever the app requested a
 * BRC reset (encoder_context->brc.need_reset). */
2512 void intel_hcpe_brc_prepare(struct encode_state *encode_state,
2513 struct intel_encoder_context *encoder_context)
2515 unsigned int rate_control_mode = encoder_context->rate_control_mode;
2516 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2518 if (rate_control_mode == VA_RC_CBR) {
2520 assert(encoder_context->codec != CODEC_MPEG2);
2522 brc_updated = encoder_context->brc.need_reset;
2524 /* Programming bit rate control */
/* MaxSizeInWord == 0 means the BRC context was never initialized. */
2525 if ((mfc_context->bit_rate_control_context[HEVC_SLICE_I].MaxSizeInWord == 0) ||
2527 intel_hcpe_bit_rate_control_context_init(encode_state, encoder_context);
2528 intel_hcpe_brc_init(encode_state, encoder_context);
2531 /* Programming HRD control */
2532 if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated)
2533 intel_hcpe_hrd_context_init(encode_state, encoder_context);
2537 /* HEVC interface API for encoder */
/* Encode one HEVC picture: initialize the HCP context, bind all
 * input/output buffers, program and kick the BCS pipeline, and (for
 * CBR) run the BRC post-pass on the produced frame size.
 *
 * Returns VA_STATUS_SUCCESS; an unrecoverable HRD overflow/underflow
 * is reported on stderr once per context but does not fail the call.
 */
2540 gen9_hcpe_hevc_encode_picture(VADriverContextP ctx,
2541 struct encode_state *encode_state,
2542 struct intel_encoder_context *encoder_context)
2544 struct gen9_hcpe_context *hcpe_context = encoder_context->mfc_context;
2545 unsigned int rate_control_mode = encoder_context->rate_control_mode;
2546 int current_frame_bits_size;
2550 gen9_hcpe_init(ctx, encode_state, encoder_context);
2551 intel_hcpe_hevc_prepare(ctx, encode_state, encoder_context);
2552 /* Programming the bcs pipeline */
2553 gen9_hcpe_hevc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
2554 gen9_hcpe_run(ctx, encode_state, encoder_context);
2555 if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
/* BUGFIX: the argument "&current_frame_bits_size" had been mangled
 * into the mis-encoded token "¤t_frame_bits_size" ("&curren"
 * swallowed as the HTML entity for ¤); restore the address-of
 * expression so gen9_hcpe_stop() can write back the frame size. */
2556 gen9_hcpe_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
2557 sts = intel_hcpe_brc_postpack(encode_state, hcpe_context, current_frame_bits_size);
2558 if (sts == BRC_NO_HRD_VIOLATION) {
2559 intel_hcpe_hrd_context_update(encode_state, hcpe_context);
2561 } else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
/* Unrecoverable HRD violation: warn once per context, then continue. */
2562 if (!hcpe_context->hrd.violation_noted) {
2563 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP) ? "overflow" : "underflow");
2564 hcpe_context->hrd.violation_noted = 1;
2566 return VA_STATUS_SUCCESS;
2573 return VA_STATUS_SUCCESS;
/* Release every bo the HCP encoder context referenced (deblocking,
 * metadata, SAO, MV temporal, reference, indirect-object and batch
 * buffers) and free the auxiliary batchbuffer wrapper.  Pointers are
 * NULLed after unreference to guard against reuse. */
2577 gen9_hcpe_context_destroy(void *context)
2579 struct gen9_hcpe_context *hcpe_context = context;
2582 dri_bo_unreference(hcpe_context->deblocking_filter_line_buffer.bo);
2583 hcpe_context->deblocking_filter_line_buffer.bo = NULL;
2585 dri_bo_unreference(hcpe_context->deblocking_filter_tile_line_buffer.bo);
2586 hcpe_context->deblocking_filter_tile_line_buffer.bo = NULL;
2588 dri_bo_unreference(hcpe_context->deblocking_filter_tile_column_buffer.bo);
2589 hcpe_context->deblocking_filter_tile_column_buffer.bo = NULL;
2591 dri_bo_unreference(hcpe_context->uncompressed_picture_source.bo);
2592 hcpe_context->uncompressed_picture_source.bo = NULL;
2594 dri_bo_unreference(hcpe_context->metadata_line_buffer.bo);
2595 hcpe_context->metadata_line_buffer.bo = NULL;
2597 dri_bo_unreference(hcpe_context->metadata_tile_line_buffer.bo);
2598 hcpe_context->metadata_tile_line_buffer.bo = NULL;
2600 dri_bo_unreference(hcpe_context->metadata_tile_column_buffer.bo);
2601 hcpe_context->metadata_tile_column_buffer.bo = NULL;
2603 dri_bo_unreference(hcpe_context->sao_line_buffer.bo);
2604 hcpe_context->sao_line_buffer.bo = NULL;
2606 dri_bo_unreference(hcpe_context->sao_tile_line_buffer.bo);
2607 hcpe_context->sao_tile_line_buffer.bo = NULL;
2609 dri_bo_unreference(hcpe_context->sao_tile_column_buffer.bo);
2610 hcpe_context->sao_tile_column_buffer.bo = NULL;
2612 /* mv temporal buffer */
2613 for (i = 0; i < NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS; i++) {
2614 if (hcpe_context->current_collocated_mv_temporal_buffer[i].bo != NULL)
2615 dri_bo_unreference(hcpe_context->current_collocated_mv_temporal_buffer[i].bo);
2616 hcpe_context->current_collocated_mv_temporal_buffer[i].bo = NULL;
/* reference surfaces (dri_bo_unreference(NULL) is a no-op) */
2619 for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
2620 dri_bo_unreference(hcpe_context->reference_surfaces[i].bo);
2621 hcpe_context->reference_surfaces[i].bo = NULL;
2624 dri_bo_unreference(hcpe_context->hcp_indirect_cu_object.bo);
2625 hcpe_context->hcp_indirect_cu_object.bo = NULL;
2627 dri_bo_unreference(hcpe_context->hcp_indirect_pak_bse_object.bo);
2628 hcpe_context->hcp_indirect_pak_bse_object.bo = NULL;
2630 dri_bo_unreference(hcpe_context->hcp_batchbuffer_surface.bo);
2631 hcpe_context->hcp_batchbuffer_surface.bo = NULL;
2633 dri_bo_unreference(hcpe_context->aux_batchbuffer_surface.bo);
2634 hcpe_context->aux_batchbuffer_surface.bo = NULL;
2636 if (hcpe_context->aux_batchbuffer)
2637 intel_batchbuffer_free(hcpe_context->aux_batchbuffer);
2639 hcpe_context->aux_batchbuffer = NULL;
/* VA pipeline entry point for the gen9 HCP encoder: dispatch on the
 * profile (switch header outside this view); HEVC Main / Main10 are
 * encoded, anything else yields UNSUPPORTED_PROFILE. */
2644 VAStatus gen9_hcpe_pipeline(VADriverContextP ctx,
2646 struct encode_state *encode_state,
2647 struct intel_encoder_context *encoder_context)
2652 case VAProfileHEVCMain:
2653 case VAProfileHEVCMain10:
2654 vaStatus = gen9_hcpe_hevc_encode_picture(ctx, encode_state, encoder_context);
2658 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2665 Bool gen9_hcpe_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2667 struct gen9_hcpe_context *hcpe_context = calloc(1, sizeof(struct gen9_hcpe_context));
2669 assert(hcpe_context);
2670 hcpe_context->pipe_mode_select = gen9_hcpe_pipe_mode_select;
2671 hcpe_context->set_surface_state = gen9_hcpe_surface_state;
2672 hcpe_context->ind_obj_base_addr_state = gen9_hcpe_ind_obj_base_addr_state;
2673 hcpe_context->pic_state = gen9_hcpe_hevc_pic_state;
2674 hcpe_context->qm_state = gen9_hcpe_hevc_qm_state;
2675 hcpe_context->fqm_state = gen9_hcpe_hevc_fqm_state;
2676 hcpe_context->insert_object = gen9_hcpe_hevc_insert_object;
2677 hcpe_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
2679 encoder_context->mfc_context = hcpe_context;
2680 encoder_context->mfc_context_destroy = gen9_hcpe_context_destroy;
2681 encoder_context->mfc_pipeline = gen9_hcpe_pipeline;
2682 encoder_context->mfc_brc_prepare = intel_hcpe_brc_prepare;
2684 hevc_gen_default_iq_matrix_encoder(&hcpe_context->iq_matrix_hevc);