2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
45 #include "intel_media.h"
48 #define log2f(x) (logf(x)/(float)M_LN2)
/*
 * Normalize an H.264 slice_type from the app onto the three canonical
 * internal types: SP is folded into P, SI into I, and anything that is
 * not already B triggers a one-time warning and is forced to B.
 * Returns the normalized slice type.
 */
51 int intel_avc_enc_slice_type_fixup(int slice_type)
53 if (slice_type == SLICE_TYPE_SP ||
54 slice_type == SLICE_TYPE_P)
55 slice_type = SLICE_TYPE_P;
56 else if (slice_type == SLICE_TYPE_SI ||
57 slice_type == SLICE_TYPE_I)
58 slice_type = SLICE_TYPE_I;
/* else: any other value is invalid for H.264 encoding here */
60 if (slice_type != SLICE_TYPE_B)
61 WARN_ONCE("Invalid slice type for H.264 encoding!\n");
63 slice_type = SLICE_TYPE_B;
/*
 * Seed the per-slice-type (I/P/B, hence the loop over 3 entries) bit rate
 * control state with default QP adjustment parameters: maximum up/down QP
 * modifiers, grow/shrink step sizes and resistances, and the Correct[]
 * table used when correcting frame-size deviations.
 */
70 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
71 struct intel_encoder_context *encoder_context)
73 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
76 for(i = 0 ; i < 3; i++) {
77 mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
78 mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
79 mfc_context->bit_rate_control_context[i].GrowInit = 6;
80 mfc_context->bit_rate_control_context[i].GrowResistance = 4;
81 mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
82 mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
/* symmetric correction weights: strong at the extremes, weak in the middle */
84 mfc_context->bit_rate_control_context[i].Correct[0] = 8;
85 mfc_context->bit_rate_control_context[i].Correct[1] = 4;
86 mfc_context->bit_rate_control_context[i].Correct[2] = 2;
87 mfc_context->bit_rate_control_context[i].Correct[3] = 2;
88 mfc_context->bit_rate_control_context[i].Correct[4] = 4;
89 mfc_context->bit_rate_control_context[i].Correct[5] = 8;
/*
 * Initialize the software bit rate control (BRC) state from the encoder's
 * brc parameters: per-GOP target frame sizes weighted by slice type,
 * HRD buffer fullness/capacity, and an initial QP estimate derived from
 * the average bits-per-frame relative to the estimated frame sizes at
 * QP=1 (qp1_size) and QP=51 (qp51_size) for a 4:2:0 frame.
 */
93 static void intel_mfc_brc_init(struct encode_state *encode_state,
94 struct intel_encoder_context* encoder_context)
96 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
97 double bitrate = encoder_context->brc.bits_per_second[0];
98 double framerate = (double)encoder_context->brc.framerate_per_100s[0] / 100.0;
99 int inum = encoder_context->brc.num_iframes_in_gop,
100 pnum = encoder_context->brc.num_pframes_in_gop,
101 bnum = encoder_context->brc.num_bframes_in_gop; /* Gop structure: number of I, P, B frames in the Gop. */
102 int intra_period = encoder_context->brc.gop_size;
/* rough frame sizes at the QP extremes: 10% resp. 0.1% of raw 4:2:0 bits */
103 double qp1_size = 0.1 * 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
104 double qp51_size = 0.001 * 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
108 mfc_context->brc.mode = encoder_context->rate_control_mode;
/* start every slice type at the mid-range QP 26 */
110 for (i = 0; i < 3; i++) {
111 mfc_context->brc.qp_prime_y[0][i] = 26;
/* split the GOP bit budget over frames using per-type weights */
114 mfc_context->brc.target_frame_size[0][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
115 (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
116 mfc_context->brc.target_frame_size[0][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[0][SLICE_TYPE_I];
117 mfc_context->brc.target_frame_size[0][SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[0][SLICE_TYPE_I];
119 mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
120 mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
121 mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
123 bpf = mfc_context->brc.bits_per_frame[0] = bitrate/framerate;
125 mfc_context->hrd.buffer_size = encoder_context->brc.hrd_buffer_size;
/* NOTE(review): the (double) cast below applies to the comparison, not to
 * either operand; the condition's truth value is unaffected, but confirm
 * the intent was not to cast the fullness value itself. */
126 mfc_context->hrd.current_buffer_fullness =
127 (double)(encoder_context->brc.hrd_initial_buffer_fullness < mfc_context->hrd.buffer_size) ?
128 encoder_context->brc.hrd_initial_buffer_fullness : mfc_context->hrd.buffer_size / 2.;
129 mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
130 mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
131 mfc_context->hrd.violation_noted = 0;
/* linearly interpolate the starting QP between the QP=51 and QP=1 sizes */
133 if ((bpf > qp51_size) && (bpf < qp1_size)) {
134 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P] = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
136 else if (bpf >= qp1_size)
137 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P] = 1;
138 else if (bpf <= qp51_size)
139 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P] = 51;
141 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I] = mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P];
142 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B] = mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I];
144 BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], 1, 51);
145 BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], 1, 51);
146 BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B], 1, 51);
/*
 * Update the modeled HRD buffer after encoding a frame of frame_bits bits:
 * debit the frame, then credit one frame interval's worth of bitrate.
 * Returns BRC_UNDERFLOW if the debit would drain the buffer (state is
 * rolled back to prev_bf), and BRC_NO_HRD_VIOLATION otherwise; the
 * overflow branch presumably returns BRC_OVERFLOW — callers test for it.
 */
149 int intel_mfc_update_hrd(struct encode_state *encode_state,
150 struct intel_encoder_context *encoder_context,
153 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
154 double prev_bf = mfc_context->hrd.current_buffer_fullness;
156 mfc_context->hrd.current_buffer_fullness -= frame_bits;
158 if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
159 mfc_context->hrd.current_buffer_fullness = prev_bf;
160 return BRC_UNDERFLOW;
163 mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame[0];
164 if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
/* VBR may simply saturate at the buffer size; otherwise roll back */
165 if (mfc_context->brc.mode == VA_RC_VBR)
166 mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
168 mfc_context->hrd.current_buffer_fullness = prev_bf;
172 return BRC_NO_HRD_VIOLATION;
/*
 * Post-encode BRC step: given the actual size (frame_bits) of the frame
 * just encoded, predict the QP for the next frame of the same slice type,
 * smooth it against rounding error and rate-of-change limits, fold in an
 * HRD-fullness-based correction, propagate related adjustments to the
 * other slice types, and store the result back into brc.qp_prime_y.
 * Returns the HRD status (possibly upgraded to the *_WITH_MAX/MIN_QP
 * variants when QP can no longer compensate).
 */
175 int intel_mfc_brc_postpack(struct encode_state *encode_state,
176 struct intel_encoder_context *encoder_context,
179 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
180 gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
181 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
182 int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
183 int qpi = mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I];
184 int qpp = mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P];
185 int qpb = mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B];
186 int qp; // quantizer of previously encoded slice of current type
187 int qpn; // predicted quantizer for next frame of current type in integer format
188 double qpf; // predicted quantizer for next frame of current type in float format
189 double delta_qp; // QP correction
190 int target_frame_size, frame_size_next;
192 * x - how far we are from HRD buffer borders
193 * y - how far we are from target HRD buffer fullness
196 double frame_size_alpha;
198 qp = mfc_context->brc.qp_prime_y[0][slicetype];
200 target_frame_size = mfc_context->brc.target_frame_size[0][slicetype];
/* alpha damps the next-frame size prediction; small HRD buffers get none */
201 if (mfc_context->hrd.buffer_capacity < 5)
202 frame_size_alpha = 0;
204 frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
205 if (frame_size_alpha > 30) frame_size_alpha = 30;
206 frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
207 (double)(frame_size_alpha + 1.);
209 /* frame_size_next: avoiding negative number and too small value */
210 if ((double)frame_size_next < (double)(target_frame_size * 0.25))
211 frame_size_next = (int)((double)target_frame_size * 0.25);
/* scale QP by the over/undershoot ratio (larger frames -> larger QP) */
213 qpf = (double)qp * target_frame_size / frame_size_next;
214 qpn = (int)(qpf + 0.5);
217 /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
218 mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
219 if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
221 mfc_context->brc.qpf_rounding_accumulator = 0.;
222 } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
224 mfc_context->brc.qpf_rounding_accumulator = 0.;
227 /* making sure that QP is not changing too fast */
228 if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
229 else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
230 /* making sure that with QP predictions we do not leave QPs range */
231 BRC_CLIP(qpn, 1, 51);
233 /* checking whether HRD compliance is still met */
234 sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
236 /* calculating QP delta as some function */
237 x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
239 x /= mfc_context->hrd.target_buffer_fullness;
240 y = mfc_context->hrd.current_buffer_fullness;
243 x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
244 y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
246 if (y < 0.01) y = 0.01;
248 else if (x < -1) x = -1;
/* smooth sinusoidal push toward the target fullness, damped by exp(-1/y) */
250 delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
251 qpn = (int)(qpn + delta_qp + 0.5);
253 /* making sure that with QP predictions we do not leave QPs range */
254 BRC_CLIP(qpn, 1, 51);
256 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
257 /* correcting QPs of slices of other types */
258 if (slicetype == SLICE_TYPE_P) {
259 if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
260 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B] += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
261 if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
262 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
263 } else if (slicetype == SLICE_TYPE_I) {
264 if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
265 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B] += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
266 if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
267 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
268 } else { // SLICE_TYPE_B
269 if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
270 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P] += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
271 if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
272 mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I] += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
274 BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], 1, 51);
275 BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], 1, 51);
276 BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B], 1, 51);
277 } else if (sts == BRC_UNDERFLOW) { // underflow
/* force at least one QP step up; at QP 51 we cannot compress harder */
278 if (qpn <= qp) qpn = qp + 1;
281 sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
283 } else if (sts == BRC_OVERFLOW) {
284 if (qpn >= qp) qpn = qp - 1;
285 if (qpn < 1) { // < 0 (?) overflow with minQP
287 sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
291 mfc_context->brc.qp_prime_y[0][slicetype] = qpn;
/*
 * Initialize the VUI HRD parameters used for SEI timing messages.
 * Bit rate and CPB removal delay values follow the H.264 VUI/SEI
 * conventions (bit_rate scaled down by 2^10, delays in 90 kHz ticks).
 */
296 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
297 struct intel_encoder_context *encoder_context)
299 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
300 unsigned int rate_control_mode = encoder_context->rate_control_mode;
301 int target_bit_rate = encoder_context->brc.bits_per_second[0];
303 // currently we only support CBR mode.
304 if (rate_control_mode == VA_RC_CBR) {
305 mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
306 mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
307 mfc_context->vui_hrd.i_cpb_removal_delay = 2;
308 mfc_context->vui_hrd.i_frame_number = 0;
/* standard 24-bit field lengths for the HRD SEI syntax elements */
310 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
311 mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
312 mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
/* Advance the HRD frame counter after each encoded frame; the count scales
 * the per-frame CPB removal delay in the timing SEI. */
318 intel_mfc_hrd_context_update(struct encode_state *encode_state,
319 struct gen6_mfc_context *mfc_context)
321 mfc_context->vui_hrd.i_frame_number++;
/*
 * Sum the macroblock counts of all slices and compare against the frame's
 * total MB count. When they match the slices cover exactly one full
 * (progressive) frame; the elided tail presumably returns 0 in that case
 * and non-zero otherwise — verify against callers.
 */
324 int intel_mfc_interlace_check(VADriverContextP ctx,
325 struct encode_state *encode_state,
326 struct intel_encoder_context *encoder_context)
328 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
329 VAEncSliceParameterBufferH264 *pSliceParameter;
332 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
333 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
335 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
336 pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer;
337 mbCount += pSliceParameter->num_macroblocks;
340 if ( mbCount == ( width_in_mbs * height_in_mbs ) )
/*
 * (Re)initialize the BRC and HRD contexts before encoding when the codec
 * is H.264 (plain or MVC), the rate control mode is CBR, and the app has
 * requested a BRC reset. Other codecs/modes are left untouched.
 */
346 void intel_mfc_brc_prepare(struct encode_state *encode_state,
347 struct intel_encoder_context *encoder_context)
349 unsigned int rate_control_mode = encoder_context->rate_control_mode;
351 if (encoder_context->codec != CODEC_H264 &&
352 encoder_context->codec != CODEC_H264_MVC)
355 if (rate_control_mode == VA_RC_CBR) {
356 /* Programming bit rate control */
357 if (encoder_context->brc.need_reset) {
358 intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
359 intel_mfc_brc_init(encode_state, encoder_context);
362 /* Programming HRD control */
363 if (encoder_context->brc.need_reset)
364 intel_mfc_hrd_context_init(encode_state, encoder_context);
/*
 * Emit the packed SPS, PPS and SEI headers (when supplied by the app via
 * packed-header buffers) into the slice batch through the hardware's
 * insert_object hook. Header lengths are split into 32-bit DWs plus
 * trailing bits; skip_emul_byte_cnt tells the HW how many leading bytes
 * to exclude from emulation-prevention insertion. If no packed SEI was
 * provided and rate control is CBR, a buffering/timing SEI is built from
 * the driver's own vui_hrd state instead.
 */
368 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
369 struct encode_state *encode_state,
370 struct intel_encoder_context *encoder_context,
371 struct intel_batchbuffer *slice_batch)
373 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
374 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
375 unsigned int rate_control_mode = encoder_context->rate_control_mode;
376 unsigned int skip_emul_byte_cnt;
/* --- SPS --- */
378 if (encode_state->packed_header_data[idx]) {
379 VAEncPackedHeaderParameterBuffer *param = NULL;
380 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
381 unsigned int length_in_bits;
383 assert(encode_state->packed_header_param[idx]);
384 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
385 length_in_bits = param->bit_length;
387 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
388 mfc_context->insert_object(ctx,
391 ALIGN(length_in_bits, 32) >> 5,
392 length_in_bits & 0x1f,
396 !param->has_emulation_bytes,
/* --- PPS --- */
400 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
402 if (encode_state->packed_header_data[idx]) {
403 VAEncPackedHeaderParameterBuffer *param = NULL;
404 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
405 unsigned int length_in_bits;
407 assert(encode_state->packed_header_param[idx]);
408 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
409 length_in_bits = param->bit_length;
411 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
413 mfc_context->insert_object(ctx,
416 ALIGN(length_in_bits, 32) >> 5,
417 length_in_bits & 0x1f,
421 !param->has_emulation_bytes,
/* --- SEI: app-provided, or driver-built timing SEI under CBR --- */
425 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
427 if (encode_state->packed_header_data[idx]) {
428 VAEncPackedHeaderParameterBuffer *param = NULL;
429 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
430 unsigned int length_in_bits;
432 assert(encode_state->packed_header_param[idx]);
433 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
434 length_in_bits = param->bit_length;
436 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
437 mfc_context->insert_object(ctx,
440 ALIGN(length_in_bits, 32) >> 5,
441 length_in_bits & 0x1f,
445 !param->has_emulation_bytes,
447 } else if (rate_control_mode == VA_RC_CBR) {
449 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
451 unsigned char *sei_data = NULL;
/* per-frame CPB removal delay grows with the frame counter */
453 int length_in_bits = build_avc_sei_buffer_timing(
454 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
455 mfc_context->vui_hrd.i_initial_cpb_removal_delay,
457 mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
458 mfc_context->vui_hrd.i_dpb_output_delay_length,
461 mfc_context->insert_object(ctx,
463 (unsigned int *)sei_data,
464 ALIGN(length_in_bits, 32) >> 5,
465 length_in_bits & 0x1f,
/*
 * Bind all input/output buffer objects needed for one H.264 encode pass:
 *  - the reconstructed surface (as pre- or post-deblocking output,
 *    depending on whether any slice enables the in-loop deblocker),
 *    lazily allocating its top/bottom direct-MV (DMV) buffers;
 *  - every active reference surface plus its DMV buffers;
 *  - the raw input YUV surface;
 *  - the coded (PAK/BSE) output buffer, with its segment header reset.
 * All BO references taken here must be balanced by a later release
 * elsewhere in the driver. Returns vaStatus (VA_STATUS_SUCCESS unless an
 * elided error path sets otherwise).
 */
475 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
476 struct encode_state *encode_state,
477 struct intel_encoder_context *encoder_context)
479 struct i965_driver_data *i965 = i965_driver_data(ctx);
480 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
481 struct object_surface *obj_surface;
482 struct object_buffer *obj_buffer;
483 GenAvcSurface *gen6_avc_surface;
485 VAStatus vaStatus = VA_STATUS_SUCCESS;
486 int i, j, enable_avc_ildb = 0;
487 VAEncSliceParameterBufferH264 *slice_param;
488 struct i965_coded_buffer_segment *coded_buffer_segment;
489 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
490 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
491 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
493 if (IS_GEN6(i965->intel.device_info)) {
494 /* On the SNB it should be fixed to 128 for the DMV buffer */
/* deblocking is in use if ANY slice does not disable it (idc != 1) */
498 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
499 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
500 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
502 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
503 assert((slice_param->slice_type == SLICE_TYPE_I) ||
504 (slice_param->slice_type == SLICE_TYPE_SI) ||
505 (slice_param->slice_type == SLICE_TYPE_P) ||
506 (slice_param->slice_type == SLICE_TYPE_SP) ||
507 (slice_param->slice_type == SLICE_TYPE_B));
509 if (slice_param->disable_deblocking_filter_idc != 1) {
518 /* Setup all the input & output objects */
520 /* Setup current frame and current direct mv buffer*/
521 obj_surface = encode_state->reconstructed_object;
522 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* lazily attach the per-surface DMV buffers (68 bytes per MB) */
524 if ( obj_surface->private_data == NULL) {
525 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
526 assert(gen6_avc_surface);
527 gen6_avc_surface->dmv_top =
528 dri_bo_alloc(i965->intel.bufmgr,
530 68 * width_in_mbs * height_in_mbs,
532 gen6_avc_surface->dmv_bottom =
533 dri_bo_alloc(i965->intel.bufmgr,
535 68 * width_in_mbs * height_in_mbs,
537 assert(gen6_avc_surface->dmv_top);
538 assert(gen6_avc_surface->dmv_bottom);
539 obj_surface->private_data = (void *)gen6_avc_surface;
540 obj_surface->free_private_data = (void *)gen_free_avc_surface;
/* current frame's DMV buffers occupy the last two slots */
542 gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
543 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
544 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
545 dri_bo_reference(gen6_avc_surface->dmv_top);
546 dri_bo_reference(gen6_avc_surface->dmv_bottom);
548 if (enable_avc_ildb) {
549 mfc_context->post_deblocking_output.bo = obj_surface->bo;
550 dri_bo_reference(mfc_context->post_deblocking_output.bo);
552 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
553 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
556 mfc_context->surface_state.width = obj_surface->orig_width;
557 mfc_context->surface_state.height = obj_surface->orig_height;
558 mfc_context->surface_state.w_pitch = obj_surface->width;
559 mfc_context->surface_state.h_pitch = obj_surface->height;
561 /* Setup reference frames and direct mv buffers*/
562 for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
563 obj_surface = encode_state->reference_objects[i];
565 if (obj_surface && obj_surface->bo) {
566 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
567 dri_bo_reference(obj_surface->bo);
569 /* Check DMV buffer */
570 if ( obj_surface->private_data == NULL) {
572 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
573 assert(gen6_avc_surface);
574 gen6_avc_surface->dmv_top =
575 dri_bo_alloc(i965->intel.bufmgr,
577 68 * width_in_mbs * height_in_mbs,
579 gen6_avc_surface->dmv_bottom =
580 dri_bo_alloc(i965->intel.bufmgr,
582 68 * width_in_mbs * height_in_mbs,
584 assert(gen6_avc_surface->dmv_top);
585 assert(gen6_avc_surface->dmv_bottom);
586 obj_surface->private_data = gen6_avc_surface;
587 obj_surface->free_private_data = gen_free_avc_surface;
590 gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
591 /* Setup DMV buffer */
592 mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
593 mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
594 dri_bo_reference(gen6_avc_surface->dmv_top);
595 dri_bo_reference(gen6_avc_surface->dmv_bottom);
601 mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
602 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* coded output: payload starts after the driver's segment header */
604 obj_buffer = encode_state->coded_buf_object;
605 bo = obj_buffer->buffer_store->bo;
606 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
607 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
608 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
609 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
612 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
613 coded_buffer_segment->mapped = 0;
614 coded_buffer_segment->codec = encoder_context->codec;
620 * The LUT uses the pair of 4-bit units: (shift, base) structure.
622 * So it is necessary to convert one cost into the nearest LUT format.
624 * 2^K *x = 2^n * (1 + deltaX)
625 * k + log2(x) = n + log2(1 + deltaX)
626 * log2(x) = n - k + log2(1 + deltaX)
627 * As X is in the range of [1, 15]
628 * 4 > n - k + log2(1 + deltaX) >= 0
629 * => n + log2(1 + deltaX) >= k > n - 4 + log2(1 + deltaX)
630 * Then we can derive the corresponding K and get the nearest LUT format.
/*
 * Convert a cost value into the hardware's packed LUT format:
 * (shift << 4) | base, representing approximately base << shift.
 * The candidate shifts near log2(value) are searched for the encoding
 * with the smallest absolute error; the result is clamped so that its
 * decoded magnitude does not exceed that of 'max' (also in LUT format).
 */
632 int intel_format_lutvalue(int value, int max)
635 int logvalue, temp1, temp2;
640 logvalue = (int)(log2f((float)value));
644 int error, temp_value, base, j, temp_err;
/* candidate shifts span [logvalue-3, logvalue] */
646 j = logvalue - 4 + 1;
648 for(; j <= logvalue; j++) {
/* round value up to a multiple of 2^j to get the 4-bit base */
652 base = (value + (1 << (j - 1)) - 1) >> j;
657 temp_value = base << j;
658 temp_err = abs(value - temp_value);
659 if (temp_err < error) {
661 ret = (j << 4) | base;
/* compare decoded magnitudes of the result and of 'max' for clamping */
667 temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
668 temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
677 #define VP8_QP_MAX 128
/* Rate-distortion lambda for a given QP: round(2^(value/6 - 2)), where
 * 'value' is presumably initialized from qp on an elided line — confirm
 * against the full source. */
680 static float intel_lambda_qp(int qp)
682 float value, lambdaf;
684 value = value / 6 - 2;
687 lambdaf = roundf(powf(2, value));
/*
 * Fill the VME state message's mode/MV cost LUT for one H.264 slice.
 * Costs are lambda-scaled and packed into the HW's (shift,base) LUT
 * format via intel_format_lutvalue(); the cap argument (0x8f vs 0x6f)
 * bounds the encoded cost. I slices get intra mode costs plus an MV
 * cost ramp; the constant-0x4a branch appears to handle a special case
 * (elided condition) and P/B slices get per-partition inter costs.
 */
692 void intel_h264_calc_mbmvcost_qp(int qp,
694 uint8_t *vme_state_message)
696 int m_cost, j, mv_count;
697 float lambda, m_costf;
699 assert(qp <= QP_MAX);
700 lambda = intel_lambda_qp(qp);
703 vme_state_message[MODE_CHROMA_INTRA] = 0;
704 vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
706 if (slice_type == SLICE_TYPE_I) {
707 vme_state_message[MODE_INTRA_16X16] = 0;
709 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
710 m_cost = lambda * 16;
711 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
713 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* MV cost ramp: log2-shaped growth with the MV range index */
716 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
717 for (j = 1; j < 3; j++) {
718 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
719 m_cost = (int)m_costf;
720 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
723 for (j = 4; j <= 64; j *= 2) {
724 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
725 m_cost = (int)m_costf;
726 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
/* fixed fallback costs (condition elided in this view) */
731 vme_state_message[MODE_INTRA_16X16] = 0x4a;
732 vme_state_message[MODE_INTRA_8X8] = 0x4a;
733 vme_state_message[MODE_INTRA_4X4] = 0x4a;
734 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
735 vme_state_message[MODE_INTER_16X16] = 0x4a;
736 vme_state_message[MODE_INTER_16X8] = 0x4a;
737 vme_state_message[MODE_INTER_8X8] = 0x4a;
738 vme_state_message[MODE_INTER_8X4] = 0x4a;
739 vme_state_message[MODE_INTER_4X4] = 0x4a;
740 vme_state_message[MODE_INTER_BWD] = 0x2a;
/* lambda-scaled intra costs shared by P and B slices */
743 m_costf = lambda * 10;
744 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
745 m_cost = lambda * 14;
746 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
747 m_cost = lambda * 24;
748 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
749 m_costf = lambda * 3.5;
751 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
752 if (slice_type == SLICE_TYPE_P) {
753 m_costf = lambda * 2.5;
755 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
756 m_costf = lambda * 4;
758 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
759 m_costf = lambda * 1.5;
761 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
762 m_costf = lambda * 3;
764 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
765 m_costf = lambda * 5;
767 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
768 /* BWD is not used in P-frame */
769 vme_state_message[MODE_INTER_BWD] = 0;
/* B slice: higher partition costs, and a backward-prediction cost */
771 m_costf = lambda * 2.5;
773 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
774 m_costf = lambda * 5.5;
776 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
777 m_costf = lambda * 3.5;
779 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
780 m_costf = lambda * 5.0;
782 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
783 m_costf = lambda * 6.5;
785 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
786 m_costf = lambda * 1.5;
788 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
/*
 * Recompute the H.264 VME mode/MV cost LUT for the current frame.
 * QP comes from the picture/slice parameters in CQP mode, otherwise
 * from the BRC's predicted QP for this slice type; the actual LUT fill
 * is delegated to intel_h264_calc_mbmvcost_qp().
 */
794 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
795 struct encode_state *encode_state,
796 struct intel_encoder_context *encoder_context)
798 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
799 struct gen6_vme_context *vme_context = encoder_context->vme_context;
800 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
801 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
803 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
805 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
807 if (encoder_context->rate_control_mode == VA_RC_CQP)
808 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
810 qp = mfc_context->brc.qp_prime_y[0][slice_type];
/* nothing to do if the VME state message buffer was never set up */
812 if (vme_state_message == NULL)
815 intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
/*
 * VP8 variant of the VME cost LUT update. A key frame is treated as an
 * I slice and an inter frame as a P slice. The VP8 quantization index
 * (0..VP8_QP_MAX) is rescaled onto the H.264 QP range before computing
 * lambda, then the LUT is filled inline with the same lambda-scaled
 * (shift,base)-packed costs as the H.264 path.
 */
818 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
819 struct encode_state *encode_state,
820 struct intel_encoder_context *encoder_context)
822 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
823 struct gen6_vme_context *vme_context = encoder_context->vme_context;
824 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
825 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
826 int qp, m_cost, j, mv_count;
827 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
828 float lambda, m_costf;
830 int is_key_frame = !pic_param->pic_flags.bits.frame_type;
831 int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
833 if (vme_state_message == NULL)
836 if (encoder_context->rate_control_mode == VA_RC_CQP)
837 qp = q_matrix->quantization_index[0];
839 qp = mfc_context->brc.qp_prime_y[0][slice_type];
/* map the VP8 q-index range onto the H.264 QP range for lambda */
841 lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
844 vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
/* key frame: intra costs plus the MV cost ramp */
847 vme_state_message[MODE_INTRA_16X16] = 0;
848 m_cost = lambda * 16;
849 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
851 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
854 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
855 for (j = 1; j < 3; j++) {
856 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
857 m_cost = (int)m_costf;
858 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
861 for (j = 4; j <= 64; j *= 2) {
862 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
863 m_cost = (int)m_costf;
864 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
/* fixed fallback costs (condition elided in this view) */
869 vme_state_message[MODE_INTRA_16X16] = 0x4a;
870 vme_state_message[MODE_INTRA_4X4] = 0x4a;
871 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
872 vme_state_message[MODE_INTER_16X16] = 0x4a;
873 vme_state_message[MODE_INTER_16X8] = 0x4a;
874 vme_state_message[MODE_INTER_8X8] = 0x4a;
875 vme_state_message[MODE_INTER_4X4] = 0x4a;
876 vme_state_message[MODE_INTER_BWD] = 0;
/* inter frame: lambda-scaled intra and inter partition costs */
879 m_costf = lambda * 10;
880 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
881 m_cost = lambda * 24;
882 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
884 m_costf = lambda * 3.5;
886 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
888 m_costf = lambda * 2.5;
890 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
891 m_costf = lambda * 4;
893 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
894 m_costf = lambda * 1.5;
896 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
897 m_costf = lambda * 5;
899 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
900 /* BWD is not used in P-frame */
901 vme_state_message[MODE_INTER_BWD] = 0;
905 #define MB_SCOREBOARD_A (1 << 0)
906 #define MB_SCOREBOARD_B (1 << 1)
907 #define MB_SCOREBOARD_C (1 << 2)
/*
 * Configure the GPE hardware scoreboard so each macroblock's VME work
 * waits on its A (left), B (above) and C (above-right) neighbours,
 * expressed as (delta_x, delta_y) dependencies relative to the current MB.
 */
909 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
911 vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
912 vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
913 vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
917 /* In VME prediction the current mb depends on the neighbour
918 * A/B/C macroblock. So the left/up/up-right dependency should
921 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
922 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
923 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
924 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
925 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
926 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
928 vme_context->gpe_context.vfe_desc7.dword = 0;
932 /* check whether the mb of (x_index, y_index) is out of bound */
/* Bounds test for the MB walker: the MB must lie inside the frame and
 * within the slice's [first_mb, first_mb + num_mb] range. Return values
 * are elided here; callers use !loop_in_bounds(...) to keep iterating,
 * so non-zero presumably means out of bounds — confirm in full source. */
933 static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
936 if (x_index < 0 || x_index >= mb_width)
938 if (y_index < 0 || y_index >= mb_height)
941 mb_index = y_index * mb_width + x_index;
942 if (mb_index < first_mb || mb_index > (first_mb + num_mb))
/* Fill the VME batchbuffer for H.264 on gen7: for every macroblock of every
 * slice, emit one CMD_MEDIA_OBJECT that launches the VME kernel with
 * scoreboard-based A/B/C neighbour dependencies, the per-MB intra
 * availability flags, and (when ROI is enabled) a per-MB QP.
 * The two outer loop nests appear to walk the MBs in a diagonal/wavefront
 * order — first the left part of the picture, then from column
 * mb_width - 2 — TODO confirm; several loop-control lines are missing from
 * this excerpt. */
948 gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
949 struct encode_state *encode_state,
950 int mb_width, int mb_height,
952 int transform_8x8_mode_flag,
953 struct intel_encoder_context *encoder_context)
955 struct gen6_vme_context *vme_context = encoder_context->vme_context;
958 unsigned int *command_ptr;
959 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
960 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
961 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
962 int qp,qp_mb,qp_index;
963 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* CQP takes QP straight from the parameters; otherwise use the BRC value. */
965 if (encoder_context->rate_control_mode == VA_RC_CQP)
966 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
968 qp = mfc_context->brc.qp_prime_y[0][slice_type];
970 #define USE_SCOREBOARD (1 << 21)
/* Map the batchbuffer BO for CPU writes; unmapped at the end. */
972 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
973 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
975 for (s = 0; s < encode_state->num_slice_params_ext; s++) {
976 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
977 int first_mb = pSliceParameter->macroblock_address;
978 int num_mb = pSliceParameter->num_macroblocks;
979 unsigned int mb_intra_ub, score_dep;
980 int x_outer, y_outer, x_inner, y_inner;
983 x_outer = first_mb % mb_width;
984 y_outer = first_mb / mb_width;
/* First walker pass over the slice's macroblocks. */
987 for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
990 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
/* Build intra-availability flags and scoreboard dependencies from the
 * MB's position: left (A/E), top (B), top-left (D), top-right (C). */
994 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
995 score_dep |= MB_SCOREBOARD_A;
997 if (y_inner != mb_row) {
998 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
999 score_dep |= MB_SCOREBOARD_B;
1001 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1002 if (x_inner != (mb_width -1)) {
1003 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1004 score_dep |= MB_SCOREBOARD_C;
/* Emit one MEDIA_OBJECT (9 dwords) for this macroblock. */
1008 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1009 *command_ptr++ = kernel;
1010 *command_ptr++ = USE_SCOREBOARD;
1013 /* the (X, Y) term of scoreboard */
1014 *command_ptr++ = ((y_inner << 16) | x_inner);
1015 *command_ptr++ = score_dep;
/* Inline kernel arguments: picture width and MB coordinates. */
1017 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1018 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1019 /* QP occupies one byte */
/* ROI mode: per-MB QP from the qp_per_mb map built by the ROI code. */
1020 if (vme_context->roi_enabled) {
1021 qp_index = y_inner * mb_width + x_inner;
1022 qp_mb = *(vme_context->qp_per_mb + qp_index);
1025 *command_ptr++ = qp_mb;
/* Second walker pass, starting from column mb_width - 2. */
1032 xtemp_outer = mb_width - 2;
1033 if (xtemp_outer < 0)
1035 x_outer = xtemp_outer;
1036 y_outer = first_mb / mb_width;
1037 for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1040 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
/* Same neighbour-availability / dependency logic as the first pass. */
1044 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1045 score_dep |= MB_SCOREBOARD_A;
1047 if (y_inner != mb_row) {
1048 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1049 score_dep |= MB_SCOREBOARD_B;
1051 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1053 if (x_inner != (mb_width -1)) {
1054 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1055 score_dep |= MB_SCOREBOARD_C;
1059 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1060 *command_ptr++ = kernel;
1061 *command_ptr++ = USE_SCOREBOARD;
1064 /* the (X, Y) term of scoreboard */
1065 *command_ptr++ = ((y_inner << 16) | x_inner);
1066 *command_ptr++ = score_dep;
1068 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1069 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1070 /* qp occupies one byte */
1071 if (vme_context->roi_enabled) {
1072 qp_index = y_inner * mb_width + x_inner;
1073 qp_mb = *(vme_context->qp_per_mb + qp_index);
1076 *command_ptr++ = qp_mb;
1082 if (x_outer >= mb_width) {
1084 x_outer = xtemp_outer;
/* Terminate the batch and release the CPU mapping. */
1090 *command_ptr++ = MI_BATCH_BUFFER_END;
1092 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
/* Pack one reference-picture entry byte for MFX_AVC_REF_IDX_STATE:
 *   bit 6      : long-term reference flag
 *   bit 5      : "frame" flag — set when neither or both field flags are set
 *   bits 4..1  : frame store id
 *   bit 0      : set only for a bottom-field-only reference */
1096 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1098 unsigned int is_long_term =
1099 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1100 unsigned int is_top_field =
1101 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1102 unsigned int is_bottom_field =
1103 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1105 return ((is_long_term << 6) |
1106 ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1107 (frame_store_id << 1) |
1108 ((is_top_field ^ 1) & is_bottom_field));
/* Program the MFX_AVC_REF_IDX_STATE commands for reference lists L0 and L1.
 * Entries default to 0x80808080 ("invalid"); for P/B slices the single
 * reference actually used by VME is looked up in the DPB
 * (encode_state->reference_objects) and its packed state byte is written at
 * the ref_idx position. Emits two 10-dword BCS batches, one per list. */
1112 intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
1113 struct encode_state *encode_state,
1114 struct intel_encoder_context *encoder_context)
1116 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1117 struct intel_batchbuffer *batch = encoder_context->base.batch;
1119 struct object_surface *obj_surface;
1120 unsigned int fref_entry, bref_entry;
1122 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
/* 0x80 per byte marks an unused/invalid reference entry. */
1124 fref_entry = 0x80808080;
1125 bref_entry = 0x80808080;
1126 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1128 if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
1129 int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
1131 if (ref_idx_l0 > 3) {
1132 WARN_ONCE("ref_idx_l0 is out of range\n");
/* Find the L0 reference's frame store index in the DPB. */
1136 obj_surface = vme_context->used_reference_objects[0];
1138 for (i = 0; i < 16; i++) {
1140 obj_surface == encode_state->reference_objects[i]) {
1145 if (frame_index == -1) {
1146 WARN_ONCE("RefPicList0 is not found in DPB!\n");
/* Replace the byte at position ref_idx_l0 with the packed entry. */
1148 int ref_idx_l0_shift = ref_idx_l0 * 8;
1149 fref_entry &= ~(0xFF << ref_idx_l0_shift);
1150 fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
/* B slices additionally need an L1 (backward) reference. */
1154 if (slice_type == SLICE_TYPE_B) {
1155 int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
1157 if (ref_idx_l1 > 3) {
1158 WARN_ONCE("ref_idx_l1 is out of range\n");
1162 obj_surface = vme_context->used_reference_objects[1];
1164 for (i = 0; i < 16; i++) {
1166 obj_surface == encode_state->reference_objects[i]) {
1171 if (frame_index == -1) {
1172 WARN_ONCE("RefPicList1 is not found in DPB!\n");
1174 int ref_idx_l1_shift = ref_idx_l1 * 8;
1175 bref_entry &= ~(0xFF << ref_idx_l1_shift);
1176 bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
/* Emit the L0 reference index state (remaining 7 dwords invalid). */
1180 BEGIN_BCS_BATCH(batch, 10);
1181 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1182 OUT_BCS_BATCH(batch, 0); //Select L0
1183 OUT_BCS_BATCH(batch, fref_entry); //Only 1 reference
1184 for(i = 0; i < 7; i++) {
1185 OUT_BCS_BATCH(batch, 0x80808080);
1187 ADVANCE_BCS_BATCH(batch);
/* Emit the L1 reference index state. */
1189 BEGIN_BCS_BATCH(batch, 10);
1190 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1191 OUT_BCS_BATCH(batch, 1); //Select L1
1192 OUT_BCS_BATCH(batch, bref_entry); //Only 1 reference
1193 for(i = 0; i < 7; i++) {
1194 OUT_BCS_BATCH(batch, 0x80808080);
1196 ADVANCE_BCS_BATCH(batch);
/* Populate the VME state message for MPEG-2 encoding: motion-vector search
 * range from the MPEG-2 level, inter-mode cost LUT values derived from the
 * slice quantiser (lambda), and the picture size in macroblocks. MPEG-2
 * only searches 16x16, so the sub-partition mode costs are zeroed. */
1200 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1201 struct encode_state *encode_state,
1202 struct intel_encoder_context *encoder_context)
1204 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1205 uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1206 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1207 int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1208 int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1209 uint32_t mv_x, mv_y;
1210 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1211 VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1212 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
/* MV search range is determined by the MPEG-2 level (low/main/high);
 * the assignments to mv_x/mv_y are not visible in this excerpt. */
1214 if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1217 } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1220 } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1224 WARN_ONCE("Incorrect Mpeg2 level setting!\n");
/* Only non-intra pictures need the inter mode/MV cost table. */
1229 pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1230 if (pic_param->picture_type != VAEncPictureTypeIntra) {
1231 int qp, m_cost, j, mv_count;
1232 float lambda, m_costf;
1233 slice_param = (VAEncSliceParameterBufferMPEG2 *)
1234 encode_state->slice_params_ext[0]->buffer;
1235 qp = slice_param->quantiser_scale_code;
1236 lambda = intel_lambda_qp(qp);
1237 /* No Intra prediction. So it is zero */
1238 vme_state_message[MODE_INTRA_8X8] = 0;
1239 vme_state_message[MODE_INTRA_4X4] = 0;
1240 vme_state_message[MODE_INTER_MV0] = 0;
/* MV cost grows logarithmically with the MV magnitude bucket. */
1241 for (j = 1; j < 3; j++) {
1242 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1243 m_cost = (int)m_costf;
1244 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1247 for (j = 4; j <= 64; j *= 2) {
1248 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1249 m_cost = (int)m_costf;
1250 vme_state_message[MODE_INTER_MV0 + mv_count] =
1251 intel_format_lutvalue(m_cost, 0x6f);
1255 /* It can only perform the 16x16 search. So mode cost can be ignored for
1256 * the other mode. for example: 16x8/8x8
1258 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1259 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1261 vme_state_message[MODE_INTER_16X8] = 0;
1262 vme_state_message[MODE_INTER_8X8] = 0;
1263 vme_state_message[MODE_INTER_8X4] = 0;
1264 vme_state_message[MODE_INTER_4X4] = 0;
1265 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
/* Pack search range and picture dimensions for the kernel. */
1268 vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1270 vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
/* MPEG-2 counterpart of the gen7 H.264 walker: emit one 8-dword
 * CMD_MEDIA_OBJECT per macroblock over the whole picture (single slice
 * range 0..mb_width*mb_height), with the same scoreboard A/B/C neighbour
 * dependencies and intra-availability flags, in two walker passes.
 * Several loop-control lines are missing from this excerpt. */
1275 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
1276 struct encode_state *encode_state,
1277 int mb_width, int mb_height,
1279 struct intel_encoder_context *encoder_context)
1281 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1282 unsigned int *command_ptr;
1284 #define MPEG2_SCOREBOARD (1 << 21)
/* Map the batchbuffer for CPU writes; unmapped at the end. */
1286 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1287 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1290 unsigned int mb_intra_ub, score_dep;
1291 int x_outer, y_outer, x_inner, y_inner;
1292 int xtemp_outer = 0;
/* The whole picture is treated as one MB range. */
1294 int num_mb = mb_width * mb_height;
/* First walker pass. */
1300 for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1303 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
/* Neighbour availability: left (A/E), top (B), top-left (D),
 * top-right (C), mirrored in the scoreboard dependency mask. */
1307 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1308 score_dep |= MB_SCOREBOARD_A;
1311 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1312 score_dep |= MB_SCOREBOARD_B;
1315 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1317 if (x_inner != (mb_width -1)) {
1318 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1319 score_dep |= MB_SCOREBOARD_C;
/* One MEDIA_OBJECT (8 dwords) per macroblock. */
1323 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1324 *command_ptr++ = kernel;
1325 *command_ptr++ = MPEG2_SCOREBOARD;
1328 /* the (X, Y) term of scoreboard */
1329 *command_ptr++ = ((y_inner << 16) | x_inner);
1330 *command_ptr++ = score_dep;
1332 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1333 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
/* Second walker pass, starting from column mb_width - 2. */
1340 xtemp_outer = mb_width - 2;
1341 if (xtemp_outer < 0)
1343 x_outer = xtemp_outer;
1345 for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1348 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1352 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1353 score_dep |= MB_SCOREBOARD_A;
1356 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1357 score_dep |= MB_SCOREBOARD_B;
1360 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1362 if (x_inner != (mb_width -1)) {
1363 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1364 score_dep |= MB_SCOREBOARD_C;
1368 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1369 *command_ptr++ = kernel;
1370 *command_ptr++ = MPEG2_SCOREBOARD;
1373 /* the (X, Y) term of scoreboard */
1374 *command_ptr++ = ((y_inner << 16) | x_inner);
1375 *command_ptr++ = score_dep;
1377 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1378 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1384 if (x_outer >= mb_width) {
1386 x_outer = xtemp_outer;
/* Terminate the batch and release the CPU mapping. */
1392 *command_ptr++ = MI_BATCH_BUFFER_END;
1394 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
/* Scan an H.264 reference list for the valid picture temporally closest to
 * (and before, by TopFieldOrderCnt) the current picture: the entry with the
 * smallest positive POC difference wins. Invalid entries are skipped.
 * NOTE(review): the tail of this function (recording 'found' and returning
 * it) is not visible in this excerpt. */
1399 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1400 VAPictureH264 *ref_list,
1404 int i, found = -1, min = 0x7FFFFFFF;
1406 for (i = 0; i < num_pictures; i++) {
1409 if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1410 (ref_list[i].picture_id == VA_INVALID_SURFACE))
/* POC distance from the current picture to this reference. */
1413 tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1418 if (tmp > 0 && tmp < min) {
/* Select the reference surface VME will use for one list (0 = L0, 1 = L1),
 * record it in vme_context (used_references / used_reference_objects /
 * ref_index_in_mb) and set up its source surface state via the supplied
 * callback. With a single allowed reference the first list entry is taken
 * directly; otherwise the temporally closest one is chosen via
 * avc_temporal_find_surface(). On failure the bookkeeping is cleared. */
1428 intel_avc_vme_reference_state(VADriverContextP ctx,
1429 struct encode_state *encode_state,
1430 struct intel_encoder_context *encoder_context,
1433 void (* vme_source_surface_state)(
1434 VADriverContextP ctx,
1436 struct object_surface *obj_surface,
1437 struct intel_encoder_context *encoder_context))
1439 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1440 struct object_surface *obj_surface = NULL;
1441 struct i965_driver_data *i965 = i965_driver_data(ctx);
1442 VASurfaceID ref_surface_id;
1443 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1444 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1445 int max_num_references;
1446 VAPictureH264 *curr_pic;
1447 VAPictureH264 *ref_list;
/* Pick list size and reference list for L0 vs L1. */
1450 if (list_index == 0) {
1451 max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1452 ref_list = slice_param->RefPicList0;
1454 max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1455 ref_list = slice_param->RefPicList1;
/* Single-reference case: take the first entry of the list directly. */
1458 if (max_num_references == 1) {
1459 if (list_index == 0) {
1460 ref_surface_id = slice_param->RefPicList0[0].picture_id;
1461 vme_context->used_references[0] = &slice_param->RefPicList0[0];
1463 ref_surface_id = slice_param->RefPicList1[0].picture_id;
1464 vme_context->used_references[1] = &slice_param->RefPicList1[0];
1467 if (ref_surface_id != VA_INVALID_SURFACE)
1468 obj_surface = SURFACE(ref_surface_id);
/* Fall back to the decoded reference object / pic_param entry. */
1472 obj_surface = encode_state->reference_objects[list_index];
1473 vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1478 curr_pic = &pic_param->CurrPic;
1480 /* select the reference frame in temporal space */
1481 ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1482 ref_surface_id = ref_list[ref_idx].picture_id;
1484 if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1485 obj_surface = SURFACE(ref_surface_id);
1487 vme_context->used_reference_objects[list_index] = obj_surface;
1488 vme_context->used_references[list_index] = &ref_list[ref_idx];
/* Success path: record the choice and program the surface state. */
1493 assert(ref_idx >= 0);
1494 vme_context->used_reference_objects[list_index] = obj_surface;
1495 vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1496 vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
/* Failure path: no usable reference — clear the bookkeeping. */
1501 vme_context->used_reference_objects[list_index] = NULL;
1502 vme_context->used_references[list_index] = NULL;
1503 vme_context->ref_index_in_mb[list_index] = 0;
/* Insert the packed header data for one slice into the slice batch:
 * first all raw (non-slice-header) packed buffers attached to the slice,
 * then the slice header itself — either the application-provided packed
 * slice header, or one generated by build_avc_slice_header() when none was
 * supplied (slice_header_index == -1). */
1507 void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
1508 struct encode_state *encode_state,
1509 struct intel_encoder_context *encoder_context,
1511 struct intel_batchbuffer *slice_batch)
1513 int count, i, start_index;
1514 unsigned int length_in_bits;
1515 VAEncPackedHeaderParameterBuffer *param = NULL;
1516 unsigned int *header_data = NULL;
1517 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1518 int slice_header_index;
/* A zero index means no packed slice header was provided by the app. */
1520 if (encode_state->slice_header_index[slice_index] == 0)
1521 slice_header_index = -1;
1523 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1525 count = encode_state->slice_rawdata_count[slice_index];
1526 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
/* Pass 1: insert the slice's raw packed data buffers. */
1528 for (i = 0; i < count; i++) {
1529 unsigned int skip_emul_byte_cnt;
1531 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1533 param = (VAEncPackedHeaderParameterBuffer *)
1534 (encode_state->packed_header_params_ext[start_index + i]->buffer);
1536 /* skip the slice header packed data type as it is lastly inserted */
1537 if (param->type == VAEncPackedHeaderSlice)
1540 length_in_bits = param->bit_length;
1542 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1544 /* as the slice header is still required, the last header flag is set to
1547 mfc_context->insert_object(ctx,
1550 ALIGN(length_in_bits, 32) >> 5,
1551 length_in_bits & 0x1f,
1555 !param->has_emulation_bytes,
/* Pass 2a: no packed slice header — build one from the SPS/PPS/slice
 * parameters and insert it as the final header. */
1559 if (slice_header_index == -1) {
1560 unsigned char *slice_header = NULL;
1561 int slice_header_length_in_bits = 0;
1562 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1563 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1564 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1566 /* No slice header data is passed. And the driver needs to generate it */
1567 /* For the Normal H264 */
1568 slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
1572 mfc_context->insert_object(ctx, encoder_context,
1573 (unsigned int *)slice_header,
1574 ALIGN(slice_header_length_in_bits, 32) >> 5,
1575 slice_header_length_in_bits & 0x1f,
1576 5, /* first 5 bytes are start code + nal unit type */
1577 1, 0, 1, slice_batch);
/* Pass 2b: insert the application-provided packed slice header. */
1581 unsigned int skip_emul_byte_cnt;
1583 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1585 param = (VAEncPackedHeaderParameterBuffer *)
1586 (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1587 length_in_bits = param->bit_length;
1589 /* as the slice header is the last header data for one slice,
1590 * the last header flag is set to one.
1592 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1594 mfc_context->insert_object(ctx,
1597 ALIGN(length_in_bits, 32) >> 5,
1598 length_in_bits & 0x1f,
1602 !param->has_emulation_bytes,
/* Lazily build the per-QP MB/MV cost table BO for the current slice type
 * (I/P/B). If the table for this slice type already exists, return early;
 * otherwise allocate a BO (32 bytes per QP), fill one entry per QP via
 * intel_h264_calc_mbmvcost_qp(), and cache it on the VME context. */
1610 intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
1611 struct encode_state *encode_state,
1612 struct intel_encoder_context *encoder_context)
1614 struct i965_driver_data *i965 = i965_driver_data(ctx);
1615 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1616 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1619 uint8_t *cost_table;
1621 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Already-built table for this slice type? Nothing to do. */
1624 if (slice_type == SLICE_TYPE_I) {
1625 if (vme_context->i_qp_cost_table)
1627 } else if (slice_type == SLICE_TYPE_P) {
1628 if (vme_context->p_qp_cost_table)
1631 if (vme_context->b_qp_cost_table)
1635 /* It is enough to allocate 32 bytes for each qp. */
1636 bo = dri_bo_alloc(i965->intel.bufmgr,
1642 assert(bo->virtual);
1643 cost_table = (uint8_t *)(bo->virtual);
/* Fill one 32-byte cost entry per QP value. */
1644 for (qp = 0; qp < QP_MAX; qp++) {
1645 intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
/* Cache the BO on the VME context under its slice type. */
1651 if (slice_type == SLICE_TYPE_I) {
1652 vme_context->i_qp_cost_table = bo;
1653 } else if (slice_type == SLICE_TYPE_P) {
1654 vme_context->p_qp_cost_table = bo;
1656 vme_context->b_qp_cost_table = bo;
1659 vme_context->cost_table_size = QP_MAX * 32;
/* Bind the previously built per-QP cost table (for the current slice type)
 * as a buffer surface in the VME GPE context, at the given binding-table
 * and surface-state offsets. */
1664 intel_h264_setup_cost_surface(VADriverContextP ctx,
1665 struct encode_state *encode_state,
1666 struct intel_encoder_context *encoder_context,
1667 unsigned long binding_table_offset,
1668 unsigned long surface_state_offset)
1670 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1671 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1675 struct i965_buffer_surface cost_table;
1677 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Pick the cost table matching the slice type (I/P/B). */
1680 if (slice_type == SLICE_TYPE_I) {
1681 bo = vme_context->i_qp_cost_table;
1682 } else if (slice_type == SLICE_TYPE_P) {
1683 bo = vme_context->p_qp_cost_table;
1685 bo = vme_context->b_qp_cost_table;
/* One 32-byte block per QP, pitch 16 — matches the allocation layout. */
1689 cost_table.num_blocks = QP_MAX;
1690 cost_table.pitch = 16;
1691 cost_table.size_block = 32;
1693 vme_context->vme_buffer_suface_setup(ctx,
1694 &vme_context->gpe_context,
1696 binding_table_offset,
1697 surface_state_offset);
1701 * the idea of conversion between qp and qstep comes from scaling process
1702 * of transform coeff for Luma component in H264 spec.
1704 * In order to avoid too small qstep, it is multiplied by 16.
/* Map an H.264 QP to a linear quantizer step: qstep = 2^(qp/6 - 2)
 * (then scaled by 16 per the note above; the scaling/return lines are not
 * visible in this excerpt). */
1706 static float intel_h264_qp_qstep(int qp)
1710 value = value / 6 - 2;
1711 qstep = powf(2, value);
/* Inverse of intel_h264_qp_qstep(): recover the QP from a linear quantizer
 * step via qp = 12 + 6*log2(qstep) (the 12 offsets the *16 scaling). */
1715 static int intel_h264_qstep_qp(float qstep)
1719 qp = 12.0f + 6.0f * log2f(qstep);
1725 * Currently it is based on the following assumption:
1726 * SUM(roi_area * 1 / roi_qstep) + non_area * 1 / nonroi_qstep =
1727 * total_aread * 1 / baseqp_qstep
1729 * qstep is the linearized quantizer of H264 quantizer
/* Per-ROI region bounds in macroblock units; fields of a struct whose
 * declaration line is not visible in this excerpt (used as
 * ROIRegionParam by the ROI CBR code below). */
1732 int row_start_in_mb;
1734 int col_start_in_mb;
/* Compute the per-MB QP map for CBR encoding with ROI regions. Each ROI's
 * QP is base_qp + its qp_delta (clipped to [1,51]); the non-ROI QP is then
 * solved from the rate-balance assumption documented above (equal total
 * 1/qstep budget) so overall bitrate stays close to the base QP's. The map
 * is written into vme_context->qp_per_mb. */
1744 intel_h264_enc_roi_cbr(VADriverContextP ctx,
1746 VAEncMiscParameterBufferROI *pMiscParamROI,
1747 struct encode_state *encode_state,
1748 struct intel_encoder_context *encoder_context)
1751 VAEncROI *region_roi;
1754 ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS];
1759 float qstep_nonroi, qstep_base;
1760 float roi_area, total_area, nonroi_area;
1763 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1764 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1765 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1766 int mbs_in_picture = width_in_mbs * height_in_mbs;
1768 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1769 VAStatus vaStatus = VA_STATUS_SUCCESS;
1771 if(pMiscParamROI != NULL)
/* Cap the ROI count at the driver maximum. */
1773 num_roi = (pMiscParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pMiscParamROI->num_roi;
1775 /* currently roi_value_is_qp_delta is the only supported mode of priority.
1777 * qp_delta set by user is added to base_qp, which is then clapped by
1778 * [base_qp-min_delta, base_qp+max_delta].
1780 ASSERT_RET(pMiscParamROI->roi_flags.bits.roi_value_is_qp_delta,VA_STATUS_ERROR_INVALID_PARAMETER);
1783 /* when the base_qp is lower than 12, the quality is quite good based
1784 * on the H264 test experience.
1785 * In such case it is unnecessary to adjust the quality for ROI region.
1787 if (base_qp <= 12) {
1788 nonroi_qp = base_qp;
/* Convert each ROI rectangle from pixels to an MB-aligned region and
 * accumulate its area and 1/qstep contribution. */
1795 for (i = 0; i < num_roi; i++) {
1796 int row_start, row_end, col_start, col_end;
1797 int roi_width_mbs, roi_height_mbs;
1802 region_roi = (VAEncROI *)pMiscParamROI->roi + i;
1804 col_start = region_roi->roi_rectangle.x;
1805 col_end = col_start + region_roi->roi_rectangle.width;
1806 row_start = region_roi->roi_rectangle.y;
1807 row_end = row_start + region_roi->roi_rectangle.height;
/* Round down the start and up the end to 16-pixel MB boundaries. */
1808 col_start = col_start / 16;
1809 col_end = (col_end + 15) / 16;
1810 row_start = row_start / 16;
1811 row_end = (row_end + 15) / 16;
1813 roi_width_mbs = col_end - col_start;
1814 roi_height_mbs = row_end - row_start;
1815 mbs_in_roi = roi_width_mbs * roi_height_mbs;
1817 param_regions[i].row_start_in_mb = row_start;
1818 param_regions[i].row_end_in_mb = row_end;
1819 param_regions[i].col_start_in_mb = col_start;
1820 param_regions[i].col_end_in_mb = col_end;
1821 param_regions[i].width_mbs = roi_width_mbs;
1822 param_regions[i].height_mbs = roi_height_mbs;
/* ROI QP = base QP plus the user's delta, clipped to the valid range. */
1824 roi_qp = base_qp + region_roi->roi_value;
1825 BRC_CLIP(roi_qp, 1, 51);
1827 param_regions[i].roi_qp = roi_qp;
1828 qstep_roi = intel_h264_qp_qstep(roi_qp);
1830 roi_area += mbs_in_roi;
1831 sum_roi += mbs_in_roi / qstep_roi;
/* Solve the non-ROI qstep from the rate-balance equation. */
1834 total_area = mbs_in_picture;
1835 nonroi_area = total_area - roi_area;
1837 qstep_base = intel_h264_qp_qstep(base_qp);
1838 temp = (total_area / qstep_base - sum_roi);
1843 qstep_nonroi = nonroi_area / temp;
1844 nonroi_qp = intel_h264_qstep_qp(qstep_nonroi);
1847 BRC_CLIP(nonroi_qp, 1, 51);
/* Fill the whole map with the non-ROI QP, then overwrite each ROI. */
1850 memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture);
1854 for (i = 0; i < num_roi; i++) {
1855 for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) {
1856 qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb;
1857 memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs);
/* Entry point for H.264 ROI handling. Validates the ROI misc parameter,
 * (re)allocates the per-MB QP map when the picture size changes, and fills
 * it: CBR delegates to intel_h264_enc_roi_cbr(); CQP seeds the map with the
 * slice QP and applies each ROI's qp_delta directly; other RC modes disable
 * ROI. ROI is also disabled for multi-slice pictures. */
1865 intel_h264_enc_roi_config(VADriverContextP ctx,
1866 struct encode_state *encode_state,
1867 struct intel_encoder_context *encoder_context)
1871 VAEncROI *region_roi;
1872 struct i965_driver_data *i965 = i965_driver_data(ctx);
1873 VAEncMiscParameterBuffer* pMiscParamROI;
1874 VAEncMiscParameterBufferROI *pParamROI = NULL;
1875 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1876 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1877 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1878 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1879 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1881 int row_start, row_end, col_start, col_end;
1884 vme_context->roi_enabled = 0;
1885 /* Restriction: Disable ROI when multi-slice is enabled */
1886 if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
1889 if (encode_state->misc_param[VAEncMiscParameterTypeROI][0] != NULL) {
1890 pMiscParamROI = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeROI][0]->buffer;
1891 pParamROI = (VAEncMiscParameterBufferROI *)pMiscParamROI->data;
1893 /* check whether number of ROI is correct */
1894 num_roi = (pParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pParamROI->num_roi;
1898 vme_context->roi_enabled = 1;
1900 if (!vme_context->roi_enabled)
/* Reallocate the per-MB QP map when the picture geometry changed. */
1903 if ((vme_context->saved_width_mbs != width_in_mbs) ||
1904 (vme_context->saved_height_mbs != height_in_mbs)) {
1905 free(vme_context->qp_per_mb);
1906 vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);
1908 vme_context->saved_width_mbs = width_in_mbs;
1909 vme_context->saved_height_mbs = height_in_mbs;
1910 assert(vme_context->qp_per_mb);
1912 if (encoder_context->rate_control_mode == VA_RC_CBR) {
1914 * TODO: More complex Qp adjust needs to be added.
1915 * Currently it is initialized to slice_qp.
1917 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1919 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1921 qp = mfc_context->brc.qp_prime_y[0][slice_type];
1922 intel_h264_enc_roi_cbr(ctx, qp, pParamROI,encode_state, encoder_context);
/* CQP: seed the whole map with the slice QP, then apply each ROI. */
1924 } else if (encoder_context->rate_control_mode == VA_RC_CQP){
1925 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1926 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1929 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1930 memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
/* Iterate ROIs in reverse so earlier (higher-priority) ROIs win overlaps. */
1933 for (j = num_roi; j ; j--) {
1934 int qp_delta, qp_clip;
1936 region_roi = (VAEncROI *)pParamROI->roi + j - 1;
1938 col_start = region_roi->roi_rectangle.x;
1939 col_end = col_start + region_roi->roi_rectangle.width;
1940 row_start = region_roi->roi_rectangle.y;
1941 row_end = row_start + region_roi->roi_rectangle.height;
/* Convert the pixel rectangle to MB-aligned bounds. */
1943 col_start = col_start / 16;
1944 col_end = (col_end + 15) / 16;
1945 row_start = row_start / 16;
1946 row_end = (row_end + 15) / 16;
1948 qp_delta = region_roi->roi_value;
1949 qp_clip = qp + qp_delta;
1951 BRC_CLIP(qp_clip, 1, 51);
1953 for (i = row_start; i < row_end; i++) {
1954 qp_ptr = vme_context->qp_per_mb + (i * width_in_mbs) + col_start;
1955 memset(qp_ptr, qp_clip, (col_end - col_start));
1960 * TODO: Disable it for non CBR-CQP.
1962 vme_context->roi_enabled = 0;
/* Gen7 needs a software-built batch when per-MB QP is in use. */
1965 if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info))
1966 encoder_context->soft_batch_force = 1;
/* HEVC counterpart of avc_temporal_find_surface(): pick the valid reference
 * with the smallest positive pic_order_cnt distance from the current
 * picture. Invalid entries are skipped. The tail (recording/returning
 * 'found') is not visible in this excerpt. */
1973 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
1974 VAPictureHEVC *ref_list,
1978 int i, found = -1, min = 0x7FFFFFFF;
1980 for (i = 0; i < num_pictures; i++) {
1983 if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
1984 (ref_list[i].picture_id == VA_INVALID_SURFACE))
/* POC distance from the current picture to this reference. */
1987 tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
1992 if (tmp > 0 && tmp < min) {
/* HEVC analogue of intel_avc_vme_reference_state(): choose the reference
 * surface VME uses for list 0/1, record it on the VME context, and program
 * its surface state via the callback. For 10-bit HEVC the reference's NV12
 * proxy surface (hevc_encoder_surface->nv12_surface_obj) is substituted
 * before programming. Clears the bookkeeping on failure. */
2001 intel_hevc_vme_reference_state(VADriverContextP ctx,
2002 struct encode_state *encode_state,
2003 struct intel_encoder_context *encoder_context,
2006 void (* vme_source_surface_state)(
2007 VADriverContextP ctx,
2009 struct object_surface *obj_surface,
2010 struct intel_encoder_context *encoder_context))
2012 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2013 struct object_surface *obj_surface = NULL;
2014 struct i965_driver_data *i965 = i965_driver_data(ctx);
2015 VASurfaceID ref_surface_id;
2016 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2017 VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2018 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2019 int max_num_references;
2020 VAPictureHEVC *curr_pic;
2021 VAPictureHEVC *ref_list;
2023 unsigned int is_hevc10 = 0;
2024 GenHevcSurface *hevc_encoder_surface = NULL;
/* Detect 10-bit content from the SPS bit-depth fields. */
2026 if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
2027 || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
/* Pick list size and reference list for L0 vs L1. */
2030 if (list_index == 0) {
2031 max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
2032 ref_list = slice_param->ref_pic_list0;
2034 max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
2035 ref_list = slice_param->ref_pic_list1;
/* Single-reference case: take the first list entry directly. */
2038 if (max_num_references == 1) {
2039 if (list_index == 0) {
2040 ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
2041 vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
2043 ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
2044 vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
2047 if (ref_surface_id != VA_INVALID_SURFACE)
2048 obj_surface = SURFACE(ref_surface_id);
/* Fall back to the decoded reference object / pic_param entry. */
2052 obj_surface = encode_state->reference_objects[list_index];
2053 vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
2058 curr_pic = &pic_param->decoded_curr_pic;
2060 /* select the reference frame in temporal space */
2061 ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
2062 ref_surface_id = ref_list[ref_idx].picture_id;
2064 if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
2065 obj_surface = SURFACE(ref_surface_id);
2067 vme_context->used_reference_objects[list_index] = obj_surface;
2068 vme_context->used_references[list_index] = &ref_list[ref_idx];
/* Success path: record the choice; for 10-bit use the NV12 proxy. */
2073 assert(ref_idx >= 0);
2074 vme_context->used_reference_objects[list_index] = obj_surface;
2077 hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2078 assert(hevc_encoder_surface);
2079 obj_surface = hevc_encoder_surface->nv12_surface_obj;
2081 vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
2082 vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
/* Failure path: no usable reference — clear the bookkeeping. */
2087 vme_context->used_reference_objects[list_index] = NULL;
2088 vme_context->used_references[list_index] = NULL;
2089 vme_context->ref_index_in_mb[list_index] = 0;
2093 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
2094 struct encode_state *encode_state,
2095 struct intel_encoder_context *encoder_context)
2097 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2098 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2099 VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2100 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2101 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2102 int qp, m_cost, j, mv_count;
2103 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
2104 float lambda, m_costf;
2106 /* here no SI SP slice for HEVC, do not need slice fixup */
2107 int slice_type = slice_param->slice_type;
2110 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2112 if(encoder_context->rate_control_mode == VA_RC_CBR)
2114 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
2115 if(slice_type == HEVC_SLICE_B) {
2116 if(pSequenceParameter->ip_period == 1)
2118 slice_type = HEVC_SLICE_P;
2119 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2121 }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
2122 slice_type = HEVC_SLICE_P;
2123 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2129 if (vme_state_message == NULL)
2132 assert(qp <= QP_MAX);
2133 lambda = intel_lambda_qp(qp);
2134 if (slice_type == HEVC_SLICE_I) {
2135 vme_state_message[MODE_INTRA_16X16] = 0;
2136 m_cost = lambda * 4;
2137 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2138 m_cost = lambda * 16;
2139 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2140 m_cost = lambda * 3;
2141 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2144 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
2145 for (j = 1; j < 3; j++) {
2146 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2147 m_cost = (int)m_costf;
2148 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2151 for (j = 4; j <= 64; j *= 2) {
2152 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2153 m_cost = (int)m_costf;
2154 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
2159 vme_state_message[MODE_INTRA_16X16] = 0x4a;
2160 vme_state_message[MODE_INTRA_8X8] = 0x4a;
2161 vme_state_message[MODE_INTRA_4X4] = 0x4a;
2162 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2163 vme_state_message[MODE_INTER_16X16] = 0x4a;
2164 vme_state_message[MODE_INTER_16X8] = 0x4a;
2165 vme_state_message[MODE_INTER_8X8] = 0x4a;
2166 vme_state_message[MODE_INTER_8X4] = 0x4a;
2167 vme_state_message[MODE_INTER_4X4] = 0x4a;
2168 vme_state_message[MODE_INTER_BWD] = 0x2a;
2171 m_costf = lambda * 10;
2172 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2173 m_cost = lambda * 14;
2174 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2175 m_cost = lambda * 24;
2176 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2177 m_costf = lambda * 3.5;
2179 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2180 if (slice_type == HEVC_SLICE_P) {
2181 m_costf = lambda * 2.5;
2183 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2184 m_costf = lambda * 4;
2186 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2187 m_costf = lambda * 1.5;
2189 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2190 m_costf = lambda * 3;
2192 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2193 m_costf = lambda * 5;
2195 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2196 /* BWD is not used in P-frame */
2197 vme_state_message[MODE_INTER_BWD] = 0;
2199 m_costf = lambda * 2.5;
2201 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2202 m_costf = lambda * 5.5;
2204 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2205 m_costf = lambda * 3.5;
2207 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2208 m_costf = lambda * 5.0;
2210 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2211 m_costf = lambda * 6.5;
2213 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2214 m_costf = lambda * 1.5;
2216 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);