2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
45 #include "intel_media.h"
48 #define log2f(x) (logf(x)/(float)M_LN2)
51 int intel_avc_enc_slice_type_fixup(int slice_type)
53 if (slice_type == SLICE_TYPE_SP ||
54 slice_type == SLICE_TYPE_P)
55 slice_type = SLICE_TYPE_P;
56 else if (slice_type == SLICE_TYPE_SI ||
57 slice_type == SLICE_TYPE_I)
58 slice_type = SLICE_TYPE_I;
60 if (slice_type != SLICE_TYPE_B)
61 WARN_ONCE("Invalid slice type for H.264 encoding!\n");
63 slice_type = SLICE_TYPE_B;
70 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
71 struct intel_encoder_context *encoder_context)
73 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
76 for(i = 0 ; i < 3; i++) {
77 mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
78 mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
79 mfc_context->bit_rate_control_context[i].GrowInit = 6;
80 mfc_context->bit_rate_control_context[i].GrowResistance = 4;
81 mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
82 mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
84 mfc_context->bit_rate_control_context[i].Correct[0] = 8;
85 mfc_context->bit_rate_control_context[i].Correct[1] = 4;
86 mfc_context->bit_rate_control_context[i].Correct[2] = 2;
87 mfc_context->bit_rate_control_context[i].Correct[3] = 2;
88 mfc_context->bit_rate_control_context[i].Correct[4] = 4;
89 mfc_context->bit_rate_control_context[i].Correct[5] = 8;
93 static void intel_mfc_brc_init(struct encode_state *encode_state,
94 struct intel_encoder_context* encoder_context)
96 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
97 double bitrate, framerate;
98 double frame_per_bits = 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
99 double qp1_size = 0.1 * frame_per_bits;
100 double qp51_size = 0.001 * frame_per_bits;
101 double bpf, factor, hrd_factor;
102 int inum = encoder_context->brc.num_iframes_in_gop,
103 pnum = encoder_context->brc.num_pframes_in_gop,
104 bnum = encoder_context->brc.num_bframes_in_gop; /* Gop structure: number of I, P, B frames in the Gop. */
105 int intra_period = encoder_context->brc.gop_size;
108 if (encoder_context->layer.num_layers > 1)
109 qp1_size = 0.15 * frame_per_bits;
111 mfc_context->brc.mode = encoder_context->rate_control_mode;
113 mfc_context->hrd.violation_noted = 0;
115 for (i = 0; i < encoder_context->layer.num_layers; i++) {
116 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = 26;
117 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 26;
118 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = 26;
121 bitrate = encoder_context->brc.bits_per_second[0];
122 framerate = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
124 bitrate = (encoder_context->brc.bits_per_second[i] - encoder_context->brc.bits_per_second[i - 1]);
125 framerate = ((double)encoder_context->brc.framerate[i].num / (double)encoder_context->brc.framerate[i].den) -
126 ((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
129 if (i == encoder_context->layer.num_layers - 1)
132 factor = ((double)encoder_context->brc.framerate[i].num / (double)encoder_context->brc.framerate[i].den) /
133 ((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
136 hrd_factor = (double)bitrate / encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
138 mfc_context->hrd.buffer_size[i] = (unsigned int)(encoder_context->brc.hrd_buffer_size * hrd_factor);
139 mfc_context->hrd.current_buffer_fullness[i] =
140 (double)(encoder_context->brc.hrd_initial_buffer_fullness < encoder_context->brc.hrd_buffer_size) ?
141 encoder_context->brc.hrd_initial_buffer_fullness : encoder_context->brc.hrd_buffer_size / 2.;
142 mfc_context->hrd.current_buffer_fullness[i] *= hrd_factor;
143 mfc_context->hrd.target_buffer_fullness[i] = (double)encoder_context->brc.hrd_buffer_size * hrd_factor / 2.;
144 mfc_context->hrd.buffer_capacity[i] = (double)encoder_context->brc.hrd_buffer_size * hrd_factor / qp1_size;
146 if (encoder_context->layer.num_layers > 1) {
148 intra_period = (int)(encoder_context->brc.gop_size * factor);
150 pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor);
151 bnum = intra_period - inum - pnum;
153 intra_period = (int)(encoder_context->brc.gop_size * factor) - intra_period;
155 pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor) - pnum;
156 bnum = intra_period - inum - pnum;
160 mfc_context->brc.gop_nums[i][SLICE_TYPE_I] = inum;
161 mfc_context->brc.gop_nums[i][SLICE_TYPE_P] = pnum;
162 mfc_context->brc.gop_nums[i][SLICE_TYPE_B] = bnum;
164 mfc_context->brc.target_frame_size[i][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
165 (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
166 mfc_context->brc.target_frame_size[i][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
167 mfc_context->brc.target_frame_size[i][SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
169 bpf = mfc_context->brc.bits_per_frame[i] = bitrate/framerate;
171 if ((bpf > qp51_size) && (bpf < qp1_size)) {
172 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
174 else if (bpf >= qp1_size)
175 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 1;
176 else if (bpf <= qp51_size)
177 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51;
179 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P];
180 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I];
182 BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I], 1, 51);
183 BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P], 1, 51);
184 BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B], 1, 51);
188 int intel_mfc_update_hrd(struct encode_state *encode_state,
189 struct intel_encoder_context *encoder_context,
192 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
193 int layer_id = encoder_context->layer.curr_frame_layer_id;
194 double prev_bf = mfc_context->hrd.current_buffer_fullness[layer_id];
196 mfc_context->hrd.current_buffer_fullness[layer_id] -= frame_bits;
198 if (mfc_context->hrd.buffer_size[layer_id] > 0 && mfc_context->hrd.current_buffer_fullness[layer_id] <= 0.) {
199 mfc_context->hrd.current_buffer_fullness[layer_id] = prev_bf;
200 return BRC_UNDERFLOW;
203 mfc_context->hrd.current_buffer_fullness[layer_id] += mfc_context->brc.bits_per_frame[layer_id];
204 if (mfc_context->hrd.buffer_size[layer_id] > 0 && mfc_context->hrd.current_buffer_fullness[layer_id] > mfc_context->hrd.buffer_size[layer_id]) {
205 if (mfc_context->brc.mode == VA_RC_VBR)
206 mfc_context->hrd.current_buffer_fullness[layer_id] = mfc_context->hrd.buffer_size[layer_id];
208 mfc_context->hrd.current_buffer_fullness[layer_id] = prev_bf;
212 return BRC_NO_HRD_VIOLATION;
215 int intel_mfc_brc_postpack(struct encode_state *encode_state,
216 struct intel_encoder_context *encoder_context,
219 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
220 gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
221 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
222 int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
223 int curr_frame_layer_id, next_frame_layer_id;
225 int qp; // quantizer of previously encoded slice of current type
226 int qpn; // predicted quantizer for next frame of current type in integer format
227 double qpf; // predicted quantizer for next frame of current type in float format
228 double delta_qp; // QP correction
229 int target_frame_size, frame_size_next;
231 * x - how far we are from HRD buffer borders
232 * y - how far we are from target HRD buffer fullness
235 double frame_size_alpha;
237 if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0) {
238 curr_frame_layer_id = 0;
239 next_frame_layer_id = 0;
241 curr_frame_layer_id = encoder_context->layer.curr_frame_layer_id;
242 next_frame_layer_id = encoder_context->layer.frame_layer_ids[encoder_context->num_frames_in_sequence % encoder_context->layer.size_frame_layer_ids];
245 /* checking wthether HRD compliance first */
246 sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
248 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
251 next_frame_layer_id = curr_frame_layer_id;
254 mfc_context->brc.bits_prev_frame[curr_frame_layer_id] = frame_bits;
255 frame_bits = mfc_context->brc.bits_prev_frame[next_frame_layer_id];
257 mfc_context->brc.prev_slice_type[curr_frame_layer_id] = slicetype;
258 slicetype = mfc_context->brc.prev_slice_type[next_frame_layer_id];
260 /* 0 means the next frame is the first frame of next layer */
264 qpi = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I];
265 qpp = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P];
266 qpb = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B];
268 qp = mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype];
270 target_frame_size = mfc_context->brc.target_frame_size[next_frame_layer_id][slicetype];
271 if (mfc_context->hrd.buffer_capacity[next_frame_layer_id] < 5)
272 frame_size_alpha = 0;
274 frame_size_alpha = (double)mfc_context->brc.gop_nums[next_frame_layer_id][slicetype];
275 if (frame_size_alpha > 30) frame_size_alpha = 30;
276 frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
277 (double)(frame_size_alpha + 1.);
279 /* frame_size_next: avoiding negative number and too small value */
280 if ((double)frame_size_next < (double)(target_frame_size * 0.25))
281 frame_size_next = (int)((double)target_frame_size * 0.25);
283 qpf = (double)qp * target_frame_size / frame_size_next;
284 qpn = (int)(qpf + 0.5);
287 /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
288 mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] += qpf - qpn;
289 if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] > 1.0) {
291 mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
292 } else if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] < -1.0) {
294 mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
297 /* making sure that QP is not changing too fast */
298 if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
299 else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
300 /* making sure that with QP predictions we did do not leave QPs range */
301 BRC_CLIP(qpn, 1, 51);
303 /* calculating QP delta as some function*/
304 x = mfc_context->hrd.target_buffer_fullness[next_frame_layer_id] - mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
306 x /= mfc_context->hrd.target_buffer_fullness[next_frame_layer_id];
307 y = mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
310 x /= (mfc_context->hrd.buffer_size[next_frame_layer_id] - mfc_context->hrd.target_buffer_fullness[next_frame_layer_id]);
311 y = mfc_context->hrd.buffer_size[next_frame_layer_id] - mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
313 if (y < 0.01) y = 0.01;
315 else if (x < -1) x = -1;
317 delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
318 qpn = (int)(qpn + delta_qp + 0.5);
320 /* making sure that with QP predictions we did do not leave QPs range */
321 BRC_CLIP(qpn, 1, 51);
323 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
324 /* correcting QPs of slices of other types */
325 if (slicetype == SLICE_TYPE_P) {
326 if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
327 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
328 if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
329 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
330 } else if (slicetype == SLICE_TYPE_I) {
331 if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
332 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
333 if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
334 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
335 } else { // SLICE_TYPE_B
336 if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
337 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
338 if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
339 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
341 BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I], 1, 51);
342 BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P], 1, 51);
343 BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B], 1, 51);
344 } else if (sts == BRC_UNDERFLOW) { // underflow
345 if (qpn <= qp) qpn = qp + 1;
348 sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
350 } else if (sts == BRC_OVERFLOW) {
351 if (qpn >= qp) qpn = qp - 1;
352 if (qpn < 1) { // < 0 (?) overflow with minQP
354 sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
358 mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype] = qpn;
363 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
364 struct intel_encoder_context *encoder_context)
366 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
367 unsigned int rate_control_mode = encoder_context->rate_control_mode;
368 int target_bit_rate = encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
370 // current we only support CBR mode.
371 if (rate_control_mode == VA_RC_CBR) {
372 mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
373 mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
374 mfc_context->vui_hrd.i_cpb_removal_delay = 2;
375 mfc_context->vui_hrd.i_frame_number = 0;
377 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
378 mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
379 mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
385 intel_mfc_hrd_context_update(struct encode_state *encode_state,
386 struct gen6_mfc_context *mfc_context)
388 mfc_context->vui_hrd.i_frame_number++;
391 int intel_mfc_interlace_check(VADriverContextP ctx,
392 struct encode_state *encode_state,
393 struct intel_encoder_context *encoder_context)
395 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
396 VAEncSliceParameterBufferH264 *pSliceParameter;
399 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
400 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
402 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
403 pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer;
404 mbCount += pSliceParameter->num_macroblocks;
407 if ( mbCount == ( width_in_mbs * height_in_mbs ) )
413 void intel_mfc_brc_prepare(struct encode_state *encode_state,
414 struct intel_encoder_context *encoder_context)
416 unsigned int rate_control_mode = encoder_context->rate_control_mode;
418 if (encoder_context->codec != CODEC_H264 &&
419 encoder_context->codec != CODEC_H264_MVC)
422 if (rate_control_mode == VA_RC_CBR) {
423 /*Programing bit rate control */
424 if (encoder_context->brc.need_reset) {
425 intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
426 intel_mfc_brc_init(encode_state, encoder_context);
429 /*Programing HRD control */
430 if (encoder_context->brc.need_reset)
431 intel_mfc_hrd_context_init(encode_state, encoder_context);
435 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
436 struct encode_state *encode_state,
437 struct intel_encoder_context *encoder_context,
438 struct intel_batchbuffer *slice_batch)
440 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
441 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
442 unsigned int rate_control_mode = encoder_context->rate_control_mode;
443 unsigned int skip_emul_byte_cnt;
445 if (encode_state->packed_header_data[idx]) {
446 VAEncPackedHeaderParameterBuffer *param = NULL;
447 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
448 unsigned int length_in_bits;
450 assert(encode_state->packed_header_param[idx]);
451 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
452 length_in_bits = param->bit_length;
454 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
455 mfc_context->insert_object(ctx,
458 ALIGN(length_in_bits, 32) >> 5,
459 length_in_bits & 0x1f,
463 !param->has_emulation_bytes,
467 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
469 if (encode_state->packed_header_data[idx]) {
470 VAEncPackedHeaderParameterBuffer *param = NULL;
471 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
472 unsigned int length_in_bits;
474 assert(encode_state->packed_header_param[idx]);
475 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
476 length_in_bits = param->bit_length;
478 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
480 mfc_context->insert_object(ctx,
483 ALIGN(length_in_bits, 32) >> 5,
484 length_in_bits & 0x1f,
488 !param->has_emulation_bytes,
492 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
494 if (encode_state->packed_header_data[idx]) {
495 VAEncPackedHeaderParameterBuffer *param = NULL;
496 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
497 unsigned int length_in_bits;
499 assert(encode_state->packed_header_param[idx]);
500 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
501 length_in_bits = param->bit_length;
503 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
504 mfc_context->insert_object(ctx,
507 ALIGN(length_in_bits, 32) >> 5,
508 length_in_bits & 0x1f,
512 !param->has_emulation_bytes,
514 } else if (rate_control_mode == VA_RC_CBR) {
516 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
518 unsigned char *sei_data = NULL;
520 int length_in_bits = build_avc_sei_buffer_timing(
521 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
522 mfc_context->vui_hrd.i_initial_cpb_removal_delay,
524 mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
525 mfc_context->vui_hrd.i_dpb_output_delay_length,
528 mfc_context->insert_object(ctx,
530 (unsigned int *)sei_data,
531 ALIGN(length_in_bits, 32) >> 5,
532 length_in_bits & 0x1f,
/*
 * intel_mfc_avc_prepare: bind every buffer object the MFC PAK engine
 * needs for one H.264 frame: the reconstructed surface and its direct-MV
 * (DMV) buffers, the reference surfaces and their DMV buffers, the
 * uncompressed source picture, and the coded (output) buffer segment.
 *
 * NOTE(review): this copy of the file has lines elided (dropped braces,
 * the dri_bo_alloc() name/alignment arguments, the `dri_bo *bo;`
 * declaration, the coded-buffer map/unmap and the final return) —
 * compare against the upstream intel-vaapi-driver source before relying
 * on the exact control flow below.
 */
VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    GenAvcSurface *gen6_avc_surface;
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    int i, j, enable_avc_ildb = 0;
    VAEncSliceParameterBufferH264 *slice_param;
    struct i965_coded_buffer_segment *coded_buffer_segment;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    if (IS_GEN6(i965->intel.device_info)) {
        /* On the SNB it should be fixed to 128 for the DMV buffer */
    /* scan every slice: in-loop deblocking is enabled as soon as one
     * slice does not disable it (disable_deblocking_filter_idc != 1) */
    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));
            if (slice_param->disable_deblocking_filter_idc != 1) {
    /*Setup all the input&output object*/
    /* Setup current frame and current direct mv buffer*/
    obj_surface = encode_state->reconstructed_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    /* lazily allocate the per-surface top/bottom direct-MV buffers and
     * attach them to the surface as private data */
    if ( obj_surface->private_data == NULL) {
        gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
        assert(gen6_avc_surface);
        gen6_avc_surface->dmv_top =
            dri_bo_alloc(i965->intel.bufmgr,
                         68 * width_in_mbs * height_in_mbs,
        gen6_avc_surface->dmv_bottom =
            dri_bo_alloc(i965->intel.bufmgr,
                         68 * width_in_mbs * height_in_mbs,
        assert(gen6_avc_surface->dmv_top);
        assert(gen6_avc_surface->dmv_bottom);
        obj_surface->private_data = (void *)gen6_avc_surface;
        obj_surface->free_private_data = (void *)gen_free_avc_surface;
    gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
    /* last two DMV slots belong to the frame currently being encoded */
    mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
    mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
    dri_bo_reference(gen6_avc_surface->dmv_top);
    dri_bo_reference(gen6_avc_surface->dmv_bottom);
    /* with in-loop deblocking the post-deblocked picture is the recon
     * output; otherwise the pre-deblocked one is */
    if (enable_avc_ildb) {
        mfc_context->post_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->post_deblocking_output.bo);
        mfc_context->pre_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->pre_deblocking_output.bo);
    mfc_context->surface_state.width = obj_surface->orig_width;
    mfc_context->surface_state.height = obj_surface->orig_height;
    mfc_context->surface_state.w_pitch = obj_surface->width;
    mfc_context->surface_state.h_pitch = obj_surface->height;
    /* Setup reference frames and direct mv buffers*/
    for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
        obj_surface = encode_state->reference_objects[i];
        if (obj_surface && obj_surface->bo) {
            mfc_context->reference_surfaces[i].bo = obj_surface->bo;
            dri_bo_reference(obj_surface->bo);
            /* Check DMV buffer */
            if ( obj_surface->private_data == NULL) {
                gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
                assert(gen6_avc_surface);
                gen6_avc_surface->dmv_top =
                    dri_bo_alloc(i965->intel.bufmgr,
                                 68 * width_in_mbs * height_in_mbs,
                gen6_avc_surface->dmv_bottom =
                    dri_bo_alloc(i965->intel.bufmgr,
                                 68 * width_in_mbs * height_in_mbs,
                assert(gen6_avc_surface->dmv_top);
                assert(gen6_avc_surface->dmv_bottom);
                obj_surface->private_data = gen6_avc_surface;
                obj_surface->free_private_data = gen_free_avc_surface;
            gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
            /* Setup DMV buffer */
            mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
            mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
            dri_bo_reference(gen6_avc_surface->dmv_top);
            dri_bo_reference(gen6_avc_surface->dmv_bottom);
    /* source picture to compress */
    mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
    /* coded buffer: PAK writes past the i965 codec header */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
    mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
    mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
    /* reset the coded-buffer segment header for this frame
     * (presumably bo was mapped just above in an elided line — confirm) */
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
687 * The LUT uses the pair of 4-bit units: (shift, base) structure.
689 * So it is necessary to convert one cost into the nearest LUT format.
691 * 2^K *x = 2^n * (1 + deltaX)
692 * k + log2(x) = n + log2(1 + deltaX)
693 * log2(x) = n - k + log2(1 + deltaX)
694 * As X is in the range of [1, 15]
695 * 4 > n - k + log2(1 + deltaX) >= 0
696 * => n + log2(1 + deltaX) >= k > n - 4 + log2(1 + deltaX)
697 * Then we can derive the corresponding K and get the nearest LUT format.
699 int intel_format_lutvalue(int value, int max)
702 int logvalue, temp1, temp2;
707 logvalue = (int)(log2f((float)value));
711 int error, temp_value, base, j, temp_err;
713 j = logvalue - 4 + 1;
715 for(; j <= logvalue; j++) {
719 base = (value + (1 << (j - 1)) - 1) >> j;
724 temp_value = base << j;
725 temp_err = abs(value - temp_value);
726 if (temp_err < error) {
728 ret = (j << 4) | base;
734 temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
735 temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
744 #define VP8_QP_MAX 128
747 static float intel_lambda_qp(int qp)
749 float value, lambdaf;
751 value = value / 6 - 2;
754 lambdaf = roundf(powf(2, value));
759 void intel_h264_calc_mbmvcost_qp(int qp,
761 uint8_t *vme_state_message)
763 int m_cost, j, mv_count;
764 float lambda, m_costf;
766 assert(qp <= QP_MAX);
767 lambda = intel_lambda_qp(qp);
770 vme_state_message[MODE_CHROMA_INTRA] = 0;
771 vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
773 if (slice_type == SLICE_TYPE_I) {
774 vme_state_message[MODE_INTRA_16X16] = 0;
776 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
777 m_cost = lambda * 16;
778 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
780 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
783 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
784 for (j = 1; j < 3; j++) {
785 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
786 m_cost = (int)m_costf;
787 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
790 for (j = 4; j <= 64; j *= 2) {
791 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
792 m_cost = (int)m_costf;
793 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
798 vme_state_message[MODE_INTRA_16X16] = 0x4a;
799 vme_state_message[MODE_INTRA_8X8] = 0x4a;
800 vme_state_message[MODE_INTRA_4X4] = 0x4a;
801 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
802 vme_state_message[MODE_INTER_16X16] = 0x4a;
803 vme_state_message[MODE_INTER_16X8] = 0x4a;
804 vme_state_message[MODE_INTER_8X8] = 0x4a;
805 vme_state_message[MODE_INTER_8X4] = 0x4a;
806 vme_state_message[MODE_INTER_4X4] = 0x4a;
807 vme_state_message[MODE_INTER_BWD] = 0x2a;
810 m_costf = lambda * 10;
811 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
812 m_cost = lambda * 14;
813 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
814 m_cost = lambda * 24;
815 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
816 m_costf = lambda * 3.5;
818 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
819 if (slice_type == SLICE_TYPE_P) {
820 m_costf = lambda * 2.5;
822 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
823 m_costf = lambda * 4;
825 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
826 m_costf = lambda * 1.5;
828 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
829 m_costf = lambda * 3;
831 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
832 m_costf = lambda * 5;
834 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
835 /* BWD is not used in P-frame */
836 vme_state_message[MODE_INTER_BWD] = 0;
838 m_costf = lambda * 2.5;
840 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
841 m_costf = lambda * 5.5;
843 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
844 m_costf = lambda * 3.5;
846 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
847 m_costf = lambda * 5.0;
849 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
850 m_costf = lambda * 6.5;
852 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
853 m_costf = lambda * 1.5;
855 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
861 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
862 struct encode_state *encode_state,
863 struct intel_encoder_context *encoder_context)
865 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
866 struct gen6_vme_context *vme_context = encoder_context->vme_context;
867 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
868 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
870 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
872 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
874 if (encoder_context->rate_control_mode == VA_RC_CQP)
875 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
877 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
879 if (vme_state_message == NULL)
882 intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
/*
 * Fill the VME mode/MV cost LUT for VP8 encoding.
 *
 * Key frames (VP8 frame_type == 0) get fixed intra mode costs; inter
 * frames get lambda-scaled weights per prediction mode, where lambda is
 * derived from the QP after rescaling VP8's QP range onto the H.264-style
 * range used by intel_lambda_qp().
 *
 * NOTE(review): several original lines are missing from this extract
 * (braces, else branches, some m_cost assignments, loop-advance and
 * mv_count updates); the comments below only describe the visible code.
 */
885 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
886 struct encode_state *encode_state,
887 struct intel_encoder_context *encoder_context)
889 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
890 struct gen6_vme_context *vme_context = encoder_context->vme_context;
891 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
892 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
893 int qp, m_cost, j, mv_count;
894 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
895 float lambda, m_costf;
/* VP8: frame_type 0 means key frame. */
897 int is_key_frame = !pic_param->pic_flags.bits.frame_type;
898 int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
900 if (vme_state_message == NULL)
/* CQP: QP comes from the VP8 quantization matrix; otherwise from the BRC. */
903 if (encoder_context->rate_control_mode == VA_RC_CQP)
904 qp = q_matrix->quantization_index[0];
906 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
/* Rescale the VP8 QP (0..VP8_QP_MAX) onto the 0..QP_MAX range before
 * deriving the Lagrangian lambda. */
908 lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
911 vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
914 vme_state_message[MODE_INTRA_16X16] = 0;
915 m_cost = lambda * 16;
916 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
918 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* MV cost grows roughly with log2 of the MV magnitude bucket. */
921 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
922 for (j = 1; j < 3; j++) {
923 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
924 m_cost = (int)m_costf;
925 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
/* Remaining MV buckets at power-of-two distances 4..64. */
928 for (j = 4; j <= 64; j *= 2) {
929 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
930 m_cost = (int)m_costf;
931 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
/* Key frame: fixed mode costs; inter modes share the same constant and
 * backward prediction is zeroed. */
936 vme_state_message[MODE_INTRA_16X16] = 0x4a;
937 vme_state_message[MODE_INTRA_4X4] = 0x4a;
938 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
939 vme_state_message[MODE_INTER_16X16] = 0x4a;
940 vme_state_message[MODE_INTER_16X8] = 0x4a;
941 vme_state_message[MODE_INTER_8X8] = 0x4a;
942 vme_state_message[MODE_INTER_4X4] = 0x4a;
943 vme_state_message[MODE_INTER_BWD] = 0;
/* Inter (P) frame: lambda-scaled weights, intra modes penalized more
 * heavily than inter modes. */
946 m_costf = lambda * 10;
947 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
948 m_cost = lambda * 24;
949 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
951 m_costf = lambda * 3.5;
953 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
955 m_costf = lambda * 2.5;
957 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
958 m_costf = lambda * 4;
960 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
961 m_costf = lambda * 1.5;
963 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
964 m_costf = lambda * 5;
966 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
967 /* BWD is not used in P-frame */
968 vme_state_message[MODE_INTER_BWD] = 0;
972 #define MB_SCOREBOARD_A (1 << 0)
973 #define MB_SCOREBOARD_B (1 << 1)
974 #define MB_SCOREBOARD_C (1 << 2)
976 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
978 vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
979 vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
980 vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
984 /* In VME prediction the current mb depends on the neighbour
985 * A/B/C macroblock. So the left/up/up-right dependency should
988 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
989 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
990 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
991 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
992 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
993 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
995 vme_context->gpe_context.vfe_desc7.dword = 0;
/* Check whether the macroblock at (x_index, y_index) is out of bounds for
 * the current slice.
 *
 * Returns 0 when the MB lies inside the picture AND inside the slice's MB
 * range [first_mb, first_mb + num_mb] (upper bound inclusive, matching the
 * walker's use), -1 otherwise.
 *
 * NOTE(review): this extract was truncated (missing `return` statements,
 * `int mb_index;` and braces); restored from the visible structure.
 */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    int mb_index;

    /* Outside the picture horizontally. */
    if (x_index < 0 || x_index >= mb_width)
        return -1;

    /* Outside the picture vertically. */
    if (y_index < 0 || y_index >= mb_height)
        return -1;

    /* Outside the slice's macroblock range (raster order). */
    mb_index = y_index * mb_width + x_index;
    if (mb_index < first_mb || mb_index > (first_mb + num_mb))
        return -1;

    return 0;
}
/*
 * Emit one MEDIA_OBJECT command per macroblock into the VME batchbuffer,
 * walking each H.264 slice in the diagonal (wavefront) order required by
 * the hardware scoreboard: the inner loop advances down-left so that a
 * macroblock is only launched after its left (A), top (B) and top-right
 * (C) neighbours.  A second outer loop sweeps the right-hand edge columns
 * (mb_width - 2 onward) that the first pass cannot start from.
 *
 * Per-MB payload: kernel id, scoreboard flag/(X,Y)/dependency mask, MB
 * coordinates, flags (incl. transform_8x8 and intra-availability bits)
 * and the per-MB QP (from the ROI map when ROI is enabled).
 *
 * NOTE(review): this extract is missing several original lines (braces,
 * some declarations, inner-loop advance and padding dwords); comments
 * describe only what the visible code shows.
 */
1015 gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
1016 struct encode_state *encode_state,
1017 int mb_width, int mb_height,
1019 int transform_8x8_mode_flag,
1020 struct intel_encoder_context *encoder_context)
1022 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1025 unsigned int *command_ptr;
1026 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1027 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1028 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1029 int qp,qp_mb,qp_index;
1030 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* CQP: QP from the parameter buffers; other RC modes: QP from the BRC. */
1032 if (encoder_context->rate_control_mode == VA_RC_CQP)
1033 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1035 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1037 #define USE_SCOREBOARD (1 << 21)
/* Map the batchbuffer BO for CPU writes. */
1039 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1040 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
/* Emit commands slice by slice. */
1042 for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1043 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
1044 int first_mb = pSliceParameter->macroblock_address;
1045 int num_mb = pSliceParameter->num_macroblocks;
1046 unsigned int mb_intra_ub, score_dep;
1047 int x_outer, y_outer, x_inner, y_inner;
1048 int xtemp_outer = 0;
/* Start the first wavefront pass at the slice's first macroblock. */
1050 x_outer = first_mb % mb_width;
1051 y_outer = first_mb / mb_width;
1054 for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1057 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
/* Intra-prediction availability and scoreboard deps for this MB:
 * A = left, B = top, C = top-right, D = top-left. */
1061 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1062 score_dep |= MB_SCOREBOARD_A;
1064 if (y_inner != mb_row) {
1065 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1066 score_dep |= MB_SCOREBOARD_B;
1068 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1069 if (x_inner != (mb_width -1)) {
1070 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1071 score_dep |= MB_SCOREBOARD_C;
/* MEDIA_OBJECT header: 9 total dwords, length field = dwords - 2. */
1075 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1076 *command_ptr++ = kernel;
1077 *command_ptr++ = USE_SCOREBOARD;
1080 /* the (X, Y) term of scoreboard */
1081 *command_ptr++ = ((y_inner << 16) | x_inner);
1082 *command_ptr++ = score_dep;
/* Inline payload: picture width plus MB coordinates, then flags. */
1084 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1085 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1086 /* QP occupies one byte */
1087 if (vme_context->roi_enabled) {
1088 qp_index = y_inner * mb_width + x_inner;
1089 qp_mb = *(vme_context->qp_per_mb + qp_index);
1092 *command_ptr++ = qp_mb;
/* Second pass: sweep the remaining wavefronts that start on the
 * right-hand columns (x = mb_width - 2 and beyond). */
1099 xtemp_outer = mb_width - 2;
1100 if (xtemp_outer < 0)
1102 x_outer = xtemp_outer;
1103 y_outer = first_mb / mb_width;
1104 for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1107 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1111 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1112 score_dep |= MB_SCOREBOARD_A;
1114 if (y_inner != mb_row) {
1115 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1116 score_dep |= MB_SCOREBOARD_B;
1118 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1120 if (x_inner != (mb_width -1)) {
1121 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1122 score_dep |= MB_SCOREBOARD_C;
/* Same MEDIA_OBJECT layout as in the first pass. */
1126 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1127 *command_ptr++ = kernel;
1128 *command_ptr++ = USE_SCOREBOARD;
1131 /* the (X, Y) term of scoreboard */
1132 *command_ptr++ = ((y_inner << 16) | x_inner);
1133 *command_ptr++ = score_dep;
1135 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1136 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1137 /* qp occupies one byte */
1138 if (vme_context->roi_enabled) {
1139 qp_index = y_inner * mb_width + x_inner;
1140 qp_mb = *(vme_context->qp_per_mb + qp_index);
1143 *command_ptr++ = qp_mb;
/* Clamp the outer cursor back onto the last startable column. */
1149 if (x_outer >= mb_width) {
1151 x_outer = xtemp_outer;
/* Terminate the batch and release the CPU mapping. */
1157 *command_ptr++ = MI_BATCH_BUFFER_END;
1159 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1163 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1165 unsigned int is_long_term =
1166 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1167 unsigned int is_top_field =
1168 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1169 unsigned int is_bottom_field =
1170 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1172 return ((is_long_term << 6) |
1173 ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1174 (frame_store_id << 1) |
1175 ((is_top_field ^ 1) & is_bottom_field));
/*
 * Emit the MFX_AVC_REF_IDX_STATE commands (one for L0, one for L1) on the
 * BCS ring.  Each 8-dword table starts out fully invalid (0x80 per byte);
 * for P/B slices the single reference actually used by the VME stage is
 * located in the DPB (encode_state->reference_objects) and its packed
 * entry (see intel_get_ref_idx_state_1) is patched into the byte selected
 * by the MB-level reference index.
 *
 * NOTE(review): this extract is missing several original lines (braces,
 * `int slice_type, i, frame_index;` declarations, loop bodies and else
 * branches); comments describe only the visible code.
 */
1179 intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
1180 struct encode_state *encode_state,
1181 struct intel_encoder_context *encoder_context)
1183 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1184 struct intel_batchbuffer *batch = encoder_context->base.batch;
1186 struct object_surface *obj_surface;
1187 unsigned int fref_entry, bref_entry;
1189 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
/* 0x80 per byte marks an invalid/unused reference entry. */
1191 fref_entry = 0x80808080;
1192 bref_entry = 0x80808080;
1193 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* L0 (forward) entry, needed for both P and B slices. */
1195 if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
1196 int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
/* Only 4 entries fit in the dword patched below. */
1198 if (ref_idx_l0 > 3) {
1199 WARN_ONCE("ref_idx_l0 is out of range\n");
/* Find the used L0 reference's frame-store slot in the DPB. */
1203 obj_surface = vme_context->used_reference_objects[0];
1205 for (i = 0; i < 16; i++) {
1207 obj_surface == encode_state->reference_objects[i]) {
1212 if (frame_index == -1) {
1213 WARN_ONCE("RefPicList0 is not found in DPB!\n");
/* Patch the packed entry into the byte chosen by ref_idx_l0. */
1215 int ref_idx_l0_shift = ref_idx_l0 * 8;
1216 fref_entry &= ~(0xFF << ref_idx_l0_shift);
1217 fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
/* L1 (backward) entry, B slices only — same procedure as L0. */
1221 if (slice_type == SLICE_TYPE_B) {
1222 int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
1224 if (ref_idx_l1 > 3) {
1225 WARN_ONCE("ref_idx_l1 is out of range\n");
1229 obj_surface = vme_context->used_reference_objects[1];
1231 for (i = 0; i < 16; i++) {
1233 obj_surface == encode_state->reference_objects[i]) {
1238 if (frame_index == -1) {
1239 WARN_ONCE("RefPicList1 is not found in DPB!\n");
1241 int ref_idx_l1_shift = ref_idx_l1 * 8;
1242 bref_entry &= ~(0xFF << ref_idx_l1_shift);
1243 bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
/* Emit the L0 table: 10 dwords total, remaining entries invalid. */
1247 BEGIN_BCS_BATCH(batch, 10);
1248 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1249 OUT_BCS_BATCH(batch, 0); //Select L0
1250 OUT_BCS_BATCH(batch, fref_entry); //Only 1 reference
1251 for(i = 0; i < 7; i++) {
1252 OUT_BCS_BATCH(batch, 0x80808080);
1254 ADVANCE_BCS_BATCH(batch);
/* Emit the L1 table with the same layout. */
1256 BEGIN_BCS_BATCH(batch, 10);
1257 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1258 OUT_BCS_BATCH(batch, 1); //Select L1
1259 OUT_BCS_BATCH(batch, bref_entry); //Only 1 reference
1260 for(i = 0; i < 7; i++) {
1261 OUT_BCS_BATCH(batch, 0x80808080);
1263 ADVANCE_BCS_BATCH(batch);
/*
 * Set up the VME state message for MPEG-2 encoding: MV search range
 * (level-dependent), per-mode cost LUT and picture dimensions.
 *
 * Non-intra pictures get lambda-scaled MV costs (lambda derived from the
 * slice quantiser_scale_code); MPEG-2 only supports a 16x16 search, so
 * the sub-partition mode costs are zeroed.
 *
 * NOTE(review): this extract is missing several original lines (braces,
 * the per-level mv_x/mv_y assignments, some m_cost computations and the
 * trailing width term); comments describe only the visible code.
 */
1267 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1268 struct encode_state *encode_state,
1269 struct intel_encoder_context *encoder_context)
1271 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1272 uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1273 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1274 int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1275 int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1276 uint32_t mv_x, mv_y;
1277 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1278 VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1279 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
/* MV search range depends on the configured MPEG-2 level; the mv_x/mv_y
 * assignments for each level are in the (missing) branch bodies. */
1281 if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1284 } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1287 } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1291 WARN_ONCE("Incorrect Mpeg2 level setting!\n");
/* MV cost LUT only matters for P/B pictures. */
1296 pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1297 if (pic_param->picture_type != VAEncPictureTypeIntra) {
1298 int qp, m_cost, j, mv_count;
1299 float lambda, m_costf;
1300 slice_param = (VAEncSliceParameterBufferMPEG2 *)
1301 encode_state->slice_params_ext[0]->buffer;
1302 qp = slice_param->quantiser_scale_code;
1303 lambda = intel_lambda_qp(qp);
1304 /* No Intra prediction. So it is zero */
1305 vme_state_message[MODE_INTRA_8X8] = 0;
1306 vme_state_message[MODE_INTRA_4X4] = 0;
/* MV cost grows roughly with log2 of the MV bucket. */
1307 vme_state_message[MODE_INTER_MV0] = 0;
1308 for (j = 1; j < 3; j++) {
1309 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1310 m_cost = (int)m_costf;
1311 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1314 for (j = 4; j <= 64; j *= 2) {
1315 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1316 m_cost = (int)m_costf;
1317 vme_state_message[MODE_INTER_MV0 + mv_count] =
1318 intel_format_lutvalue(m_cost, 0x6f);
1322 /* It can only perform the 16x16 search. So mode cost can be ignored for
1323 * the other mode. for example: 16x8/8x8
1325 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1326 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1328 vme_state_message[MODE_INTER_16X8] = 0;
1329 vme_state_message[MODE_INTER_8X8] = 0;
1330 vme_state_message[MODE_INTER_8X4] = 0;
1331 vme_state_message[MODE_INTER_4X4] = 0;
1332 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
/* Pack MV range and picture size into the state message. */
1335 vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1337 vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
/*
 * MPEG-2 variant of the scoreboard wavefront walker: emits one
 * MEDIA_OBJECT per macroblock in diagonal order (left / top / top-right
 * dependencies), then a second pass for the wavefronts starting on the
 * right-hand columns.  Unlike the H.264 walker, commands are 8 dwords
 * (no per-MB QP dword) and the whole picture is treated as one slice
 * (first_mb implied 0, num_mb = mb_width * mb_height).
 *
 * NOTE(review): this extract is missing several original lines (braces,
 * some declarations, neighbour-condition lines and loop advances);
 * comments describe only the visible code.
 */
1342 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
1343 struct encode_state *encode_state,
1344 int mb_width, int mb_height,
1346 struct intel_encoder_context *encoder_context)
1348 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1349 unsigned int *command_ptr;
1351 #define MPEG2_SCOREBOARD (1 << 21)
/* Map the batchbuffer BO for CPU writes. */
1353 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1354 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1357 unsigned int mb_intra_ub, score_dep;
1358 int x_outer, y_outer, x_inner, y_inner;
1359 int xtemp_outer = 0;
1361 int num_mb = mb_width * mb_height;
/* First wavefront pass over the left part of the picture. */
1367 for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1370 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
/* Neighbour availability / scoreboard deps: A = left, B = top,
 * C = top-right, D = top-left. */
1374 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1375 score_dep |= MB_SCOREBOARD_A;
1378 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1379 score_dep |= MB_SCOREBOARD_B;
1382 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1384 if (x_inner != (mb_width -1)) {
1385 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1386 score_dep |= MB_SCOREBOARD_C;
/* MEDIA_OBJECT header: 8 total dwords, length field = dwords - 2. */
1390 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1391 *command_ptr++ = kernel;
1392 *command_ptr++ = MPEG2_SCOREBOARD;
1395 /* the (X, Y) term of scoreboard */
1396 *command_ptr++ = ((y_inner << 16) | x_inner);
1397 *command_ptr++ = score_dep;
/* Inline payload: picture width plus MB coordinates, then flags. */
1399 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1400 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
/* Second pass: wavefronts starting at x = mb_width - 2 and beyond. */
1407 xtemp_outer = mb_width - 2;
1408 if (xtemp_outer < 0)
1410 x_outer = xtemp_outer;
1412 for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1415 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1419 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1420 score_dep |= MB_SCOREBOARD_A;
1423 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1424 score_dep |= MB_SCOREBOARD_B;
1427 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1429 if (x_inner != (mb_width -1)) {
1430 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1431 score_dep |= MB_SCOREBOARD_C;
/* Same 8-dword MEDIA_OBJECT layout as in the first pass. */
1435 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1436 *command_ptr++ = kernel;
1437 *command_ptr++ = MPEG2_SCOREBOARD;
1440 /* the (X, Y) term of scoreboard */
1441 *command_ptr++ = ((y_inner << 16) | x_inner);
1442 *command_ptr++ = score_dep;
1444 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1445 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
/* Clamp the outer cursor back onto the last startable column. */
1451 if (x_outer >= mb_width) {
1453 x_outer = xtemp_outer;
/* Terminate the batch and release the CPU mapping. */
1459 *command_ptr++ = MI_BATCH_BUFFER_END;
1461 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1466 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1467 VAPictureH264 *ref_list,
1471 int i, found = -1, min = 0x7FFFFFFF;
1473 for (i = 0; i < num_pictures; i++) {
1476 if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1477 (ref_list[i].picture_id == VA_INVALID_SURFACE))
1480 tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1485 if (tmp > 0 && tmp < min) {
/*
 * Select the single reference surface the VME stage will use for the given
 * list (0 = forward/L0, 1 = backward/L1), record it in the VME context and
 * bind it through the supplied vme_source_surface_state callback.
 *
 * When the active reference count for the list is exactly 1, the surface
 * comes straight from RefPicList0/1[0]; otherwise the temporally closest
 * reference is chosen via avc_temporal_find_surface().  On failure the
 * context's used_reference slots are cleared.
 *
 * NOTE(review): this extract is missing several original lines (braces,
 * `else` keywords, some declarations such as ref_idx/surface_index and
 * the fallback branches); comments describe only the visible code.
 */
1495 intel_avc_vme_reference_state(VADriverContextP ctx,
1496 struct encode_state *encode_state,
1497 struct intel_encoder_context *encoder_context,
1500 void (* vme_source_surface_state)(
1501 VADriverContextP ctx,
1503 struct object_surface *obj_surface,
1504 struct intel_encoder_context *encoder_context))
1506 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1507 struct object_surface *obj_surface = NULL;
1508 struct i965_driver_data *i965 = i965_driver_data(ctx);
1509 VASurfaceID ref_surface_id;
1510 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1511 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1512 int max_num_references;
1513 VAPictureH264 *curr_pic;
1514 VAPictureH264 *ref_list;
/* Pick active reference count and list by direction. */
1517 if (list_index == 0) {
1518 max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1519 ref_list = slice_param->RefPicList0;
1521 max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1522 ref_list = slice_param->RefPicList1;
/* Single active reference: take list entry 0 directly. */
1525 if (max_num_references == 1) {
1526 if (list_index == 0) {
1527 ref_surface_id = slice_param->RefPicList0[0].picture_id;
1528 vme_context->used_references[0] = &slice_param->RefPicList0[0];
1530 ref_surface_id = slice_param->RefPicList1[0].picture_id;
1531 vme_context->used_references[1] = &slice_param->RefPicList1[0];
1534 if (ref_surface_id != VA_INVALID_SURFACE)
1535 obj_surface = SURFACE(ref_surface_id);
/* Fallback: use the DPB entry and the picture-level reference frame. */
1539 obj_surface = encode_state->reference_objects[list_index];
1540 vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
/* Multiple references: pick the temporally nearest one. */
1545 curr_pic = &pic_param->CurrPic;
1547 /* select the reference frame in temporal space */
1548 ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1549 ref_surface_id = ref_list[ref_idx].picture_id;
1551 if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1552 obj_surface = SURFACE(ref_surface_id);
1554 vme_context->used_reference_objects[list_index] = obj_surface;
1555 vme_context->used_references[list_index] = &ref_list[ref_idx];
/* Success path: bind the surface and record the MB-level ref index. */
1560 assert(ref_idx >= 0);
1561 vme_context->used_reference_objects[list_index] = obj_surface;
1562 vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1563 vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
/* Failure path: clear the recorded reference state. */
1568 vme_context->used_reference_objects[list_index] = NULL;
1569 vme_context->used_references[list_index] = NULL;
1570 vme_context->ref_index_in_mb[list_index] = 0;
/*
 * Insert the packed header data for one H.264 slice into the slice batch:
 * first all application-supplied raw packed buffers for this slice
 * (skipping the slice-header type), then the slice header itself — either
 * the application's packed slice header, or one generated by the driver
 * via build_avc_slice_header() when none was supplied.
 *
 * NOTE(review): this extract is missing several original lines (braces,
 * `else`, `continue`, some insert_object() arguments and the generated
 * header cleanup); comments describe only the visible code.
 */
1574 void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
1575 struct encode_state *encode_state,
1576 struct intel_encoder_context *encoder_context,
1578 struct intel_batchbuffer *slice_batch)
1580 int count, i, start_index;
1581 unsigned int length_in_bits;
1582 VAEncPackedHeaderParameterBuffer *param = NULL;
1583 unsigned int *header_data = NULL;
1584 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1585 int slice_header_index;
/* Index 0 means "no packed slice header supplied for this slice". */
1587 if (encode_state->slice_header_index[slice_index] == 0)
1588 slice_header_index = -1;
1590 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1592 count = encode_state->slice_rawdata_count[slice_index];
1593 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
/* Pass 1: insert every raw packed buffer attached to this slice. */
1595 for (i = 0; i < count; i++) {
1596 unsigned int skip_emul_byte_cnt;
1598 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1600 param = (VAEncPackedHeaderParameterBuffer *)
1601 (encode_state->packed_header_params_ext[start_index + i]->buffer);
1603 /* skip the slice header packed data type as it is lastly inserted */
1604 if (param->type == VAEncPackedHeaderSlice)
1607 length_in_bits = param->bit_length;
/* Leading bytes (start code etc.) exempt from emulation prevention. */
1609 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1611 /* as the slice header is still required, the last header flag is set to
1614 mfc_context->insert_object(ctx,
1617 ALIGN(length_in_bits, 32) >> 5,
1618 length_in_bits & 0x1f,
1622 !param->has_emulation_bytes,
/* Pass 2: the slice header itself. */
1626 if (slice_header_index == -1) {
1627 unsigned char *slice_header = NULL;
1628 int slice_header_length_in_bits = 0;
1629 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1630 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1631 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1633 /* No slice header data is passed. And the driver needs to generate it */
1634 /* For the Normal H264 */
1635 slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
1639 mfc_context->insert_object(ctx, encoder_context,
1640 (unsigned int *)slice_header,
1641 ALIGN(slice_header_length_in_bits, 32) >> 5,
1642 slice_header_length_in_bits & 0x1f,
1643 5, /* first 5 bytes are start code + nal unit type */
1644 1, 0, 1, slice_batch);
/* Application-supplied packed slice header. */
1648 unsigned int skip_emul_byte_cnt;
1650 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1652 param = (VAEncPackedHeaderParameterBuffer *)
1653 (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1654 length_in_bits = param->bit_length;
1656 /* as the slice header is the last header data for one slice,
1657 * the last header flag is set to one.
1659 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1661 mfc_context->insert_object(ctx,
1664 ALIGN(length_in_bits, 32) >> 5,
1665 length_in_bits & 0x1f,
1669 !param->has_emulation_bytes,
/*
 * Lazily build the per-QP mode/MV cost table (32 bytes per QP, QP_MAX
 * entries) for the current slice type and cache it on the VME context in
 * the matching i/p/b_qp_cost_table BO slot.  Returns early when the table
 * for that slice type already exists.
 *
 * NOTE(review): this extract is missing several original lines (braces,
 * `return` statements, `int qp;`/`dri_bo *bo;` declarations, the
 * dri_bo_alloc() name/size/alignment arguments, dri_bo_map/unmap and the
 * cost_table pointer advance); comments describe only the visible code.
 */
1677 intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
1678 struct encode_state *encode_state,
1679 struct intel_encoder_context *encoder_context)
1681 struct i965_driver_data *i965 = i965_driver_data(ctx);
1682 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1683 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1686 uint8_t *cost_table;
1688 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Bail out when a cached table already exists for this slice type. */
1691 if (slice_type == SLICE_TYPE_I) {
1692 if (vme_context->i_qp_cost_table)
1694 } else if (slice_type == SLICE_TYPE_P) {
1695 if (vme_context->p_qp_cost_table)
1698 if (vme_context->b_qp_cost_table)
1702 /* It is enough to allocate 32 bytes for each qp. */
1703 bo = dri_bo_alloc(i965->intel.bufmgr,
/* Fill one 32-byte LUT per QP while the BO is CPU-mapped. */
1709 assert(bo->virtual);
1710 cost_table = (uint8_t *)(bo->virtual);
1711 for (qp = 0; qp < QP_MAX; qp++) {
1712 intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
/* Cache the BO in the slot matching the slice type. */
1718 if (slice_type == SLICE_TYPE_I) {
1719 vme_context->i_qp_cost_table = bo;
1720 } else if (slice_type == SLICE_TYPE_P) {
1721 vme_context->p_qp_cost_table = bo;
1723 vme_context->b_qp_cost_table = bo;
1726 vme_context->cost_table_size = QP_MAX * 32;
/*
 * Bind the cached per-QP cost table BO (selected by slice type) as a GPE
 * buffer surface at the given binding-table / surface-state offsets so
 * the VME kernels can read it.
 *
 * NOTE(review): this extract is missing several original lines (braces,
 * `dri_bo *bo;`, `else`, the cost_table.bo assignment and the buffer
 * argument of vme_buffer_suface_setup); comments describe only the
 * visible code.
 */
1731 intel_h264_setup_cost_surface(VADriverContextP ctx,
1732 struct encode_state *encode_state,
1733 struct intel_encoder_context *encoder_context,
1734 unsigned long binding_table_offset,
1735 unsigned long surface_state_offset)
1737 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1738 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1742 struct i965_buffer_surface cost_table;
1744 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Pick the cached table for the current slice type (built by
 * intel_h264_initialize_mbmv_cost). */
1747 if (slice_type == SLICE_TYPE_I) {
1748 bo = vme_context->i_qp_cost_table;
1749 } else if (slice_type == SLICE_TYPE_P) {
1750 bo = vme_context->p_qp_cost_table;
1752 bo = vme_context->b_qp_cost_table;
/* One 32-byte block per QP, exposed with a 16-byte pitch. */
1756 cost_table.num_blocks = QP_MAX;
1757 cost_table.pitch = 16;
1758 cost_table.size_block = 32;
1760 vme_context->vme_buffer_suface_setup(ctx,
1761 &vme_context->gpe_context,
1763 binding_table_offset,
1764 surface_state_offset);
1768 * the idea of conversion between qp and qstep comes from scaling process
1769 * of transform coeff for Luma component in H264 spec.
1771 * In order to avoid too small qstep, it is multiplied by 16.
1773 static float intel_h264_qp_qstep(int qp)
1777 value = value / 6 - 2;
1778 qstep = powf(2, value);
1782 static int intel_h264_qstep_qp(float qstep)
1786 qp = 12.0f + 6.0f * log2f(qstep);
1792 * Currently it is based on the following assumption:
1793 * SUM(roi_area * 1 / roi_qstep) + non_area * 1 / nonroi_qstep =
1794 * total_aread * 1 / baseqp_qstep
1796 * qstep is the linearized quantizer of H264 quantizer
1799 int row_start_in_mb;
1801 int col_start_in_mb;
/*
 * Build the per-MB QP map for ROI encoding under CBR, keeping the overall
 * rate roughly constant: per the model above,
 *   SUM(roi_area / roi_qstep) + nonroi_area / nonroi_qstep
 *     = total_area / baseqp_qstep
 * Each ROI gets base_qp + its qp_delta (clipped to [1,51]); the non-ROI
 * QP is solved from the equation via the linearized qstep helpers.
 *
 * NOTE(review): this extract is missing several original lines (the
 * `int base_qp` parameter line, braces, some declarations such as
 * num_roi/roi_qp/sum_roi/temp/qp_ptr, accumulator initialization and the
 * temp<=0 guard); comments describe only the visible code.
 */
1811 intel_h264_enc_roi_cbr(VADriverContextP ctx,
1813 VAEncMiscParameterBufferROI *pMiscParamROI,
1814 struct encode_state *encode_state,
1815 struct intel_encoder_context *encoder_context)
1818 VAEncROI *region_roi;
1821 ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS];
1826 float qstep_nonroi, qstep_base;
1827 float roi_area, total_area, nonroi_area;
1830 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1831 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1832 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1833 int mbs_in_picture = width_in_mbs * height_in_mbs;
1835 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1836 VAStatus vaStatus = VA_STATUS_SUCCESS;
1838 if(pMiscParamROI != NULL)
/* Clamp the ROI count to the driver's supported maximum. */
1840 num_roi = (pMiscParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pMiscParamROI->num_roi;
1842 /* currently roi_value_is_qp_delta is the only supported mode of priority.
1844 * qp_delta set by user is added to base_qp, which is then clapped by
1845 * [base_qp-min_delta, base_qp+max_delta].
1847 ASSERT_RET(pMiscParamROI->roi_flags.bits.roi_value_is_qp_delta,VA_STATUS_ERROR_INVALID_PARAMETER);
1850 /* when the base_qp is lower than 12, the quality is quite good based
1851 * on the H264 test experience.
1852 * In such case it is unnecessary to adjust the quality for ROI region.
1854 if (base_qp <= 12) {
1855 nonroi_qp = base_qp;
/* Convert each ROI rectangle from pixels to an MB-aligned region and
 * accumulate its area / inverse-qstep contribution. */
1862 for (i = 0; i < num_roi; i++) {
1863 int row_start, row_end, col_start, col_end;
1864 int roi_width_mbs, roi_height_mbs;
1869 region_roi = (VAEncROI *)pMiscParamROI->roi + i;
1871 col_start = region_roi->roi_rectangle.x;
1872 col_end = col_start + region_roi->roi_rectangle.width;
1873 row_start = region_roi->roi_rectangle.y;
1874 row_end = row_start + region_roi->roi_rectangle.height;
/* Pixel coordinates to 16x16 MB units (end rounded up). */
1875 col_start = col_start / 16;
1876 col_end = (col_end + 15) / 16;
1877 row_start = row_start / 16;
1878 row_end = (row_end + 15) / 16;
1880 roi_width_mbs = col_end - col_start;
1881 roi_height_mbs = row_end - row_start;
1882 mbs_in_roi = roi_width_mbs * roi_height_mbs;
1884 param_regions[i].row_start_in_mb = row_start;
1885 param_regions[i].row_end_in_mb = row_end;
1886 param_regions[i].col_start_in_mb = col_start;
1887 param_regions[i].col_end_in_mb = col_end;
1888 param_regions[i].width_mbs = roi_width_mbs;
1889 param_regions[i].height_mbs = roi_height_mbs;
/* Per-ROI QP = base + user delta, clipped to the valid H.264 range. */
1891 roi_qp = base_qp + region_roi->roi_value;
1892 BRC_CLIP(roi_qp, 1, 51);
1894 param_regions[i].roi_qp = roi_qp;
1895 qstep_roi = intel_h264_qp_qstep(roi_qp);
1897 roi_area += mbs_in_roi;
1898 sum_roi += mbs_in_roi / qstep_roi;
/* Solve the rate-balance equation for the non-ROI qstep/QP. */
1901 total_area = mbs_in_picture;
1902 nonroi_area = total_area - roi_area;
1904 qstep_base = intel_h264_qp_qstep(base_qp);
1905 temp = (total_area / qstep_base - sum_roi);
1910 qstep_nonroi = nonroi_area / temp;
1911 nonroi_qp = intel_h264_qstep_qp(qstep_nonroi);
1914 BRC_CLIP(nonroi_qp, 1, 51);
/* Fill the whole map with the non-ROI QP, then overwrite each ROI
 * rectangle row by row with its own QP. */
1917 memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture);
1921 for (i = 0; i < num_roi; i++) {
1922 for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) {
1923 qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb;
1924 memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs);
/*
 * Configure per-MB QP ROI encoding for a frame.
 *
 * Validates the ROI misc parameter (single-slice only), (re)allocates the
 * qp_per_mb map when the picture size changed, then fills it:
 *  - CBR: delegates to intel_h264_enc_roi_cbr() with the BRC QP as base;
 *  - CQP: base QP from pic/slice params, then each ROI rectangle is
 *    overwritten with base + delta (iterated in reverse so lower-index,
 *    higher-priority ROIs win on overlap);
 *  - other RC modes: ROI is disabled.
 * On Gen7 with ROI enabled, forces the soft batch path.
 *
 * NOTE(review): this extract is missing several original lines (braces,
 * `return` statements, declarations such as num_roi/qp/qp_ptr/i/j, and
 * the num_roi validity check body); comments describe only the visible
 * code.
 */
1932 intel_h264_enc_roi_config(VADriverContextP ctx,
1933 struct encode_state *encode_state,
1934 struct intel_encoder_context *encoder_context)
1938 VAEncROI *region_roi;
1939 struct i965_driver_data *i965 = i965_driver_data(ctx);
1940 VAEncMiscParameterBuffer* pMiscParamROI;
1941 VAEncMiscParameterBufferROI *pParamROI = NULL;
1942 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1943 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1944 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1945 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1946 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1948 int row_start, row_end, col_start, col_end;
1951 vme_context->roi_enabled = 0;
1952 /* Restriction: Disable ROI when multi-slice is enabled */
1953 if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
/* Pull the ROI misc parameter, if the application supplied one. */
1956 if (encode_state->misc_param[VAEncMiscParameterTypeROI][0] != NULL) {
1957 pMiscParamROI = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeROI][0]->buffer;
1958 pParamROI = (VAEncMiscParameterBufferROI *)pMiscParamROI->data;
1960 /* check whether number of ROI is correct */
1961 num_roi = (pParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pParamROI->num_roi;
1965 vme_context->roi_enabled = 1;
1967 if (!vme_context->roi_enabled)
/* Re-allocate the per-MB QP map when the picture size changed. */
1970 if ((vme_context->saved_width_mbs != width_in_mbs) ||
1971 (vme_context->saved_height_mbs != height_in_mbs)) {
1972 free(vme_context->qp_per_mb);
1973 vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);
1975 vme_context->saved_width_mbs = width_in_mbs;
1976 vme_context->saved_height_mbs = height_in_mbs;
1977 assert(vme_context->qp_per_mb);
1979 if (encoder_context->rate_control_mode == VA_RC_CBR) {
1981 * TODO: More complex Qp adjust needs to be added.
1982 * Currently it is initialized to slice_qp.
1984 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1986 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* CBR: base QP comes from the BRC for this layer/slice type. */
1988 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1989 intel_h264_enc_roi_cbr(ctx, qp, pParamROI,encode_state, encoder_context);
1991 } else if (encoder_context->rate_control_mode == VA_RC_CQP){
1992 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1993 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
/* CQP: fill the whole map with the base QP first. */
1996 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1997 memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
/* Iterate ROIs in reverse so earlier (higher-priority) regions are
 * written last and win where rectangles overlap. */
2000 for (j = num_roi; j ; j--) {
2001 int qp_delta, qp_clip;
2003 region_roi = (VAEncROI *)pParamROI->roi + j - 1;
2005 col_start = region_roi->roi_rectangle.x;
2006 col_end = col_start + region_roi->roi_rectangle.width;
2007 row_start = region_roi->roi_rectangle.y;
2008 row_end = row_start + region_roi->roi_rectangle.height;
/* Pixel coordinates to 16x16 MB units (end rounded up). */
2010 col_start = col_start / 16;
2011 col_end = (col_end + 15) / 16;
2012 row_start = row_start / 16;
2013 row_end = (row_end + 15) / 16;
2015 qp_delta = region_roi->roi_value;
2016 qp_clip = qp + qp_delta;
2018 BRC_CLIP(qp_clip, 1, 51);
2020 for (i = row_start; i < row_end; i++) {
2021 qp_ptr = vme_context->qp_per_mb + (i * width_in_mbs) + col_start;
2022 memset(qp_ptr, qp_clip, (col_end - col_start));
2027 * TODO: Disable it for non CBR-CQP.
2029 vme_context->roi_enabled = 0;
/* Gen7 cannot patch per-MB QP in hardware batches; force soft batch. */
2032 if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info))
2033 encoder_context->soft_batch_force = 1;
2040 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
2041 VAPictureHEVC *ref_list,
2045 int i, found = -1, min = 0x7FFFFFFF;
2047 for (i = 0; i < num_pictures; i++) {
2050 if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
2051 (ref_list[i].picture_id == VA_INVALID_SURFACE))
2054 tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
2059 if (tmp > 0 && tmp < min) {
2068 intel_hevc_vme_reference_state(VADriverContextP ctx,
2069 struct encode_state *encode_state,
2070 struct intel_encoder_context *encoder_context,
2073 void (* vme_source_surface_state)(
2074 VADriverContextP ctx,
2076 struct object_surface *obj_surface,
2077 struct intel_encoder_context *encoder_context))
2079 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2080 struct object_surface *obj_surface = NULL;
2081 struct i965_driver_data *i965 = i965_driver_data(ctx);
2082 VASurfaceID ref_surface_id;
2083 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2084 VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2085 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2086 int max_num_references;
2087 VAPictureHEVC *curr_pic;
2088 VAPictureHEVC *ref_list;
2090 unsigned int is_hevc10 = 0;
2091 GenHevcSurface *hevc_encoder_surface = NULL;
2093 if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
2094 || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
2097 if (list_index == 0) {
2098 max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
2099 ref_list = slice_param->ref_pic_list0;
2101 max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
2102 ref_list = slice_param->ref_pic_list1;
2105 if (max_num_references == 1) {
2106 if (list_index == 0) {
2107 ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
2108 vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
2110 ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
2111 vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
2114 if (ref_surface_id != VA_INVALID_SURFACE)
2115 obj_surface = SURFACE(ref_surface_id);
2119 obj_surface = encode_state->reference_objects[list_index];
2120 vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
2125 curr_pic = &pic_param->decoded_curr_pic;
2127 /* select the reference frame in temporal space */
2128 ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
2129 ref_surface_id = ref_list[ref_idx].picture_id;
2131 if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
2132 obj_surface = SURFACE(ref_surface_id);
2134 vme_context->used_reference_objects[list_index] = obj_surface;
2135 vme_context->used_references[list_index] = &ref_list[ref_idx];
2140 assert(ref_idx >= 0);
2141 vme_context->used_reference_objects[list_index] = obj_surface;
2144 hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2145 assert(hevc_encoder_surface);
2146 obj_surface = hevc_encoder_surface->nv12_surface_obj;
2148 vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
2149 vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
2154 vme_context->used_reference_objects[list_index] = NULL;
2155 vme_context->used_references[list_index] = NULL;
2156 vme_context->ref_index_in_mb[list_index] = 0;
2160 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
2161 struct encode_state *encode_state,
2162 struct intel_encoder_context *encoder_context)
2164 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2165 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2166 VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2167 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2168 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2169 int qp, m_cost, j, mv_count;
2170 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
2171 float lambda, m_costf;
2173 /* here no SI SP slice for HEVC, do not need slice fixup */
2174 int slice_type = slice_param->slice_type;
2177 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2179 if(encoder_context->rate_control_mode == VA_RC_CBR)
2181 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
2182 if(slice_type == HEVC_SLICE_B) {
2183 if(pSequenceParameter->ip_period == 1)
2185 slice_type = HEVC_SLICE_P;
2186 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2188 }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
2189 slice_type = HEVC_SLICE_P;
2190 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2196 if (vme_state_message == NULL)
2199 assert(qp <= QP_MAX);
2200 lambda = intel_lambda_qp(qp);
2201 if (slice_type == HEVC_SLICE_I) {
2202 vme_state_message[MODE_INTRA_16X16] = 0;
2203 m_cost = lambda * 4;
2204 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2205 m_cost = lambda * 16;
2206 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2207 m_cost = lambda * 3;
2208 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2211 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
2212 for (j = 1; j < 3; j++) {
2213 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2214 m_cost = (int)m_costf;
2215 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2218 for (j = 4; j <= 64; j *= 2) {
2219 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2220 m_cost = (int)m_costf;
2221 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
2226 vme_state_message[MODE_INTRA_16X16] = 0x4a;
2227 vme_state_message[MODE_INTRA_8X8] = 0x4a;
2228 vme_state_message[MODE_INTRA_4X4] = 0x4a;
2229 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2230 vme_state_message[MODE_INTER_16X16] = 0x4a;
2231 vme_state_message[MODE_INTER_16X8] = 0x4a;
2232 vme_state_message[MODE_INTER_8X8] = 0x4a;
2233 vme_state_message[MODE_INTER_8X4] = 0x4a;
2234 vme_state_message[MODE_INTER_4X4] = 0x4a;
2235 vme_state_message[MODE_INTER_BWD] = 0x2a;
2238 m_costf = lambda * 10;
2239 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2240 m_cost = lambda * 14;
2241 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2242 m_cost = lambda * 24;
2243 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2244 m_costf = lambda * 3.5;
2246 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2247 if (slice_type == HEVC_SLICE_P) {
2248 m_costf = lambda * 2.5;
2250 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2251 m_costf = lambda * 4;
2253 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2254 m_costf = lambda * 1.5;
2256 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2257 m_costf = lambda * 3;
2259 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2260 m_costf = lambda * 5;
2262 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2263 /* BWD is not used in P-frame */
2264 vme_state_message[MODE_INTER_BWD] = 0;
2266 m_costf = lambda * 2.5;
2268 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2269 m_costf = lambda * 5.5;
2271 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2272 m_costf = lambda * 3.5;
2274 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2275 m_costf = lambda * 5.0;
2277 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2278 m_costf = lambda * 6.5;
2280 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2281 m_costf = lambda * 1.5;
2283 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);