2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
45 #include "intel_media.h"
48 #define log2f(x) (logf(x)/(float)M_LN2)
/* Normalize a VA-API H.264 slice type for encoding: SP is folded into P and
 * SI into I. Any remaining value that is not B triggers a one-time warning
 * and is forced to B so downstream code only ever sees I/P/B.
 * NOTE(review): this chunk has gaps (braces/return not visible); the visible
 * logic is documented as-is. */
51 int intel_avc_enc_slice_type_fixup(int slice_type)
53 if (slice_type == SLICE_TYPE_SP ||
54 slice_type == SLICE_TYPE_P)
55 slice_type = SLICE_TYPE_P;
56 else if (slice_type == SLICE_TYPE_SI ||
57 slice_type == SLICE_TYPE_I)
58 slice_type = SLICE_TYPE_I;
/* Anything left that is not B is invalid for H.264 encoding; warn once
 * and coerce to B. */
60 if (slice_type != SLICE_TYPE_B)
61 WARN_ONCE("Invalid slice type for H.264 encoding!\n");
63 slice_type = SLICE_TYPE_B;
/* Initialize the per-slice-type (I/P/B -> indices 0..2) bitrate-control
 * tuning tables with fixed defaults: QP grow/shrink step limits and the
 * Correct[] damping table used by the BRC correction logic. */
70 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
71 struct intel_encoder_context *encoder_context)
73 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
/* One context per slice type (I, P, B). */
76 for(i = 0 ; i < 3; i++) {
77 mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
78 mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
79 mfc_context->bit_rate_control_context[i].GrowInit = 6;
80 mfc_context->bit_rate_control_context[i].GrowResistance = 4;
81 mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
82 mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
/* Symmetric correction weights: strongest at the extremes (8), mildest
 * in the middle (2). */
84 mfc_context->bit_rate_control_context[i].Correct[0] = 8;
85 mfc_context->bit_rate_control_context[i].Correct[1] = 4;
86 mfc_context->bit_rate_control_context[i].Correct[2] = 2;
87 mfc_context->bit_rate_control_context[i].Correct[3] = 2;
88 mfc_context->bit_rate_control_context[i].Correct[4] = 4;
89 mfc_context->bit_rate_control_context[i].Correct[5] = 8;
/* (Re)initialize the software bitrate controller: HRD buffer state, per-layer
 * per-slice-type target frame sizes derived from the GOP structure, and an
 * initial QP estimate obtained by linear interpolation between the estimated
 * frame sizes at QP=1 (qp1_size) and QP=51 (qp51_size). */
93 static void intel_mfc_brc_init(struct encode_state *encode_state,
94 struct intel_encoder_context* encoder_context)
96 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
97 double bitrate, framerate;
/* Rough compressed-frame-size estimates for an NV12 frame (3/2 bytes per
 * pixel, 8 bits per byte) at the QP extremes: ~10% of raw size at QP=1,
 * ~0.1% at QP=51. Used as interpolation anchors below. */
98 double qp1_size = 0.1 * 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
99 double qp51_size = 0.001 * 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
101 int inum = encoder_context->brc.num_iframes_in_gop,
102 pnum = encoder_context->brc.num_pframes_in_gop,
103 bnum = encoder_context->brc.num_bframes_in_gop; /* Gop structure: number of I, P, B frames in the Gop. */
104 int intra_period = encoder_context->brc.gop_size;
107 mfc_context->brc.mode = encoder_context->rate_control_mode;
109 mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
110 mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
111 mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
/* Seed HRD state: start from the caller-provided initial fullness if it is
 * below the buffer size, otherwise fall back to half the buffer.
 * NOTE(review): the (double) cast applies to the comparison result (0/1),
 * not to a value; the ternary still selects correctly but the cast is
 * misleading — worth cleaning up when this file is editable. */
113 mfc_context->hrd.buffer_size = encoder_context->brc.hrd_buffer_size;
114 mfc_context->hrd.current_buffer_fullness =
115 (double)(encoder_context->brc.hrd_initial_buffer_fullness < mfc_context->hrd.buffer_size) ?
116 encoder_context->brc.hrd_initial_buffer_fullness : mfc_context->hrd.buffer_size / 2.;
117 mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
/* Buffer capacity expressed in "QP=1 frames"; used later as a small-buffer
 * heuristic threshold in intel_mfc_brc_postpack(). */
118 mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
119 mfc_context->hrd.violation_noted = 0;
/* Per temporal layer: start every slice type at the H.264 default QP 26,
 * then refine from the layer's bits-per-frame budget. */
121 for (i = 0; i < encoder_context->layer.num_layers; i++) {
122 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = 26;
123 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 26;
124 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = 26;
/* Layer 0 uses its absolute rate; higher layers use the delta versus the
 * layer below (bits_per_second[] / framerate_per_100s[] are cumulative). */
127 bitrate = encoder_context->brc.bits_per_second[0];
128 framerate = (double)encoder_context->brc.framerate_per_100s[0] / 100.0;
130 bitrate = (encoder_context->brc.bits_per_second[i] - encoder_context->brc.bits_per_second[i - 1]);
131 framerate = (double)(encoder_context->brc.framerate_per_100s[i] - encoder_context->brc.framerate_per_100s[i - 1]) / 100.0;
/* factor scales the GOP composition to this layer's share of the full
 * frame rate; the top layer sees the whole GOP (factor handled below). */
134 if (i == encoder_context->layer.num_layers - 1)
137 factor = (double)encoder_context->brc.framerate_per_100s[i] / encoder_context->brc.framerate_per_100s[i + 1];
/* Split the GOP bit budget into an I-frame target, with P and B frames
 * weighted by BRC_PWEIGHT / BRC_BWEIGHT relative to I. */
139 mfc_context->brc.target_frame_size[i][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period * factor)/framerate) /
140 (double)(inum + BRC_PWEIGHT * pnum * factor + BRC_BWEIGHT * bnum * factor));
141 mfc_context->brc.target_frame_size[i][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
142 mfc_context->brc.target_frame_size[i][SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
144 bpf = mfc_context->brc.bits_per_frame[i] = bitrate/framerate;
/* Linear interpolation of the initial P QP between the QP=51 and QP=1
 * frame-size anchors; clamp to 1 or 51 outside that range. */
146 if ((bpf > qp51_size) && (bpf < qp1_size)) {
147 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
149 else if (bpf >= qp1_size)
150 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 1;
151 else if (bpf <= qp51_size)
152 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51;
/* I and B start from the same estimate as P. */
154 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P];
155 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I];
157 BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I], 1, 51);
158 BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P], 1, 51);
159 BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B], 1, 51);
/* Update the modelled HRD buffer after encoding a frame of frame_bits bits:
 * drain the frame's bits, then refill by one frame interval's budget.
 * Returns BRC_UNDERFLOW / BRC_OVERFLOW-style status (buffer state is rolled
 * back to prev_bf on a violation so the caller can re-encode). */
163 int intel_mfc_update_hrd(struct encode_state *encode_state,
164 struct intel_encoder_context *encoder_context,
167 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
/* Remember fullness so a violating frame can be rolled back. */
168 double prev_bf = mfc_context->hrd.current_buffer_fullness;
170 mfc_context->hrd.current_buffer_fullness -= frame_bits;
/* buffer_size == 0 means HRD is unconstrained; only check when bounded. */
172 if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
173 mfc_context->hrd.current_buffer_fullness = prev_bf;
174 return BRC_UNDERFLOW;
/* Refill with the current layer's per-frame bit budget. */
177 mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame[encoder_context->layer.curr_frame_layer_id];
178 if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
/* VBR tolerates overflow: just saturate at the buffer size. */
179 if (mfc_context->brc.mode == VA_RC_VBR)
180 mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
182 mfc_context->hrd.current_buffer_fullness = prev_bf;
186 return BRC_NO_HRD_VIOLATION;
/* Post-encode BRC step: given the actual size (frame_bits) of the frame just
 * packed, update the HRD model, predict the QP for the next frame of the same
 * slice type (for the next frame's temporal layer), apply a buffer-fullness
 * based QP delta, and propagate consistent QP adjustments to the other slice
 * types. Returns the HRD status, possibly escalated to
 * BRC_UNDERFLOW_WITH_MAX_QP / BRC_OVERFLOW_WITH_MIN_QP when the QP can no
 * longer be moved to compensate. */
189 int intel_mfc_brc_postpack(struct encode_state *encode_state,
190 struct intel_encoder_context *encoder_context,
193 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
194 gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
195 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
196 int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
197 int curr_frame_layer_id, next_frame_layer_id;
199 int qp; // quantizer of previously encoded slice of current type
200 int qpn; // predicted quantizer for next frame of current type in integer format
201 double qpf; // predicted quantizer for next frame of current type in float format
202 double delta_qp; // QP correction
203 int target_frame_size, frame_size_next;
205 * x - how far we are from HRD buffer borders
206 * y - how far we are from target HRD buffer fullness
209 double frame_size_alpha, factor;
/* Without temporal layers everything lives in layer 0; otherwise look up the
 * layer id of the frame we are about to encode next. */
211 if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0) {
212 curr_frame_layer_id = 0;
213 next_frame_layer_id = 0;
215 curr_frame_layer_id = encoder_context->layer.curr_frame_layer_id;
216 next_frame_layer_id = encoder_context->layer.frame_layer_ids[encoder_context->num_frames_in_sequence % encoder_context->layer.size_frame_layer_ids];
219 /* check HRD compliance first */
220 sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
222 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
225 next_frame_layer_id = curr_frame_layer_id;
/* From here on, predict for the NEXT frame's layer: record this frame's
 * size/type under the current layer, then switch frame_bits/slicetype to
 * the values previously recorded for the next layer. */
228 mfc_context->brc.bits_prev_frame[curr_frame_layer_id] = frame_bits;
229 frame_bits = mfc_context->brc.bits_prev_frame[next_frame_layer_id];
231 mfc_context->brc.prev_slice_type[curr_frame_layer_id] = slicetype;
232 slicetype = mfc_context->brc.prev_slice_type[next_frame_layer_id];
234 if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0)
237 factor = (double)encoder_context->brc.framerate_per_100s[next_frame_layer_id] / encoder_context->brc.framerate_per_100s[encoder_context->layer.num_layers - 1];
239 /* 0 means the next frame is the first frame of next layer */
/* Snapshot the current per-type QPs of the next layer for the cross-type
 * consistency adjustments below. */
243 qpi = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I];
244 qpp = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P];
245 qpb = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B];
247 qp = mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype];
249 target_frame_size = mfc_context->brc.target_frame_size[next_frame_layer_id][slicetype];
/* Small HRD buffer (< 5 QP=1 frames) -> react immediately (alpha = 0);
 * otherwise smooth the reaction over the GOP, capped at 30. */
250 if (mfc_context->hrd.buffer_capacity < 5)
251 frame_size_alpha = 0;
253 frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype] * factor;
254 if (frame_size_alpha > 30) frame_size_alpha = 30;
255 frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
256 (double)(frame_size_alpha + 1.);
258 /* frame_size_next: avoiding negative number and too small value */
259 if ((double)frame_size_next < (double)(target_frame_size * 0.25))
260 frame_size_next = (int)((double)target_frame_size * 0.25);
/* Scale QP by the size ratio (larger predicted frame -> lower QP) and
 * round to nearest integer. */
262 qpf = (double)qp * target_frame_size / frame_size_next;
263 qpn = (int)(qpf + 0.5);
266 /* rounding qpf to qpn accumulates error: compensate once it exceeds 1 */
267 mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] += qpf - qpn;
268 if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] > 1.0) {
270 mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
271 } else if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] < -1.0) {
273 mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
276 /* making sure that QP is not changing too fast */
277 if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
278 else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
279 /* making sure that with QP predictions we do not leave the QP range */
280 BRC_CLIP(qpn, 1, 51);
282 /* calculating QP delta as some function*/
/* Below target fullness: x measures distance from target (normalized by
 * target); above target: normalized by remaining headroom instead. */
283 x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
285 x /= mfc_context->hrd.target_buffer_fullness;
286 y = mfc_context->hrd.current_buffer_fullness;
289 x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
290 y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
/* Guard exp(-1/y) against division blow-up and clamp x into [-1, 1]. */
292 if (y < 0.01) y = 0.01;
294 else if (x < -1) x = -1;
/* Smooth correction: sin() gives sign/magnitude from distance-to-target,
 * exp(-1/y) damps it when far from the buffer border. */
296 delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
297 qpn = (int)(qpn + delta_qp + 0.5);
299 /* making sure that with QP predictions we do not leave the QP range */
300 BRC_CLIP(qpn, 1, 51);
302 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
303 /* correcting QPs of slices of other types */
/* Keep the usual inter-type QP relations (e.g. B = P + BRC_P_B_QP_DIFF,
 * I = P - BRC_I_P_QP_DIFF); nudge halfway/quarter-way when the drift
 * exceeds a small threshold. */
304 if (slicetype == SLICE_TYPE_P) {
305 if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
306 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
307 if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
308 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
309 } else if (slicetype == SLICE_TYPE_I) {
310 if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
311 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
312 if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
313 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
314 } else { // SLICE_TYPE_B
315 if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
316 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
317 if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
318 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
320 BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I], 1, 51);
321 BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P], 1, 51);
322 BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B], 1, 51);
323 } else if (sts == BRC_UNDERFLOW) { // underflow
/* Frame too big: force QP upward; if already at max, give up and report. */
324 if (qpn <= qp) qpn = qp + 1;
327 sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
329 } else if (sts == BRC_OVERFLOW) {
/* Frame too small: force QP downward; at min QP bit stuffing is needed. */
330 if (qpn >= qp) qpn = qp - 1;
331 if (qpn < 1) { // < 0 (?) overflow with minQP
333 sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
337 mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype] = qpn;
/* Initialize the VUI HRD parameters used when the driver itself generates
 * SEI buffering-period/picture-timing messages (CBR only). */
342 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
343 struct intel_encoder_context *encoder_context)
345 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
346 unsigned int rate_control_mode = encoder_context->rate_control_mode;
/* Use the top layer's (cumulative) bitrate as the HRD target. */
347 int target_bit_rate = encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
349 // currently we only support CBR mode.
350 if (rate_control_mode == VA_RC_CBR) {
/* bit_rate_value is stored in units of 2^10 bits per the SPS HRD scale. */
351 mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
/* Initial CPB removal delay in 90 kHz ticks for a half-full CPB. */
352 mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
353 mfc_context->vui_hrd.i_cpb_removal_delay = 2;
354 mfc_context->vui_hrd.i_frame_number = 0;
/* Field bit-lengths as signalled in the SPS HRD parameters. */
356 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
357 mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
358 mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
/* Advance the HRD frame counter; called once per encoded frame so the SEI
 * picture-timing cpb_removal_delay keeps increasing. */
364 intel_mfc_hrd_context_update(struct encode_state *encode_state,
365 struct gen6_mfc_context *mfc_context)
367 mfc_context->vui_hrd.i_frame_number++;
/* Sanity-check that the submitted slices together cover exactly one full
 * progressive frame: sum num_macroblocks over all slice parameter buffers
 * and compare against width_in_mbs * height_in_mbs (derived from the
 * surface state). Used to reject/flag interlaced or partial submissions. */
370 int intel_mfc_interlace_check(VADriverContextP ctx,
371 struct encode_state *encode_state,
372 struct intel_encoder_context *encoder_context)
374 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
375 VAEncSliceParameterBufferH264 *pSliceParameter;
378 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
379 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
/* Accumulate the MB count across every slice of the picture. */
381 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
382 pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer;
383 mbCount += pSliceParameter->num_macroblocks;
/* Full-frame coverage -> treated as progressive. */
386 if ( mbCount == ( width_in_mbs * height_in_mbs ) )
/* Per-frame BRC entry point: for H.264/MVC in CBR mode, (re)initialize the
 * bitrate-control and HRD contexts whenever a BRC reset was requested
 * (first frame or parameter change). Other codecs/RC modes are no-ops. */
392 void intel_mfc_brc_prepare(struct encode_state *encode_state,
393 struct intel_encoder_context *encoder_context)
395 unsigned int rate_control_mode = encoder_context->rate_control_mode;
/* Only the H.264 paths use this software BRC. */
397 if (encoder_context->codec != CODEC_H264 &&
398 encoder_context->codec != CODEC_H264_MVC)
401 if (rate_control_mode == VA_RC_CBR) {
402 /*Programming bit rate control */
403 if (encoder_context->brc.need_reset) {
404 intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
405 intel_mfc_brc_init(encode_state, encoder_context);
408 /*Programming HRD control */
409 if (encoder_context->brc.need_reset)
410 intel_mfc_hrd_context_init(encode_state, encoder_context);
/* Emit the packed SPS, PPS and SEI headers into the slice batch via the
 * hardware insert_object() hook. Each header, if the application supplied a
 * packed-header buffer, is inserted with emulation-prevention handling
 * driven by !param->has_emulation_bytes. If no SEI was supplied and the
 * rate control is CBR, a timing SEI is generated from the driver's own
 * vui_hrd state instead. */
414 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
415 struct encode_state *encode_state,
416 struct intel_encoder_context *encoder_context,
417 struct intel_batchbuffer *slice_batch)
419 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
420 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
421 unsigned int rate_control_mode = encoder_context->rate_control_mode;
422 unsigned int skip_emul_byte_cnt;
/* --- SPS --- */
424 if (encode_state->packed_header_data[idx]) {
425 VAEncPackedHeaderParameterBuffer *param = NULL;
426 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
427 unsigned int length_in_bits;
429 assert(encode_state->packed_header_param[idx]);
430 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
431 length_in_bits = param->bit_length;
/* Leading bytes (start code + NAL header) that must not get emulation
 * prevention applied. */
433 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
434 mfc_context->insert_object(ctx,
/* Length is split into full DWs and remaining bits for the hardware. */
437 ALIGN(length_in_bits, 32) >> 5,
438 length_in_bits & 0x1f,
442 !param->has_emulation_bytes,
/* --- PPS --- */
446 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
448 if (encode_state->packed_header_data[idx]) {
449 VAEncPackedHeaderParameterBuffer *param = NULL;
450 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
451 unsigned int length_in_bits;
453 assert(encode_state->packed_header_param[idx]);
454 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
455 length_in_bits = param->bit_length;
457 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
459 mfc_context->insert_object(ctx,
462 ALIGN(length_in_bits, 32) >> 5,
463 length_in_bits & 0x1f,
467 !param->has_emulation_bytes,
/* --- SEI: application-provided, else driver-generated timing SEI (CBR) --- */
471 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
473 if (encode_state->packed_header_data[idx]) {
474 VAEncPackedHeaderParameterBuffer *param = NULL;
475 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
476 unsigned int length_in_bits;
478 assert(encode_state->packed_header_param[idx]);
479 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
480 length_in_bits = param->bit_length;
482 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
483 mfc_context->insert_object(ctx,
486 ALIGN(length_in_bits, 32) >> 5,
487 length_in_bits & 0x1f,
491 !param->has_emulation_bytes,
493 } else if (rate_control_mode == VA_RC_CBR) {
495 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
497 unsigned char *sei_data = NULL;
/* Build a buffering-period/picture-timing SEI from the HRD context;
 * cpb_removal_delay grows with the frame number. */
499 int length_in_bits = build_avc_sei_buffer_timing(
500 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
501 mfc_context->vui_hrd.i_initial_cpb_removal_delay,
503 mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
504 mfc_context->vui_hrd.i_dpb_output_delay_length,
507 mfc_context->insert_object(ctx,
509 (unsigned int *)sei_data,
510 ALIGN(length_in_bits, 32) >> 5,
511 length_in_bits & 0x1f,
/* Prepare all MFC input/output buffer objects for an H.264 encode:
 *  - decide post- vs pre-deblocking output from the slices'
 *    disable_deblocking_filter_idc,
 *  - allocate/attach the reconstructed surface and its direct-MV (DMV)
 *    top/bottom buffers (lazily created and cached in surface private_data),
 *  - wire up reference surfaces and their DMV buffers,
 *  - reference the raw input surface and the coded (PAK/BSE) output buffer.
 * All dri_bo references taken here pair with releases elsewhere in the
 * driver's frame teardown. */
521 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
522 struct encode_state *encode_state,
523 struct intel_encoder_context *encoder_context)
525 struct i965_driver_data *i965 = i965_driver_data(ctx);
526 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
527 struct object_surface *obj_surface;
528 struct object_buffer *obj_buffer;
529 GenAvcSurface *gen6_avc_surface;
531 VAStatus vaStatus = VA_STATUS_SUCCESS;
532 int i, j, enable_avc_ildb = 0;
533 VAEncSliceParameterBufferH264 *slice_param;
534 struct i965_coded_buffer_segment *coded_buffer_segment;
535 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
536 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
537 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
539 if (IS_GEN6(i965->intel.device_info)) {
540 /* On the SNB it should be fixed to 128 for the DMV buffer */
/* In-loop deblocking is enabled as soon as ANY slice does not disable it
 * (idc != 1); stop scanning once found. */
544 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
545 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
546 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
548 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
549 assert((slice_param->slice_type == SLICE_TYPE_I) ||
550 (slice_param->slice_type == SLICE_TYPE_SI) ||
551 (slice_param->slice_type == SLICE_TYPE_P) ||
552 (slice_param->slice_type == SLICE_TYPE_SP) ||
553 (slice_param->slice_type == SLICE_TYPE_B));
555 if (slice_param->disable_deblocking_filter_idc != 1) {
564 /*Setup all the input&output object*/
566 /* Setup current frame and current direct mv buffer*/
567 obj_surface = encode_state->reconstructed_object;
568 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Lazily create the per-surface DMV buffers (top/bottom field) and cache
 * them in private_data so repeated encodes reuse them. */
570 if ( obj_surface->private_data == NULL) {
571 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
572 assert(gen6_avc_surface);
573 gen6_avc_surface->dmv_top =
574 dri_bo_alloc(i965->intel.bufmgr,
/* 68 bytes of motion-vector data per macroblock. */
576 68 * width_in_mbs * height_in_mbs,
578 gen6_avc_surface->dmv_bottom =
579 dri_bo_alloc(i965->intel.bufmgr,
581 68 * width_in_mbs * height_in_mbs,
583 assert(gen6_avc_surface->dmv_top);
584 assert(gen6_avc_surface->dmv_bottom);
585 obj_surface->private_data = (void *)gen6_avc_surface;
586 obj_surface->free_private_data = (void *)gen_free_avc_surface;
/* Current frame's DMV buffers occupy the last two slots. */
588 gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
589 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
590 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
591 dri_bo_reference(gen6_avc_surface->dmv_top);
592 dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* With deblocking the hardware writes the filtered (post-deblocking)
 * reconstruction; without it, the unfiltered (pre-deblocking) one. */
594 if (enable_avc_ildb) {
595 mfc_context->post_deblocking_output.bo = obj_surface->bo;
596 dri_bo_reference(mfc_context->post_deblocking_output.bo);
598 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
599 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
602 mfc_context->surface_state.width = obj_surface->orig_width;
603 mfc_context->surface_state.height = obj_surface->orig_height;
604 mfc_context->surface_state.w_pitch = obj_surface->width;
605 mfc_context->surface_state.h_pitch = obj_surface->height;
607 /* Setup reference frames and direct mv buffers*/
608 for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
609 obj_surface = encode_state->reference_objects[i];
611 if (obj_surface && obj_surface->bo) {
612 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
613 dri_bo_reference(obj_surface->bo);
615 /* Check DMV buffer */
/* Same lazy DMV allocation as for the reconstructed surface above. */
616 if ( obj_surface->private_data == NULL) {
618 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
619 assert(gen6_avc_surface);
620 gen6_avc_surface->dmv_top =
621 dri_bo_alloc(i965->intel.bufmgr,
623 68 * width_in_mbs * height_in_mbs,
625 gen6_avc_surface->dmv_bottom =
626 dri_bo_alloc(i965->intel.bufmgr,
628 68 * width_in_mbs * height_in_mbs,
630 assert(gen6_avc_surface->dmv_top);
631 assert(gen6_avc_surface->dmv_bottom);
632 obj_surface->private_data = gen6_avc_surface;
633 obj_surface->free_private_data = gen_free_avc_surface;
636 gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
637 /* Setup DMV buffer */
/* Reference i maps to DMV slots 2i (top) and 2i+1 (bottom). */
638 mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
639 mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
640 dri_bo_reference(gen6_avc_surface->dmv_top);
641 dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* Raw input picture. */
647 mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
648 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* Coded (output bitstream) buffer: bitstream starts after the driver's
 * header segment; keep a 4 KB guard at the end. */
650 obj_buffer = encode_state->coded_buf_object;
651 bo = obj_buffer->buffer_store->bo;
652 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
653 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
654 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
655 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
/* Reset the coded-buffer bookkeeping segment (bo assumed mapped here —
 * the map/unmap calls are outside this visible chunk). */
658 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
659 coded_buffer_segment->mapped = 0;
660 coded_buffer_segment->codec = encoder_context->codec;
666 * The LUT uses the pair of 4-bit units: (shift, base) structure.
668 * So it is necessary to convert one cost into the nearest LUT format.
670 * 2^K *x = 2^n * (1 + deltaX)
671 * k + log2(x) = n + log2(1 + deltaX)
672 * log2(x) = n - k + log2(1 + deltaX)
673 * As X is in the range of [1, 15]
674 * 4 > n - k + log2(1 + deltaX) >= 0
675 * => n + log2(1 + deltaX) >= k > n - 4 + log2(1 + deltaX)
676 * Then we can derive the corresponding K and get the nearest LUT format.
/* Convert a cost value into the hardware LUT (shift, base) 4-bit pair
 * format described in the comment block above: find the shift j in
 * (log2(value)-3 .. log2(value)] whose rounded base minimizes the
 * reconstruction error |value - (base << j)|, encode as (j << 4) | base,
 * and clamp against 'max' (compared in decoded form). */
678 int intel_format_lutvalue(int value, int max)
681 int logvalue, temp1, temp2;
686 logvalue = (int)(log2f((float)value));
690 int error, temp_value, base, j, temp_err;
/* Candidate shifts: from logvalue-3 up to logvalue (4 candidates). */
692 j = logvalue - 4 + 1;
694 for(; j <= logvalue; j++) {
/* Round value up to the nearest multiple of 2^j to get the base. */
698 base = (value + (1 << (j - 1)) - 1) >> j;
703 temp_value = base << j;
704 temp_err = abs(value - temp_value);
/* Keep the (shift, base) pair with the smallest reconstruction error. */
705 if (temp_err < error) {
707 ret = (j << 4) | base;
/* Decode both results (base << shift) to compare against the cap. */
713 temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
714 temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
723 #define VP8_QP_MAX 128
/* Rate-distortion lambda for a given QP: lambda = round(2^(qp/6 - 2)).
 * NOTE(review): in this visible chunk 'value' is read before any visible
 * assignment — the assignment from 'qp' falls in a gap of this dump;
 * confirm against the full file. */
726 static float intel_lambda_qp(int qp)
728 float value, lambdaf;
730 value = value / 6 - 2;
733 lambdaf = roundf(powf(2, value));
/* Fill the VME state message's mode and motion-vector cost LUTs for H.264,
 * scaled by the QP-derived lambda. Costs are converted to the hardware
 * (shift, base) LUT format via intel_format_lutvalue(); intra-mode entries
 * are capped at 0x8f and inter/MV entries at 0x6f. I slices get intra-only
 * costs; P/B slices additionally get inter mode costs (with fixed 0x4a/0x2a
 * fallbacks in one branch whose condition sits in a gap of this dump). */
738 void intel_h264_calc_mbmvcost_qp(int qp,
740 uint8_t *vme_state_message)
742 int m_cost, j, mv_count;
743 float lambda, m_costf;
745 assert(qp <= QP_MAX);
746 lambda = intel_lambda_qp(qp);
749 vme_state_message[MODE_CHROMA_INTRA] = 0;
750 vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
752 if (slice_type == SLICE_TYPE_I) {
/* Intra 16x16 is the cheapest (0); 8x8 and 4x4 get increasing costs. */
753 vme_state_message[MODE_INTRA_16X16] = 0;
755 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
756 m_cost = lambda * 16;
757 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
759 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* MV cost table: zero MV first, then log-scaled costs for longer MVs. */
762 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
763 for (j = 1; j < 3; j++) {
764 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
765 m_cost = (int)m_costf;
766 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
769 for (j = 4; j <= 64; j *= 2) {
770 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
771 m_cost = (int)m_costf;
772 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
/* Fixed fallback costs (branch condition not visible in this chunk). */
777 vme_state_message[MODE_INTRA_16X16] = 0x4a;
778 vme_state_message[MODE_INTRA_8X8] = 0x4a;
779 vme_state_message[MODE_INTRA_4X4] = 0x4a;
780 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
781 vme_state_message[MODE_INTER_16X16] = 0x4a;
782 vme_state_message[MODE_INTER_16X8] = 0x4a;
783 vme_state_message[MODE_INTER_8X8] = 0x4a;
784 vme_state_message[MODE_INTER_8X4] = 0x4a;
785 vme_state_message[MODE_INTER_4X4] = 0x4a;
786 vme_state_message[MODE_INTER_BWD] = 0x2a;
/* Lambda-scaled intra costs for P/B slices. */
789 m_costf = lambda * 10;
790 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
791 m_cost = lambda * 14;
792 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
793 m_cost = lambda * 24;
794 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
795 m_costf = lambda * 3.5;
797 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
798 if (slice_type == SLICE_TYPE_P) {
799 m_costf = lambda * 2.5;
801 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
802 m_costf = lambda * 4;
804 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
805 m_costf = lambda * 1.5;
807 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
808 m_costf = lambda * 3;
810 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
811 m_costf = lambda * 5;
813 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
814 /* BWD is not used in P-frame */
815 vme_state_message[MODE_INTER_BWD] = 0;
/* B-slice inter costs: partitions cost slightly more than in P slices,
 * and backward prediction gets a small nonzero cost. */
817 m_costf = lambda * 2.5;
819 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
820 m_costf = lambda * 5.5;
822 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
823 m_costf = lambda * 3.5;
825 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
826 m_costf = lambda * 5.0;
828 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
829 m_costf = lambda * 6.5;
831 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
832 m_costf = lambda * 1.5;
834 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
/* Refresh the H.264 VME mode/MV cost tables for the current frame: pick the
 * QP either directly from the picture/slice parameters (CQP) or from the
 * BRC's per-layer prediction, then delegate to intel_h264_calc_mbmvcost_qp(). */
840 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
841 struct encode_state *encode_state,
842 struct intel_encoder_context *encoder_context)
844 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
845 struct gen6_vme_context *vme_context = encoder_context->vme_context;
846 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
847 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
849 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
851 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* CQP: application-controlled QP; otherwise use the BRC prediction for the
 * current temporal layer and slice type. */
853 if (encoder_context->rate_control_mode == VA_RC_CQP)
854 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
856 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
858 if (vme_state_message == NULL)
861 intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
/* VP8 variant of the VME cost-table update. Key frames are treated as I
 * slices, inter frames as P. The VP8 quantization index (0..127) is mapped
 * onto the H.264 QP scale before computing lambda. Table layout mirrors the
 * H.264 version minus the 8x8-split and backward-prediction modes VP8
 * lacks. */
864 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
865 struct encode_state *encode_state,
866 struct intel_encoder_context *encoder_context)
868 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
869 struct gen6_vme_context *vme_context = encoder_context->vme_context;
870 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
871 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
872 int qp, m_cost, j, mv_count;
873 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
874 float lambda, m_costf;
/* frame_type == 0 is a VP8 key frame. */
876 int is_key_frame = !pic_param->pic_flags.bits.frame_type;
877 int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
879 if (vme_state_message == NULL)
/* CQP: take the base quantization index; otherwise the BRC prediction. */
882 if (encoder_context->rate_control_mode == VA_RC_CQP)
883 qp = q_matrix->quantization_index[0];
885 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
/* Rescale the VP8 q-index range onto the H.264 QP range for lambda. */
887 lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
890 vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
/* Key frame: intra costs only. */
893 vme_state_message[MODE_INTRA_16X16] = 0;
894 m_cost = lambda * 16;
895 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
897 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* MV cost table, log-scaled like the H.264 path. */
900 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
901 for (j = 1; j < 3; j++) {
902 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
903 m_cost = (int)m_costf;
904 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
907 for (j = 4; j <= 64; j *= 2) {
908 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
909 m_cost = (int)m_costf;
910 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
/* Fixed fallback costs (branch condition not visible in this chunk). */
915 vme_state_message[MODE_INTRA_16X16] = 0x4a;
916 vme_state_message[MODE_INTRA_4X4] = 0x4a;
917 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
918 vme_state_message[MODE_INTER_16X16] = 0x4a;
919 vme_state_message[MODE_INTER_16X8] = 0x4a;
920 vme_state_message[MODE_INTER_8X8] = 0x4a;
921 vme_state_message[MODE_INTER_4X4] = 0x4a;
922 vme_state_message[MODE_INTER_BWD] = 0;
/* Inter frame: lambda-scaled intra + inter costs. */
925 m_costf = lambda * 10;
926 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
927 m_cost = lambda * 24;
928 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
930 m_costf = lambda * 3.5;
932 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
934 m_costf = lambda * 2.5;
936 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
937 m_costf = lambda * 4;
939 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
940 m_costf = lambda * 1.5;
942 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
943 m_costf = lambda * 5;
945 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
946 /* BWD is not used in P-frame */
947 vme_state_message[MODE_INTER_BWD] = 0;
/* Scoreboard dependency bits for the neighbour macroblocks used by VME
 * prediction: A = left, B = above, C = above-right (see the delta_x/y
 * programming in gen7_vme_scoreboard_init below). */
951 #define MB_SCOREBOARD_A (1 << 0)
952 #define MB_SCOREBOARD_B (1 << 1)
953 #define MB_SCOREBOARD_C (1 << 2)
/*
 * Program the GPE VFE scoreboard so each macroblock stalls until its
 * left (A), above (B) and above-right (C) neighbours have completed.
 * NOTE(review): the return-type line and the mask's continuation lines
 * (MB_SCOREBOARD_B | MB_SCOREBOARD_C) are elided in this extraction.
 */
955 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
957 vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
958 vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
959 vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
963 /* In VME prediction the current mb depends on the neighbour
964 * A/B/C macroblock. So the left/up/up-right dependency should
/* (delta_x0, delta_y0) = (-1, 0): left neighbour (A). */
967 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
968 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
/* (delta_x1, delta_y1) = (0, -1): above neighbour (B). */
969 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
970 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
/* (delta_x2, delta_y2) = (1, -1): above-right neighbour (C). */
971 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
972 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
/* No further scoreboard deltas are used. */
974 vme_context->gpe_context.vfe_desc7.dword = 0;
978 /* check whether the mb of (x_index, y_index) is out of bound */
/* Returns non-zero (out of bounds) when the MB coordinate falls outside the
 * picture or outside the [first_mb, first_mb + num_mb] slice window.
 * NOTE(review): the actual `return` statements and the mb_index declaration
 * are elided in this extraction. */
979 static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
982 if (x_index < 0 || x_index >= mb_width)
984 if (y_index < 0 || y_index >= mb_height)
/* Linearized MB index in raster order. */
987 mb_index = y_index * mb_width + x_index;
988 if (mb_index < first_mb || mb_index > (first_mb + num_mb))
/*
 * Build the soft-batch of MEDIA_OBJECT commands that walks every MB of every
 * H.264 slice in the 45-degree wavefront order required by the A/B/C
 * scoreboard dependencies, emitting per-MB kernel arguments (position,
 * scoreboard deps, intra-availability flags and, with ROI, a per-MB QP).
 * NOTE(review): this extraction elides many lines (loop-body increments,
 * braces, mb_row computation, the inner-loop x_inner/y_inner setup), so
 * comments describe only the visible statements.
 */
994 gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
995 struct encode_state *encode_state,
996 int mb_width, int mb_height,
998 int transform_8x8_mode_flag,
999 struct intel_encoder_context *encoder_context)
1001 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1004 unsigned int *command_ptr;
1005 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1006 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1007 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1008 int qp,qp_mb,qp_index;
1009 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Base QP: CQP takes it from the picture/slice params, otherwise from BRC. */
1011 if (encoder_context->rate_control_mode == VA_RC_CQP)
1012 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1014 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1016 #define USE_SCOREBOARD (1 << 21)
/* Map the soft batchbuffer for CPU writes of the MEDIA_OBJECT stream. */
1018 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1019 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1021 for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1022 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
1023 int first_mb = pSliceParameter->macroblock_address;
1024 int num_mb = pSliceParameter->num_macroblocks;
1025 unsigned int mb_intra_ub, score_dep;
1026 int x_outer, y_outer, x_inner, y_inner;
1027 int xtemp_outer = 0;
1029 x_outer = first_mb % mb_width;
1030 y_outer = first_mb / mb_width;
/* Phase 1: wavefronts starting on the top row, up to column mb_width-2. */
1033 for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1036 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
/* Availability/dependency flags for left (A), above (B/D) and above-right (C). */
1040 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1041 score_dep |= MB_SCOREBOARD_A;
1043 if (y_inner != mb_row) {
1044 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1045 score_dep |= MB_SCOREBOARD_B;
1047 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1048 if (x_inner != (mb_width -1)) {
1049 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1050 score_dep |= MB_SCOREBOARD_C;
/* MEDIA_OBJECT with 9 dwords total (length field = dwords - 2). */
1054 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1055 *command_ptr++ = kernel;
1056 *command_ptr++ = USE_SCOREBOARD;
1059 /* the (X, Y) term of scoreboard */
1060 *command_ptr++ = ((y_inner << 16) | x_inner);
1061 *command_ptr++ = score_dep;
/* Inline kernel payload: picture width + MB position, then flags. */
1063 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1064 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1065 /* QP occupies one byte */
1066 if (vme_context->roi_enabled) {
1067 qp_index = y_inner * mb_width + x_inner;
1068 qp_mb = *(vme_context->qp_per_mb + qp_index);
1071 *command_ptr++ = qp_mb;
/* Phase 2: remaining wavefronts starting from column mb_width-2 downwards. */
1078 xtemp_outer = mb_width - 2;
1079 if (xtemp_outer < 0)
1081 x_outer = xtemp_outer;
1082 y_outer = first_mb / mb_width;
1083 for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1086 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1090 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1091 score_dep |= MB_SCOREBOARD_A;
1093 if (y_inner != mb_row) {
1094 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1095 score_dep |= MB_SCOREBOARD_B;
1097 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1099 if (x_inner != (mb_width -1)) {
1100 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1101 score_dep |= MB_SCOREBOARD_C;
1105 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1106 *command_ptr++ = kernel;
1107 *command_ptr++ = USE_SCOREBOARD;
1110 /* the (X, Y) term of scoreboard */
1111 *command_ptr++ = ((y_inner << 16) | x_inner);
1112 *command_ptr++ = score_dep;
1114 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1115 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1116 /* qp occupies one byte */
1117 if (vme_context->roi_enabled) {
1118 qp_index = y_inner * mb_width + x_inner;
1119 qp_mb = *(vme_context->qp_per_mb + qp_index);
1122 *command_ptr++ = qp_mb;
/* Clamp the walker back onto the last valid starting column. */
1128 if (x_outer >= mb_width) {
1130 x_outer = xtemp_outer;
/* Terminate the soft batch and unmap. */
1136 *command_ptr++ = MI_BATCH_BUFFER_END;
1138 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
/*
 * Pack one MFX_AVC_REF_IDX_STATE byte for a reference picture:
 * bit6 = long-term flag, bit5 = "not a field pair" (top XOR bottom XOR 1),
 * bits4..1 = frame store id, bit0 = bottom-field (set only when bottom
 * and not top). NOTE(review): the return-type line and surrounding braces
 * are elided in this extraction.
 */
1142 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1144 unsigned int is_long_term =
1145 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1146 unsigned int is_top_field =
1147 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1148 unsigned int is_bottom_field =
1149 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1151 return ((is_long_term << 6) |
1152 ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1153 (frame_store_id << 1) |
1154 ((is_top_field ^ 1) & is_bottom_field));
/*
 * Emit MFX_AVC_REF_IDX_STATE commands for reference lists L0 and L1.
 * Entries default to 0x80 ("invalid"); the single reference actually used
 * by VME is patched into the byte selected by ref_index_in_mb[].
 * NOTE(review): loop bodies, returns and several lines (frame_index
 * initialization, DPB-match condition head) are elided in this extraction.
 */
1158 intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
1159 struct encode_state *encode_state,
1160 struct intel_encoder_context *encoder_context)
1162 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1163 struct intel_batchbuffer *batch = encoder_context->base.batch;
1165 struct object_surface *obj_surface;
1166 unsigned int fref_entry, bref_entry;
1168 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
/* 0x80 per byte marks "no reference" for all four slots of each dword. */
1170 fref_entry = 0x80808080;
1171 bref_entry = 0x80808080;
1172 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* L0 is used by both P and B slices. */
1174 if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
1175 int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
/* Hardware only supports ref_idx 0..3 in this packed dword. */
1177 if (ref_idx_l0 > 3) {
1178 WARN_ONCE("ref_idx_l0 is out of range\n");
/* Locate the used L0 reference surface inside the DPB (16 slots). */
1182 obj_surface = vme_context->used_reference_objects[0];
1184 for (i = 0; i < 16; i++) {
1186 obj_surface == encode_state->reference_objects[i]) {
1191 if (frame_index == -1) {
1192 WARN_ONCE("RefPicList0 is not found in DPB!\n");
/* Patch the packed ref-state byte into its slot of the L0 dword. */
1194 int ref_idx_l0_shift = ref_idx_l0 * 8;
1195 fref_entry &= ~(0xFF << ref_idx_l0_shift);
1196 fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
/* L1 only exists for B slices. */
1200 if (slice_type == SLICE_TYPE_B) {
1201 int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
1203 if (ref_idx_l1 > 3) {
1204 WARN_ONCE("ref_idx_l1 is out of range\n");
1208 obj_surface = vme_context->used_reference_objects[1];
1210 for (i = 0; i < 16; i++) {
1212 obj_surface == encode_state->reference_objects[i]) {
1217 if (frame_index == -1) {
1218 WARN_ONCE("RefPicList1 is not found in DPB!\n");
1220 int ref_idx_l1_shift = ref_idx_l1 * 8;
1221 bref_entry &= ~(0xFF << ref_idx_l1_shift);
1222 bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
/* 10-dword MFX_AVC_REF_IDX_STATE for list 0: one live dword + 7 padding. */
1226 BEGIN_BCS_BATCH(batch, 10);
1227 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1228 OUT_BCS_BATCH(batch, 0); //Select L0
1229 OUT_BCS_BATCH(batch, fref_entry); //Only 1 reference
1230 for(i = 0; i < 7; i++) {
1231 OUT_BCS_BATCH(batch, 0x80808080);
1233 ADVANCE_BCS_BATCH(batch);
/* Same layout for list 1. */
1235 BEGIN_BCS_BATCH(batch, 10);
1236 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1237 OUT_BCS_BATCH(batch, 1); //Select L1
1238 OUT_BCS_BATCH(batch, bref_entry); //Only 1 reference
1239 for(i = 0; i < 7; i++) {
1240 OUT_BCS_BATCH(batch, 0x80808080);
1242 ADVANCE_BCS_BATCH(batch);
/*
 * Populate the VME state message for MPEG-2: MV range derived from the
 * configured level, mode/MV cost tables for non-intra pictures, and the
 * picture dimensions in MBs.
 * NOTE(review): this extraction elides the per-level mv_x/mv_y assignments,
 * several m_cost computations, mv_count bookkeeping and closing braces;
 * comments describe only visible statements.
 */
1246 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1247 struct encode_state *encode_state,
1248 struct intel_encoder_context *encoder_context)
1250 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1251 uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1252 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1253 int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1254 int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1255 uint32_t mv_x, mv_y;
1256 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1257 VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1258 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
/* Select the MV search range by MPEG-2 level (assignments elided here). */
1260 if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1263 } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1266 } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1270 WARN_ONCE("Incorrect Mpeg2 level setting!\n");
1275 pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
/* Cost tables are only meaningful for P/B pictures. */
1276 if (pic_param->picture_type != VAEncPictureTypeIntra) {
1277 int qp, m_cost, j, mv_count;
1278 float lambda, m_costf;
1279 slice_param = (VAEncSliceParameterBufferMPEG2 *)
1280 encode_state->slice_params_ext[0]->buffer;
1281 qp = slice_param->quantiser_scale_code;
1282 lambda = intel_lambda_qp(qp);
1283 /* No Intra prediction. So it is zero */
1284 vme_state_message[MODE_INTRA_8X8] = 0;
1285 vme_state_message[MODE_INTRA_4X4] = 0;
1286 vme_state_message[MODE_INTER_MV0] = 0;
/* MV-count cost grows logarithmically; 1.718f is an empirical offset. */
1287 for (j = 1; j < 3; j++) {
1288 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1289 m_cost = (int)m_costf;
1290 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1293 for (j = 4; j <= 64; j *= 2) {
1294 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1295 m_cost = (int)m_costf;
1296 vme_state_message[MODE_INTER_MV0 + mv_count] =
1297 intel_format_lutvalue(m_cost, 0x6f);
1301 /* It can only perform the 16x16 search. So mode cost can be ignored for
1302 * the other mode. for example: 16x8/8x8
1304 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1305 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1307 vme_state_message[MODE_INTER_16X8] = 0;
1308 vme_state_message[MODE_INTER_8X8] = 0;
1309 vme_state_message[MODE_INTER_8X4] = 0;
1310 vme_state_message[MODE_INTER_4X4] = 0;
1311 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
/* Pack vertical range into the high half-word, horizontal into the low. */
1314 vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1316 vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
/*
 * MPEG-2 variant of the wavefront batchbuffer builder: walks the whole
 * picture (single slice window of mb_width*mb_height MBs) in 45-degree
 * order, emitting one 8-dword MEDIA_OBJECT per MB with scoreboard
 * dependencies and intra-availability flags.
 * NOTE(review): many interior lines (first_mb setup, x/y increments,
 * mb_row computation, braces) are elided in this extraction.
 */
1321 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
1322 struct encode_state *encode_state,
1323 int mb_width, int mb_height,
1325 struct intel_encoder_context *encoder_context)
1327 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1328 unsigned int *command_ptr;
1330 #define MPEG2_SCOREBOARD (1 << 21)
1332 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1333 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1336 unsigned int mb_intra_ub, score_dep;
1337 int x_outer, y_outer, x_inner, y_inner;
1338 int xtemp_outer = 0;
/* The whole picture is treated as one MB window. */
1340 int num_mb = mb_width * mb_height;
/* Phase 1: wavefronts starting on the top row, up to column mb_width-2. */
1346 for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1349 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1353 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1354 score_dep |= MB_SCOREBOARD_A;
1357 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1358 score_dep |= MB_SCOREBOARD_B;
1361 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1363 if (x_inner != (mb_width -1)) {
1364 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1365 score_dep |= MB_SCOREBOARD_C;
/* MEDIA_OBJECT with 8 dwords total (length field = dwords - 2). */
1369 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1370 *command_ptr++ = kernel;
1371 *command_ptr++ = MPEG2_SCOREBOARD;
1374 /* the (X, Y) term of scoreboard */
1375 *command_ptr++ = ((y_inner << 16) | x_inner);
1376 *command_ptr++ = score_dep;
1378 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1379 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
/* Phase 2: remaining wavefronts starting from column mb_width-2 downwards. */
1386 xtemp_outer = mb_width - 2;
1387 if (xtemp_outer < 0)
1389 x_outer = xtemp_outer;
1391 for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1394 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1398 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1399 score_dep |= MB_SCOREBOARD_A;
1402 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1403 score_dep |= MB_SCOREBOARD_B;
1406 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1408 if (x_inner != (mb_width -1)) {
1409 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1410 score_dep |= MB_SCOREBOARD_C;
1414 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1415 *command_ptr++ = kernel;
1416 *command_ptr++ = MPEG2_SCOREBOARD;
1419 /* the (X, Y) term of scoreboard */
1420 *command_ptr++ = ((y_inner << 16) | x_inner);
1421 *command_ptr++ = score_dep;
1423 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1424 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
/* Clamp the walker back onto the last valid starting column. */
1430 if (x_outer >= mb_width) {
1432 x_outer = xtemp_outer;
/* Terminate the soft batch and unmap. */
1438 *command_ptr++ = MI_BATCH_BUFFER_END;
1440 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
/*
 * Pick the reference from ref_list that is temporally closest to curr_pic
 * by TopFieldOrderCnt distance, skipping invalid entries.
 * NOTE(review): the return-type line, num_pictures parameter line, the
 * backward/forward direction handling, `found` update and return are
 * elided in this extraction.
 */
1445 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1446 VAPictureH264 *ref_list,
1450 int i, found = -1, min = 0x7FFFFFFF;
1452 for (i = 0; i < num_pictures; i++) {
/* Skip invalid or unset reference entries. */
1455 if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1456 (ref_list[i].picture_id == VA_INVALID_SURFACE))
/* POC distance between current picture and candidate reference. */
1459 tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
/* Keep the candidate with the smallest positive distance. */
1464 if (tmp > 0 && tmp < min) {
/*
 * Select the reference surface for one H.264 reference list (list_index 0/1),
 * record it in vme_context (used_references / used_reference_objects /
 * ref_index_in_mb) and program its VME surface state via the provided
 * vme_source_surface_state callback.
 * With a single allowed reference the list's entry 0 is used directly;
 * otherwise the temporally closest valid reference is chosen.
 * NOTE(review): several lines (ref_idx initialization, obj_surface validity
 * checks, surface_index parameter line, else branches, braces) are elided
 * in this extraction.
 */
1474 intel_avc_vme_reference_state(VADriverContextP ctx,
1475 struct encode_state *encode_state,
1476 struct intel_encoder_context *encoder_context,
1479 void (* vme_source_surface_state)(
1480 VADriverContextP ctx,
1482 struct object_surface *obj_surface,
1483 struct intel_encoder_context *encoder_context))
1485 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1486 struct object_surface *obj_surface = NULL;
1487 struct i965_driver_data *i965 = i965_driver_data(ctx);
1488 VASurfaceID ref_surface_id;
1489 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1490 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1491 int max_num_references;
1492 VAPictureH264 *curr_pic;
1493 VAPictureH264 *ref_list;
/* Choose list size and list pointer for L0 vs L1. */
1496 if (list_index == 0) {
1497 max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1498 ref_list = slice_param->RefPicList0;
1500 max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1501 ref_list = slice_param->RefPicList1;
/* Single-reference case: use entry 0 of the corresponding list directly. */
1504 if (max_num_references == 1) {
1505 if (list_index == 0) {
1506 ref_surface_id = slice_param->RefPicList0[0].picture_id;
1507 vme_context->used_references[0] = &slice_param->RefPicList0[0];
1509 ref_surface_id = slice_param->RefPicList1[0].picture_id;
1510 vme_context->used_references[1] = &slice_param->RefPicList1[0];
1513 if (ref_surface_id != VA_INVALID_SURFACE)
1514 obj_surface = SURFACE(ref_surface_id);
/* Fallback: take the driver-tracked reference object for this list. */
1518 obj_surface = encode_state->reference_objects[list_index];
1519 vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1524 curr_pic = &pic_param->CurrPic;
1526 /* select the reference frame in temporal space */
1527 ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1528 ref_surface_id = ref_list[ref_idx].picture_id;
1530 if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1531 obj_surface = SURFACE(ref_surface_id);
1533 vme_context->used_reference_objects[list_index] = obj_surface;
1534 vme_context->used_references[list_index] = &ref_list[ref_idx];
/* Success path: record the object and program the VME surface state. */
1539 assert(ref_idx >= 0);
1540 vme_context->used_reference_objects[list_index] = obj_surface;
1541 vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1542 vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
/* Failure path: clear the recorded reference state for this list. */
1547 vme_context->used_reference_objects[list_index] = NULL;
1548 vme_context->used_references[list_index] = NULL;
1549 vme_context->ref_index_in_mb[list_index] = 0;
/*
 * Insert the packed header data attached to one slice into the slice batch:
 * first all raw (non-slice-header) packed buffers, then the slice header —
 * either a caller-supplied packed slice header or one generated by
 * build_avc_slice_header() when none was provided.
 * NOTE(review): several mfc_context->insert_object() argument lines and the
 * free() of the generated slice header are elided in this extraction.
 */
1553 void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
1554 struct encode_state *encode_state,
1555 struct intel_encoder_context *encoder_context,
1557 struct intel_batchbuffer *slice_batch)
1559 int count, i, start_index;
1560 unsigned int length_in_bits;
1561 VAEncPackedHeaderParameterBuffer *param = NULL;
1562 unsigned int *header_data = NULL;
1563 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1564 int slice_header_index;
/* Index 0 means "no packed slice header supplied for this slice". */
1566 if (encode_state->slice_header_index[slice_index] == 0)
1567 slice_header_index = -1;
1569 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1571 count = encode_state->slice_rawdata_count[slice_index];
1572 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
/* Emit all raw packed-data buffers attached to this slice. */
1574 for (i = 0; i < count; i++) {
1575 unsigned int skip_emul_byte_cnt;
1577 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1579 param = (VAEncPackedHeaderParameterBuffer *)
1580 (encode_state->packed_header_params_ext[start_index + i]->buffer);
1582 /* skip the slice header packed data type as it is lastly inserted */
1583 if (param->type == VAEncPackedHeaderSlice)
1586 length_in_bits = param->bit_length;
1588 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1590 /* as the slice header is still required, the last header flag is set to
1593 mfc_context->insert_object(ctx,
1596 ALIGN(length_in_bits, 32) >> 5,
1597 length_in_bits & 0x1f,
1601 !param->has_emulation_bytes,
/* No packed slice header: generate one from SPS/PPS/slice params. */
1605 if (slice_header_index == -1) {
1606 unsigned char *slice_header = NULL;
1607 int slice_header_length_in_bits = 0;
1608 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1609 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1610 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1612 /* No slice header data is passed. And the driver needs to generate it */
1613 /* For the Normal H264 */
1614 slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
1618 mfc_context->insert_object(ctx, encoder_context,
1619 (unsigned int *)slice_header,
1620 ALIGN(slice_header_length_in_bits, 32) >> 5,
1621 slice_header_length_in_bits & 0x1f,
1622 5, /* first 5 bytes are start code + nal unit type */
1623 1, 0, 1, slice_batch);
/* Packed slice header supplied by the caller: insert it last. */
1627 unsigned int skip_emul_byte_cnt;
1629 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1631 param = (VAEncPackedHeaderParameterBuffer *)
1632 (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1633 length_in_bits = param->bit_length;
1635 /* as the slice header is the last header data for one slice,
1636 * the last header flag is set to one.
1638 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1640 mfc_context->insert_object(ctx,
1643 ALIGN(length_in_bits, 32) >> 5,
1644 length_in_bits & 0x1f,
1648 !param->has_emulation_bytes,
/*
 * Lazily allocate and fill the per-QP MB/MV cost table BO for the current
 * slice type (I/P/B). Each QP gets a 32-byte row filled by
 * intel_h264_calc_mbmvcost_qp(); the BO is cached per slice type on
 * vme_context so it is built only once.
 * NOTE(review): early returns after the cache checks, the dri_bo_alloc
 * argument lines, dri_bo_map and the per-QP pointer advance are elided
 * in this extraction.
 */
1656 intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
1657 struct encode_state *encode_state,
1658 struct intel_encoder_context *encoder_context)
1660 struct i965_driver_data *i965 = i965_driver_data(ctx);
1661 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1662 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1665 uint8_t *cost_table;
1667 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* If the table for this slice type already exists, nothing to do. */
1670 if (slice_type == SLICE_TYPE_I) {
1671 if (vme_context->i_qp_cost_table)
1673 } else if (slice_type == SLICE_TYPE_P) {
1674 if (vme_context->p_qp_cost_table)
1677 if (vme_context->b_qp_cost_table)
1681 /* It is enough to allocate 32 bytes for each qp. */
1682 bo = dri_bo_alloc(i965->intel.bufmgr,
1688 assert(bo->virtual);
1689 cost_table = (uint8_t *)(bo->virtual);
/* One 32-byte cost row per QP value. */
1690 for (qp = 0; qp < QP_MAX; qp++) {
1691 intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
/* Cache the finished BO on the slot matching the slice type. */
1697 if (slice_type == SLICE_TYPE_I) {
1698 vme_context->i_qp_cost_table = bo;
1699 } else if (slice_type == SLICE_TYPE_P) {
1700 vme_context->p_qp_cost_table = bo;
1702 vme_context->b_qp_cost_table = bo;
1705 vme_context->cost_table_size = QP_MAX * 32;
/*
 * Bind the per-QP cost table BO (selected by slice type) as a VME buffer
 * surface at the given binding-table/surface-state offsets.
 * NOTE(review): the cost_table.bo assignment and the &cost_table argument
 * line of vme_buffer_suface_setup are elided in this extraction.
 */
1710 intel_h264_setup_cost_surface(VADriverContextP ctx,
1711 struct encode_state *encode_state,
1712 struct intel_encoder_context *encoder_context,
1713 unsigned long binding_table_offset,
1714 unsigned long surface_state_offset)
1716 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1717 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1721 struct i965_buffer_surface cost_table;
1723 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Pick the BO built by intel_h264_initialize_mbmv_cost for this slice type. */
1726 if (slice_type == SLICE_TYPE_I) {
1727 bo = vme_context->i_qp_cost_table;
1728 } else if (slice_type == SLICE_TYPE_P) {
1729 bo = vme_context->p_qp_cost_table;
1731 bo = vme_context->b_qp_cost_table;
/* One 32-byte block per QP, matching the layout written at build time. */
1735 cost_table.num_blocks = QP_MAX;
1736 cost_table.pitch = 16;
1737 cost_table.size_block = 32;
1739 vme_context->vme_buffer_suface_setup(ctx,
1740 &vme_context->gpe_context,
1742 binding_table_offset,
1743 surface_state_offset);
/*
1747 * the idea of conversion between qp and qstep comes from scaling process
1748 * of transform coeff for Luma component in H264 spec.
1750 * In order to avoid too small qstep, it is multiplied by 16.
 */
/* Map an H.264 QP to a (scaled) linear quantizer step: qstep = 2^(qp/6 - 2).
 * NOTE(review): the declaration/initialization of `value` and the return
 * statement are elided in this extraction. */
1752 static float intel_h264_qp_qstep(int qp)
1756 value = value / 6 - 2;
1757 qstep = powf(2, value);
/* Inverse of intel_h264_qp_qstep: recover the QP from a (scaled) linear
 * quantizer step via qp = 12 + 6*log2(qstep).
 * NOTE(review): the qp declaration and return are elided in this extraction. */
1761 static int intel_h264_qstep_qp(float qstep)
1765 qp = 12.0f + 6.0f * log2f(qstep);
/*
1771 * Currently it is based on the following assumption:
1772 * SUM(roi_area * 1 / roi_qstep) + non_area * 1 / nonroi_qstep =
1773 * total_aread * 1 / baseqp_qstep
1775 * qstep is the linearized quantizer of H264 quantizer
 */
/* Per-ROI region geometry (in macroblock units) plus its assigned QP.
 * NOTE(review): the struct keyword/typedef line and the remaining fields
 * (row_end_in_mb, col_end_in_mb, width_mbs, height_mbs, roi_qp — see their
 * use in intel_h264_enc_roi_cbr) are elided in this extraction. */
1778 int row_start_in_mb;
1780 int col_start_in_mb;
/*
 * CBR ROI handling: distribute the frame's quantization budget so that
 * ROI regions get their requested QP while the non-ROI area absorbs the
 * difference. Works in linear qstep space:
 *   SUM(roi_area/roi_qstep) + nonroi_area/nonroi_qstep = total_area/base_qstep
 * and solves for nonroi_qstep, then writes the resulting per-MB QP map
 * into vme_context->qp_per_mb.
 * NOTE(review): several lines (base_qp parameter, accumulator
 * initializations, ROI clipping against picture bounds, early-exit paths,
 * braces) are elided in this extraction.
 */
1790 intel_h264_enc_roi_cbr(VADriverContextP ctx,
1792 VAEncMiscParameterBufferROI *pMiscParamROI,
1793 struct encode_state *encode_state,
1794 struct intel_encoder_context *encoder_context)
1797 VAEncROI *region_roi;
1800 ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS];
1805 float qstep_nonroi, qstep_base;
1806 float roi_area, total_area, nonroi_area;
1809 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1810 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1811 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1812 int mbs_in_picture = width_in_mbs * height_in_mbs;
1814 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1815 VAStatus vaStatus = VA_STATUS_SUCCESS;
1817 if(pMiscParamROI != NULL)
/* Clamp the ROI count to the driver's supported maximum. */
1819 num_roi = (pMiscParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pMiscParamROI->num_roi;
1821 /* currently roi_value_is_qp_delta is the only supported mode of priority.
1823 * qp_delta set by user is added to base_qp, which is then clapped by
1824 * [base_qp-min_delta, base_qp+max_delta].
1826 ASSERT_RET(pMiscParamROI->roi_flags.bits.roi_value_is_qp_delta,VA_STATUS_ERROR_INVALID_PARAMETER);
1829 /* when the base_qp is lower than 12, the quality is quite good based
1830 * on the H264 test experience.
1831 * In such case it is unnecessary to adjust the quality for ROI region.
1833 if (base_qp <= 12) {
1834 nonroi_qp = base_qp;
/* Convert each ROI rectangle from pixels to an MB-aligned region and
 * accumulate its contribution in qstep space. */
1841 for (i = 0; i < num_roi; i++) {
1842 int row_start, row_end, col_start, col_end;
1843 int roi_width_mbs, roi_height_mbs;
1848 region_roi = (VAEncROI *)pMiscParamROI->roi + i;
1850 col_start = region_roi->roi_rectangle.x;
1851 col_end = col_start + region_roi->roi_rectangle.width;
1852 row_start = region_roi->roi_rectangle.y;
1853 row_end = row_start + region_roi->roi_rectangle.height;
/* Round the rectangle outwards to whole 16x16 macroblocks. */
1854 col_start = col_start / 16;
1855 col_end = (col_end + 15) / 16;
1856 row_start = row_start / 16;
1857 row_end = (row_end + 15) / 16;
1859 roi_width_mbs = col_end - col_start;
1860 roi_height_mbs = row_end - row_start;
1861 mbs_in_roi = roi_width_mbs * roi_height_mbs;
1863 param_regions[i].row_start_in_mb = row_start;
1864 param_regions[i].row_end_in_mb = row_end;
1865 param_regions[i].col_start_in_mb = col_start;
1866 param_regions[i].col_end_in_mb = col_end;
1867 param_regions[i].width_mbs = roi_width_mbs;
1868 param_regions[i].height_mbs = roi_height_mbs;
/* ROI QP = base QP + user delta, clipped to the legal H.264 range. */
1870 roi_qp = base_qp + region_roi->roi_value;
1871 BRC_CLIP(roi_qp, 1, 51);
1873 param_regions[i].roi_qp = roi_qp;
1874 qstep_roi = intel_h264_qp_qstep(roi_qp);
1876 roi_area += mbs_in_roi;
1877 sum_roi += mbs_in_roi / qstep_roi;
1880 total_area = mbs_in_picture;
1881 nonroi_area = total_area - roi_area;
/* Solve the budget equation for the non-ROI qstep, then back to QP. */
1883 qstep_base = intel_h264_qp_qstep(base_qp);
1884 temp = (total_area / qstep_base - sum_roi);
1889 qstep_nonroi = nonroi_area / temp;
1890 nonroi_qp = intel_h264_qstep_qp(qstep_nonroi);
1893 BRC_CLIP(nonroi_qp, 1, 51);
/* Fill the whole map with the non-ROI QP, then overwrite ROI rows. */
1896 memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture);
1900 for (i = 0; i < num_roi; i++) {
1901 for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) {
1902 qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb;
1903 memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs);
/*
 * Top-level ROI configuration for H.264 encoding: validates the incoming
 * VAEncMiscParameterBufferROI, builds the per-MB QP map (delegating to
 * intel_h264_enc_roi_cbr for CBR, or applying clipped QP deltas directly
 * for CQP), and disables ROI for unsupported rate-control modes or
 * multi-slice pictures.
 * NOTE(review): early returns, some brace lines and the row clipping
 * against picture bounds are elided in this extraction.
 */
1911 intel_h264_enc_roi_config(VADriverContextP ctx,
1912 struct encode_state *encode_state,
1913 struct intel_encoder_context *encoder_context)
1917 VAEncROI *region_roi;
1918 struct i965_driver_data *i965 = i965_driver_data(ctx);
1919 VAEncMiscParameterBuffer* pMiscParamROI;
1920 VAEncMiscParameterBufferROI *pParamROI = NULL;
1921 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1922 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1923 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1924 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1925 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1927 int row_start, row_end, col_start, col_end;
1930 vme_context->roi_enabled = 0;
1931 /* Restriction: Disable ROI when multi-slice is enabled */
1932 if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
1935 if (encode_state->misc_param[VAEncMiscParameterTypeROI][0] != NULL) {
1936 pMiscParamROI = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeROI][0]->buffer;
1937 pParamROI = (VAEncMiscParameterBufferROI *)pMiscParamROI->data;
1939 /* check whether number of ROI is correct */
1940 num_roi = (pParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pParamROI->num_roi;
1944 vme_context->roi_enabled = 1;
1946 if (!vme_context->roi_enabled)
/* (Re)allocate the per-MB QP map when the picture geometry changed. */
1949 if ((vme_context->saved_width_mbs != width_in_mbs) ||
1950 (vme_context->saved_height_mbs != height_in_mbs)) {
1951 free(vme_context->qp_per_mb);
1952 vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);
1954 vme_context->saved_width_mbs = width_in_mbs;
1955 vme_context->saved_height_mbs = height_in_mbs;
1956 assert(vme_context->qp_per_mb);
1958 if (encoder_context->rate_control_mode == VA_RC_CBR) {
1960 * TODO: More complex Qp adjust needs to be added.
1961 * Currently it is initialized to slice_qp.
1963 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1965 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1967 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1968 intel_h264_enc_roi_cbr(ctx, qp, pParamROI,encode_state, encoder_context);
1970 } else if (encoder_context->rate_control_mode == VA_RC_CQP){
1971 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1972 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1975 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
/* CQP: fill the whole map with the base QP, then apply ROI deltas.
 * Iterating from the last ROI to the first gives earlier ROIs priority
 * where regions overlap. */
1976 memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
1979 for (j = num_roi; j ; j--) {
1980 int qp_delta, qp_clip;
1982 region_roi = (VAEncROI *)pParamROI->roi + j - 1;
1984 col_start = region_roi->roi_rectangle.x;
1985 col_end = col_start + region_roi->roi_rectangle.width;
1986 row_start = region_roi->roi_rectangle.y;
1987 row_end = row_start + region_roi->roi_rectangle.height;
/* Round the rectangle outwards to whole 16x16 macroblocks. */
1989 col_start = col_start / 16;
1990 col_end = (col_end + 15) / 16;
1991 row_start = row_start / 16;
1992 row_end = (row_end + 15) / 16;
1994 qp_delta = region_roi->roi_value;
1995 qp_clip = qp + qp_delta;
1997 BRC_CLIP(qp_clip, 1, 51);
1999 for (i = row_start; i < row_end; i++) {
2000 qp_ptr = vme_context->qp_per_mb + (i * width_in_mbs) + col_start;
2001 memset(qp_ptr, qp_clip, (col_end - col_start));
2006 * TODO: Disable it for non CBR-CQP.
2008 vme_context->roi_enabled = 0;
/* Gen7 needs the soft (CPU-built) batch path when ROI per-MB QP is active. */
2011 if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info))
2012 encoder_context->soft_batch_force = 1;
/*
 * HEVC counterpart of avc_temporal_find_surface: pick the reference from
 * ref_list temporally closest to curr_pic by pic_order_cnt distance,
 * skipping invalid entries.
 * NOTE(review): the return-type line, num_pictures parameter line, the
 * direction handling, `found` update and return are elided in this
 * extraction.
 */
2019 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
2020 VAPictureHEVC *ref_list,
2024 int i, found = -1, min = 0x7FFFFFFF;
2026 for (i = 0; i < num_pictures; i++) {
/* Skip invalid or unset reference entries. */
2029 if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
2030 (ref_list[i].picture_id == VA_INVALID_SURFACE))
/* POC distance between current picture and candidate reference. */
2033 tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
/* Keep the candidate with the smallest positive distance. */
2038 if (tmp > 0 && tmp < min) {
/*
 * intel_hevc_vme_reference_state:
 * Select the reference picture the VME kernel should use for one reference
 * list (list_index 0 = L0/forward, 1 = L1/backward) and publish it into the
 * VME context: used_references[], used_reference_objects[], and the packed
 * ref_index_in_mb[] word.  The caller-supplied vme_source_surface_state
 * callback is invoked to set up the binding-table entry for the chosen
 * surface.
 *
 * NOTE(review): this listing is an extraction with dropped lines — the
 * return type, several closing braces / `else` lines, and the declarations
 * of ref_idx / surface_index / list_index are not visible here.  Comments
 * below describe only what the visible lines establish.
 */
2047 intel_hevc_vme_reference_state(VADriverContextP ctx,
2048 struct encode_state *encode_state,
2049 struct intel_encoder_context *encoder_context,
/* per-surface binding setup hook supplied by the gen-specific caller */
2052 void (* vme_source_surface_state)(
2053 VADriverContextP ctx,
2055 struct object_surface *obj_surface,
2056 struct intel_encoder_context *encoder_context))
2058 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2059 struct object_surface *obj_surface = NULL;
2060 struct i965_driver_data *i965 = i965_driver_data(ctx);
2061 VASurfaceID ref_surface_id;
2062 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2063 VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
/* only the first slice's parameter buffer drives the reference choice */
2064 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2065 int max_num_references;
2066 VAPictureHEVC *curr_pic;
2067 VAPictureHEVC *ref_list;
2069 unsigned int is_hevc10 = 0;
2070 GenHevcSurface *hevc_encoder_surface = NULL;
/* 10-bit content if either plane has a bit depth above 8
 * (is_hevc10 is presumably set in a dropped line just below) */
2072 if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
2073 || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
/* pick the list-specific size and slice reference array */
2076 if (list_index == 0) {
2077 max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
2078 ref_list = slice_param->ref_pic_list0;
2080 max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
2081 ref_list = slice_param->ref_pic_list1;
/* single-reference case: take entry 0 of the list directly, no search */
2084 if (max_num_references == 1) {
2085 if (list_index == 0) {
2086 ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
2087 vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
2089 ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
2090 vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
2093 if (ref_surface_id != VA_INVALID_SURFACE)
2094 obj_surface = SURFACE(ref_surface_id);
/* fallback path (dropped branch header above): fall back to the
 * driver-tracked reference object / PPS reference_frames entry */
2098 obj_surface = encode_state->reference_objects[list_index];
2099 vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
/* multi-reference case: search the list for the temporally nearest picture */
2104 curr_pic = &pic_param->decoded_curr_pic;
2106 /* select the reference frame in temporal space */
2107 ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
2108 ref_surface_id = ref_list[ref_idx].picture_id;
2110 if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
2111 obj_surface = SURFACE(ref_surface_id);
/* publish the chosen reference into the VME context */
2113 vme_context->used_reference_objects[list_index] = obj_surface;
2114 vme_context->used_references[list_index] = &ref_list[ref_idx];
/* a valid surface was resolved: record it and wire up its binding entry */
2119 assert(ref_idx >= 0);
2120 vme_context->used_reference_objects[list_index] = obj_surface;
/* VME reads the NV12 shadow surface kept in the per-surface private data
 * — presumably the 8-bit proxy for 10-bit content; confirm in full file */
2123 hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2124 assert(hevc_encoder_surface);
2125 obj_surface = hevc_encoder_surface->nv12_surface_obj;
2127 vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
/* pack the list-relative reference index into the per-MB ref word
 * (continuation of this expression is among the dropped lines) */
2128 vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
/* no usable reference: clear all published state for this list */
2133 vme_context->used_reference_objects[list_index] = NULL;
2134 vme_context->used_references[list_index] = NULL;
2135 vme_context->ref_index_in_mb[list_index] = 0;
2139 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
2140 struct encode_state *encode_state,
2141 struct intel_encoder_context *encoder_context)
2143 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2144 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2145 VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2146 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2147 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2148 int qp, m_cost, j, mv_count;
2149 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
2150 float lambda, m_costf;
2152 /* here no SI SP slice for HEVC, do not need slice fixup */
2153 int slice_type = slice_param->slice_type;
2156 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2158 if(encoder_context->rate_control_mode == VA_RC_CBR)
2160 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
2161 if(slice_type == HEVC_SLICE_B) {
2162 if(pSequenceParameter->ip_period == 1)
2164 slice_type = HEVC_SLICE_P;
2165 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2167 }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
2168 slice_type = HEVC_SLICE_P;
2169 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2175 if (vme_state_message == NULL)
2178 assert(qp <= QP_MAX);
2179 lambda = intel_lambda_qp(qp);
2180 if (slice_type == HEVC_SLICE_I) {
2181 vme_state_message[MODE_INTRA_16X16] = 0;
2182 m_cost = lambda * 4;
2183 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2184 m_cost = lambda * 16;
2185 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2186 m_cost = lambda * 3;
2187 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2190 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
2191 for (j = 1; j < 3; j++) {
2192 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2193 m_cost = (int)m_costf;
2194 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2197 for (j = 4; j <= 64; j *= 2) {
2198 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2199 m_cost = (int)m_costf;
2200 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
2205 vme_state_message[MODE_INTRA_16X16] = 0x4a;
2206 vme_state_message[MODE_INTRA_8X8] = 0x4a;
2207 vme_state_message[MODE_INTRA_4X4] = 0x4a;
2208 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2209 vme_state_message[MODE_INTER_16X16] = 0x4a;
2210 vme_state_message[MODE_INTER_16X8] = 0x4a;
2211 vme_state_message[MODE_INTER_8X8] = 0x4a;
2212 vme_state_message[MODE_INTER_8X4] = 0x4a;
2213 vme_state_message[MODE_INTER_4X4] = 0x4a;
2214 vme_state_message[MODE_INTER_BWD] = 0x2a;
2217 m_costf = lambda * 10;
2218 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2219 m_cost = lambda * 14;
2220 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2221 m_cost = lambda * 24;
2222 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2223 m_costf = lambda * 3.5;
2225 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2226 if (slice_type == HEVC_SLICE_P) {
2227 m_costf = lambda * 2.5;
2229 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2230 m_costf = lambda * 4;
2232 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2233 m_costf = lambda * 1.5;
2235 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2236 m_costf = lambda * 3;
2238 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2239 m_costf = lambda * 5;
2241 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2242 /* BWD is not used in P-frame */
2243 vme_state_message[MODE_INTER_BWD] = 0;
2245 m_costf = lambda * 2.5;
2247 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2248 m_costf = lambda * 5.5;
2250 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2251 m_costf = lambda * 3.5;
2253 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2254 m_costf = lambda * 5.0;
2256 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2257 m_costf = lambda * 6.5;
2259 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2260 m_costf = lambda * 1.5;
2262 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);