2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
45 #include "intel_media.h"
48 #define log2f(x) (logf(x)/(float)M_LN2)
/*
 * Normalize an H.264 slice type for encoding: SP is folded into P and
 * SI into I.  Any value that is still not P/I/B afterwards is reported
 * once (WARN_ONCE) and coerced to SLICE_TYPE_B.
 */
51 int intel_avc_enc_slice_type_fixup(int slice_type)
53     if (slice_type == SLICE_TYPE_SP ||
54         slice_type == SLICE_TYPE_P)
55         slice_type = SLICE_TYPE_P;
56     else if (slice_type == SLICE_TYPE_SI ||
57              slice_type == SLICE_TYPE_I)
58         slice_type = SLICE_TYPE_I;
60         if (slice_type != SLICE_TYPE_B)
61             WARN_ONCE("Invalid slice type for H.264 encoding!\n");
63         slice_type = SLICE_TYPE_B;
/*
 * Seed the three per-slice-type bit rate control contexts (indices
 * 0..2, one each for I/P/B) with fixed defaults: +/-6 maximum QP
 * modifiers, grow/shrink init and resistance values, and the 6-entry
 * Correct[] table consumed by the BRC correction logic.
 */
70 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
71                                         struct intel_encoder_context *encoder_context)
73     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
76     for(i = 0 ; i < 3; i++) { /* one context per slice type: I, P, B */
77         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
78         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
79         mfc_context->bit_rate_control_context[i].GrowInit = 6;
80         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
81         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
82         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
84         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
85         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
86         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
87         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
88         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
89         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
/*
 * (Re)initialize the BRC state from the encoder context: HRD buffer
 * fullness, per-layer per-slice-type target frame sizes and the
 * initial QP estimates (linearly interpolated between the rough
 * frame-size bounds for QP=1 and QP=51, then clipped to [1, 51]).
 */
93 static void intel_mfc_brc_init(struct encode_state *encode_state,
94                                struct intel_encoder_context* encoder_context)
96     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
97     double bitrate, framerate;
/* Rough NV12 frame-size estimates at the QP extremes: 10% and 0.1% of
 * the raw 12 bpp (8 * 3 / 2) frame, used as interpolation anchors. */
98     double qp1_size = 0.1 * 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
99     double qp51_size = 0.001 * 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
101     int inum = encoder_context->brc.num_iframes_in_gop,
102         pnum = encoder_context->brc.num_pframes_in_gop,
103         bnum = encoder_context->brc.num_bframes_in_gop; /* Gop structure: number of I, P, B frames in the Gop. */
104     int intra_period = encoder_context->brc.gop_size;
107     mfc_context->brc.mode = encoder_context->rate_control_mode;
109     mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
110     mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
111     mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
113     mfc_context->hrd.buffer_size = encoder_context->brc.hrd_buffer_size;
/* NOTE(review): the (double) cast below applies to the comparison
 * result, not to the fullness value — harmless (0.0/1.0 keeps the
 * same truthiness) but misleading to read. */
114     mfc_context->hrd.current_buffer_fullness =
115         (double)(encoder_context->brc.hrd_initial_buffer_fullness < mfc_context->hrd.buffer_size) ?
116         encoder_context->brc.hrd_initial_buffer_fullness : mfc_context->hrd.buffer_size / 2.;
117     mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
118     mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
119     mfc_context->hrd.violation_noted = 0;
121     for (i = 0; i < encoder_context->layer.num_layers; i++) {
122         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = 26; /* neutral starting QP */
123         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 26;
124         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = 26;
/* Layer 0 uses absolute rate/framerate; higher layers use the delta
 * over the previous layer (temporal-layer incremental budget). */
127             bitrate = encoder_context->brc.bits_per_second[0];
128             framerate = (double)encoder_context->brc.framerate_per_100s[0] / 100.0;
130             bitrate = (encoder_context->brc.bits_per_second[i] - encoder_context->brc.bits_per_second[i - 1]);
131             framerate = (double)(encoder_context->brc.framerate_per_100s[i] - encoder_context->brc.framerate_per_100s[i - 1]) / 100.0;
134         if (i == encoder_context->layer.num_layers - 1)
137             factor = (double)encoder_context->brc.framerate_per_100s[i] / encoder_context->brc.framerate_per_100s[i + 1];
/* Split the GOP bit budget across I/P/B using the BRC_PWEIGHT /
 * BRC_BWEIGHT weights relative to the I-frame size. */
139         mfc_context->brc.target_frame_size[i][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period * factor)/framerate) /
140                                                                     (double)(inum + BRC_PWEIGHT * pnum * factor + BRC_BWEIGHT * bnum * factor));
141         mfc_context->brc.target_frame_size[i][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
142         mfc_context->brc.target_frame_size[i][SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
144         bpf = mfc_context->brc.bits_per_frame[i] = bitrate/framerate;
/* Interpolate the initial P QP between the QP=51 and QP=1 anchors. */
146         if ((bpf > qp51_size) && (bpf < qp1_size)) {
147             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
149         else if (bpf >= qp1_size)
150             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 1;
151         else if (bpf <= qp51_size)
152             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51;
154         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P];
155         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I];
157         BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I], 1, 51);
158         BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P], 1, 51);
159         BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B], 1, 51);
/*
 * Update the HRD buffer model after a frame of 'frame_bits' was
 * produced.  Draining below zero restores the previous fullness and
 * reports BRC_UNDERFLOW; after refilling with the per-frame budget,
 * exceeding the buffer size clamps in VBR mode or restores the
 * previous fullness otherwise (overflow path — return value for that
 * branch is on a line not visible in this view).
 */
163 int intel_mfc_update_hrd(struct encode_state *encode_state,
164                          struct intel_encoder_context *encoder_context,
167     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
168     double prev_bf = mfc_context->hrd.current_buffer_fullness; /* saved for rollback */
170     mfc_context->hrd.current_buffer_fullness -= frame_bits;
172     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
173         mfc_context->hrd.current_buffer_fullness = prev_bf;
174         return BRC_UNDERFLOW;
/* Refill with this layer's nominal per-frame bit budget. */
177     mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame[encoder_context->layer.curr_frame_layer_id];
178     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
179         if (mfc_context->brc.mode == VA_RC_VBR)
180             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
182             mfc_context->hrd.current_buffer_fullness = prev_bf;
186     return BRC_NO_HRD_VIOLATION;
/*
 * Post-encode BRC step: check HRD compliance for the frame just
 * packed, predict the QP for the next frame of the same slice type
 * from the target-vs-actual frame size ratio, add a buffer-fullness
 * based correction, then propagate QP adjustments to the other slice
 * types.  Returns the (possibly upgraded) HRD status.
 */
189 int intel_mfc_brc_postpack(struct encode_state *encode_state,
190                            struct intel_encoder_context *encoder_context,
193     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
194     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
195     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
196     int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
197     int curr_frame_layer_id, next_frame_layer_id;
199     int qp; // quantizer of previously encoded slice of current type
200     int qpn; // predicted quantizer for next frame of current type in integer format
201     double qpf; // predicted quantizer for next frame of current type in float format
202     double delta_qp; // QP correction
203     int target_frame_size, frame_size_next;
205      * x - how far we are from HRD buffer borders
206      * y - how far we are from target HRD buffer fullness
209     double frame_size_alpha, factor;
/* Single layer (or no layer schedule): everything is layer 0. */
211     if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0) {
212         curr_frame_layer_id = 0;
213         next_frame_layer_id = 0;
215         curr_frame_layer_id = encoder_context->layer.curr_frame_layer_id;
216         next_frame_layer_id = encoder_context->layer.frame_layer_ids[encoder_context->num_frames_in_sequence % encoder_context->layer.size_frame_layer_ids];
219     /* check HRD compliance first */
220     sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
222     if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
225         next_frame_layer_id = curr_frame_layer_id;
228     if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0)
231         factor = (double)encoder_context->brc.framerate_per_100s[next_frame_layer_id] / encoder_context->brc.framerate_per_100s[encoder_context->layer.num_layers - 1];
233     qpi = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I];
234     qpp = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P];
235     qpb = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B];
237     qp = mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype];
239     target_frame_size = mfc_context->brc.target_frame_size[next_frame_layer_id][slicetype];
/* Smoothing factor: how strongly the next frame size is pulled back
 * towards the target (disabled for tiny HRD buffers, capped at 30). */
240     if (mfc_context->hrd.buffer_capacity < 5)
241         frame_size_alpha = 0;
243         frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype] * factor;
244     if (frame_size_alpha > 30) frame_size_alpha = 30;
245     frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
246                       (double)(frame_size_alpha + 1.);
248     /* frame_size_next: avoiding negative number and too small value */
249     if ((double)frame_size_next < (double)(target_frame_size * 0.25))
250         frame_size_next = (int)((double)target_frame_size * 0.25);
/* Predicted QP scales inversely with the predicted frame size. */
252     qpf = (double)qp * target_frame_size / frame_size_next;
253     qpn = (int)(qpf + 0.5);
256     /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
257     mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
258     if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
260         mfc_context->brc.qpf_rounding_accumulator = 0.;
261     } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
263         mfc_context->brc.qpf_rounding_accumulator = 0.;
266     /* making sure that QP is not changing too fast */
267     if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
268     else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
269     /* making sure that with QP predictions we did do not leave QPs range */
270     BRC_CLIP(qpn, 1, 51);
272     /* calculating QP delta as some function*/
273     x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
275         x /= mfc_context->hrd.target_buffer_fullness;
276         y = mfc_context->hrd.current_buffer_fullness;
279         x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
280         y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
282     if (y < 0.01) y = 0.01; /* avoid exp(-1/y) blow-up at y -> 0 */
284     else if (x < -1) x = -1;
/* Smooth correction: bounded by BRC_QP_MAX_CHANGE, damped near the
 * buffer target, sinusoidal in the normalized distance x. */
286     delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
287     qpn = (int)(qpn + delta_qp + 0.5);
289     /* making sure that with QP predictions we did do not leave QPs range */
290     BRC_CLIP(qpn, 1, 51);
292     if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
293         /* correcting QPs of slices of other types */
294         if (slicetype == SLICE_TYPE_P) {
295             if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
296                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
297             if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
298                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
299         } else if (slicetype == SLICE_TYPE_I) {
300             if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
301                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
302             if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
303                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
304         } else { // SLICE_TYPE_B
305             if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
306                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
307             if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
308                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
310         BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I], 1, 51);
311         BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P], 1, 51);
312         BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B], 1, 51);
313     } else if (sts == BRC_UNDERFLOW) { // underflow
314         if (qpn <= qp) qpn = qp + 1; /* force a higher QP to shrink frames */
317             sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
319     } else if (sts == BRC_OVERFLOW) {
320         if (qpn >= qp) qpn = qp - 1; /* force a lower QP to grow frames */
321         if (qpn < 1) { // < 0 (?) overflow with minQP
323             sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
327     mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype] = qpn;
/*
 * Initialize the VUI HRD timing state used for SEI buffering-period /
 * picture-timing messages.  The bit rate is expressed in kbit units
 * (>> 10) and the initial CPB removal delay targets half-buffer
 * fullness in 90 kHz clock ticks.
 */
332 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
333                                        struct intel_encoder_context *encoder_context)
335     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
336     unsigned int rate_control_mode = encoder_context->rate_control_mode;
337     int target_bit_rate = encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
339     // currently only CBR mode is supported.
340     if (rate_control_mode == VA_RC_CBR) {
341         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
342         mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
343         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
344         mfc_context->vui_hrd.i_frame_number = 0;
346         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24; /* field widths in bits, per H.264 VUI */
347         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
348         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
/*
 * Advance the HRD frame counter after a frame has been submitted;
 * the counter scales the per-frame CPB removal delay in timing SEI.
 */
354 intel_mfc_hrd_context_update(struct encode_state *encode_state,
355                              struct gen6_mfc_context *mfc_context)
357     mfc_context->vui_hrd.i_frame_number++;
/*
 * Sum the macroblocks covered by all submitted slices and compare
 * against the full-frame MB count derived from the surface state.
 * A match means the slices cover the whole progressive frame; the
 * branch outcomes (return values) are on lines not visible in this
 * view of the file.
 */
360 int intel_mfc_interlace_check(VADriverContextP ctx,
361                               struct encode_state *encode_state,
362                               struct intel_encoder_context *encoder_context)
364     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
365     VAEncSliceParameterBufferH264 *pSliceParameter;
368     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
369     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
371     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
372         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer;
373         mbCount += pSliceParameter->num_macroblocks;
376     if ( mbCount == ( width_in_mbs * height_in_mbs ) )
/*
 * Per-frame BRC entry point: only applies to H.264 (and MVC) in CBR
 * mode.  When a rate-control reset was requested, reinitialize both
 * the bit rate control contexts and the HRD context.
 */
382 void intel_mfc_brc_prepare(struct encode_state *encode_state,
383                            struct intel_encoder_context *encoder_context)
385     unsigned int rate_control_mode = encoder_context->rate_control_mode;
387     if (encoder_context->codec != CODEC_H264 &&
388         encoder_context->codec != CODEC_H264_MVC)
391     if (rate_control_mode == VA_RC_CBR) {
392         /*Programing bit rate control */
393         if (encoder_context->brc.need_reset) {
394             intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
395             intel_mfc_brc_init(encode_state, encoder_context);
398         /*Programing HRD control */
399         if (encoder_context->brc.need_reset)
400             intel_mfc_hrd_context_init(encode_state, encoder_context);
/*
 * Emit the packed SPS, PPS and SEI headers (when the application
 * supplied them) into the slice batch via mfc_context->insert_object.
 * Each header's emulation-prevention skip count is computed first; if
 * no SEI was packed but CBR is active, a timing SEI is synthesized
 * from the driver's HRD state instead.
 */
404 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
405                                               struct encode_state *encode_state,
406                                               struct intel_encoder_context *encoder_context,
407                                               struct intel_batchbuffer *slice_batch)
409     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
410     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
411     unsigned int rate_control_mode = encoder_context->rate_control_mode;
412     unsigned int skip_emul_byte_cnt;
/* --- packed SPS --- */
414     if (encode_state->packed_header_data[idx]) {
415         VAEncPackedHeaderParameterBuffer *param = NULL;
416         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
417         unsigned int length_in_bits;
419         assert(encode_state->packed_header_param[idx]);
420         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
421         length_in_bits = param->bit_length;
423         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
424         mfc_context->insert_object(ctx,
427                                    ALIGN(length_in_bits, 32) >> 5,
428                                    length_in_bits & 0x1f,
432                                    !param->has_emulation_bytes,
/* --- packed PPS --- */
436     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
438     if (encode_state->packed_header_data[idx]) {
439         VAEncPackedHeaderParameterBuffer *param = NULL;
440         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
441         unsigned int length_in_bits;
443         assert(encode_state->packed_header_param[idx]);
444         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
445         length_in_bits = param->bit_length;
447         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
449         mfc_context->insert_object(ctx,
452                                    ALIGN(length_in_bits, 32) >> 5,
453                                    length_in_bits & 0x1f,
457                                    !param->has_emulation_bytes,
/* --- packed SEI, or driver-built timing SEI in CBR mode --- */
461     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
463     if (encode_state->packed_header_data[idx]) {
464         VAEncPackedHeaderParameterBuffer *param = NULL;
465         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
466         unsigned int length_in_bits;
468         assert(encode_state->packed_header_param[idx]);
469         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
470         length_in_bits = param->bit_length;
472         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
473         mfc_context->insert_object(ctx,
476                                    ALIGN(length_in_bits, 32) >> 5,
477                                    length_in_bits & 0x1f,
481                                    !param->has_emulation_bytes,
483     } else if (rate_control_mode == VA_RC_CBR) {
485         struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
487         unsigned char *sei_data = NULL;
489         int length_in_bits = build_avc_sei_buffer_timing(
490             mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
491             mfc_context->vui_hrd.i_initial_cpb_removal_delay,
493             mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
494             mfc_context->vui_hrd.i_dpb_output_delay_length,
497         mfc_context->insert_object(ctx,
499                                    (unsigned int *)sei_data,
500                                    ALIGN(length_in_bits, 32) >> 5,
501                                    length_in_bits & 0x1f,
/*
 * Prepare all MFC input/output objects for an H.264 encode pass:
 * scan the slices to decide whether in-loop deblocking output is
 * needed, bind the reconstructed surface (allocating its top/bottom
 * direct-MV buffers on first use), wire up the reference surfaces
 * and their DMV buffers, the source YUV, and the coded buffer.
 */
511 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
512                                struct encode_state *encode_state,
513                                struct intel_encoder_context *encoder_context)
515     struct i965_driver_data *i965 = i965_driver_data(ctx);
516     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
517     struct object_surface *obj_surface;
518     struct object_buffer *obj_buffer;
519     GenAvcSurface *gen6_avc_surface;
521     VAStatus vaStatus = VA_STATUS_SUCCESS;
522     int i, j, enable_avc_ildb = 0;
523     VAEncSliceParameterBufferH264 *slice_param;
524     struct i965_coded_buffer_segment *coded_buffer_segment;
525     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
526     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
527     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
529     if (IS_GEN6(i965->intel.device_info)) {
530         /* On the SNB it should be fixed to 128 for the DMV buffer */
/* Deblocking is enabled if any slice does not disable it (idc != 1). */
534     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
535         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
536         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
538         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
539             assert((slice_param->slice_type == SLICE_TYPE_I) ||
540                    (slice_param->slice_type == SLICE_TYPE_SI) ||
541                    (slice_param->slice_type == SLICE_TYPE_P) ||
542                    (slice_param->slice_type == SLICE_TYPE_SP) ||
543                    (slice_param->slice_type == SLICE_TYPE_B));
545             if (slice_param->disable_deblocking_filter_idc != 1) {
554     /*Setup all the input&output object*/
556     /* Setup current frame and current direct mv buffer*/
557     obj_surface = encode_state->reconstructed_object;
558     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* First use of this surface: allocate its top/bottom field direct-MV
 * buffers (68 bytes per MB each) and attach them as private data. */
560     if ( obj_surface->private_data == NULL) {
561         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
562         assert(gen6_avc_surface);
563         gen6_avc_surface->dmv_top =
564             dri_bo_alloc(i965->intel.bufmgr,
566                          68 * width_in_mbs * height_in_mbs,
568         gen6_avc_surface->dmv_bottom =
569             dri_bo_alloc(i965->intel.bufmgr,
571                          68 * width_in_mbs * height_in_mbs,
573         assert(gen6_avc_surface->dmv_top);
574         assert(gen6_avc_surface->dmv_bottom);
575         obj_surface->private_data = (void *)gen6_avc_surface;
576         obj_surface->free_private_data = (void *)gen_free_avc_surface;
/* The current frame's DMV buffers occupy the last two slots. */
578     gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
579     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
580     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
581     dri_bo_reference(gen6_avc_surface->dmv_top);
582     dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* Reconstructed pixels land post- or pre-deblocking depending on ILDB. */
584     if (enable_avc_ildb) {
585         mfc_context->post_deblocking_output.bo = obj_surface->bo;
586         dri_bo_reference(mfc_context->post_deblocking_output.bo);
588         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
589         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
592     mfc_context->surface_state.width = obj_surface->orig_width;
593     mfc_context->surface_state.height = obj_surface->orig_height;
594     mfc_context->surface_state.w_pitch = obj_surface->width;
595     mfc_context->surface_state.h_pitch = obj_surface->height;
597     /* Setup reference frames and direct mv buffers*/
598     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
599         obj_surface = encode_state->reference_objects[i];
601         if (obj_surface && obj_surface->bo) {
602             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
603             dri_bo_reference(obj_surface->bo);
605             /* Check DMV buffer */
606             if ( obj_surface->private_data == NULL) {
608                 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
609                 assert(gen6_avc_surface);
610                 gen6_avc_surface->dmv_top =
611                     dri_bo_alloc(i965->intel.bufmgr,
613                                  68 * width_in_mbs * height_in_mbs,
615                 gen6_avc_surface->dmv_bottom =
616                     dri_bo_alloc(i965->intel.bufmgr,
618                                  68 * width_in_mbs * height_in_mbs,
620                 assert(gen6_avc_surface->dmv_top);
621                 assert(gen6_avc_surface->dmv_bottom);
622                 obj_surface->private_data = gen6_avc_surface;
623                 obj_surface->free_private_data = gen_free_avc_surface;
626             gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
627             /* Setup DMV buffer */
628             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
629             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
630             dri_bo_reference(gen6_avc_surface->dmv_top);
631             dri_bo_reference(gen6_avc_surface->dmv_bottom);
637     mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
638     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* PAK output goes after the coded-buffer header; the tail 0x1000 is reserved. */
640     obj_buffer = encode_state->coded_buf_object;
641     bo = obj_buffer->buffer_store->bo;
642     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
643     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
644     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
645     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
648     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
649     coded_buffer_segment->mapped = 0;
650     coded_buffer_segment->codec = encoder_context->codec;
656 * The LUT uses the pair of 4-bit units: (shift, base) structure.
658 * So it is necessary to convert one cost into the nearest LUT format.
660 * 2^K *x = 2^n * (1 + deltaX)
661 * k + log2(x) = n + log2(1 + deltaX)
662 * log2(x) = n - k + log2(1 + deltaX)
663 * As X is in the range of [1, 15]
664 * 4 > n - k + log2(1 + deltaX) >= 0
665 * => n + log2(1 + deltaX) >= k > n - 4 + log2(1 + deltaX)
666 * Then we can derive the corresponding K and get the nearest LUT format.
/*
 * Encode 'value' into the hardware LUT cost format described in the
 * comment above: one byte holding (shift << 4) | base, so the decoded
 * cost is base << shift.  Candidate shifts around log2(value) are
 * tried and the one with the smallest absolute error wins; the result
 * is capped so the decoded value never exceeds the decoded 'max'.
 */
668 int intel_format_lutvalue(int value, int max)
671     int logvalue, temp1, temp2;
676     logvalue = (int)(log2f((float)value));
680         int error, temp_value, base, j, temp_err;
682         j = logvalue - 4 + 1; /* smallest shift that keeps base within 4 bits */
684         for(; j <= logvalue; j++) {
688             base = (value + (1 << (j - 1)) - 1) >> j; /* rounded base for this shift */
693             temp_value = base << j;
694             temp_err = abs(value - temp_value);
695             if (temp_err < error) {
697                 ret = (j << 4) | base;
/* Compare decoded magnitudes to enforce the cap against 'max'. */
703     temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
704     temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
713 #define VP8_QP_MAX 128
/*
 * Lagrange multiplier for mode/MV cost: lambda = round(2^(qp/6 - 2)).
 * NOTE(review): 'value' is presumably seeded from qp on a line above
 * this one — confirm against the full source.
 */
716 static float intel_lambda_qp(int qp)
718     float value, lambdaf;
720     value = value / 6 - 2;
723     lambdaf = roundf(powf(2, value));
/*
 * Fill the VME state message's mode and MV cost table for H.264 as a
 * function of QP: costs are lambda-scaled, converted to the hardware
 * (shift|base) LUT byte via intel_format_lutvalue, with per-slice-type
 * branches (I / P / B) and fixed fallback values in one branch.
 */
728 void intel_h264_calc_mbmvcost_qp(int qp,
730                                  uint8_t *vme_state_message)
732     int m_cost, j, mv_count;
733     float lambda, m_costf;
735     assert(qp <= QP_MAX);
736     lambda = intel_lambda_qp(qp);
739     vme_state_message[MODE_CHROMA_INTRA] = 0;
740     vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
742     if (slice_type == SLICE_TYPE_I) {
743         vme_state_message[MODE_INTRA_16X16] = 0;
745         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
746         m_cost = lambda * 16;
747         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
749         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* MV costs grow logarithmically with MV range (entries MV0..). */
752         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
753         for (j = 1; j < 3; j++) {
754             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
755             m_cost = (int)m_costf;
756             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
759         for (j = 4; j <= 64; j *= 2) {
760             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
761             m_cost = (int)m_costf;
762             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
/* Fixed fallback mode costs (condition on a line not in this view). */
767         vme_state_message[MODE_INTRA_16X16] = 0x4a;
768         vme_state_message[MODE_INTRA_8X8] = 0x4a;
769         vme_state_message[MODE_INTRA_4X4] = 0x4a;
770         vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
771         vme_state_message[MODE_INTER_16X16] = 0x4a;
772         vme_state_message[MODE_INTER_16X8] = 0x4a;
773         vme_state_message[MODE_INTER_8X8] = 0x4a;
774         vme_state_message[MODE_INTER_8X4] = 0x4a;
775         vme_state_message[MODE_INTER_4X4] = 0x4a;
776         vme_state_message[MODE_INTER_BWD] = 0x2a;
/* Lambda-scaled intra costs for P/B slices. */
779         m_costf = lambda * 10;
780         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
781         m_cost = lambda * 14;
782         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
783         m_cost = lambda * 24;
784         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
785         m_costf = lambda * 3.5;
787         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
788         if (slice_type == SLICE_TYPE_P) {
789             m_costf = lambda * 2.5;
791             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
792             m_costf = lambda * 4;
794             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
795             m_costf = lambda * 1.5;
797             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
798             m_costf = lambda * 3;
800             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
801             m_costf = lambda * 5;
803             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
804             /* BWD is not used in P-frame */
805             vme_state_message[MODE_INTER_BWD] = 0;
807             m_costf = lambda * 2.5;
809             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
810             m_costf = lambda * 5.5;
812             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
813             m_costf = lambda * 3.5;
815             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
816             m_costf = lambda * 5.0;
818             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
819             m_costf = lambda * 6.5;
821             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
822             m_costf = lambda * 1.5;
824             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
/*
 * Refresh the VME mode/MV cost LUT for the current H.264 frame: pick
 * the QP (application-provided in CQP mode, otherwise the BRC's
 * per-layer qp_prime_y for this slice type) and delegate the table
 * fill to intel_h264_calc_mbmvcost_qp.
 */
830 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
831                                 struct encode_state *encode_state,
832                                 struct intel_encoder_context *encoder_context)
834     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
835     struct gen6_vme_context *vme_context = encoder_context->vme_context;
836     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
837     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
839     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
841     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
843     if (encoder_context->rate_control_mode == VA_RC_CQP)
844         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
846         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
848     if (vme_state_message == NULL)
851     intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
/*
 * VP8 counterpart of intel_vme_update_mbmv_cost: key frames behave as
 * I slices, inter frames as P.  The VP8 quantization index (0..127,
 * VP8_QP_MAX) is rescaled into the H.264 QP range before computing
 * lambda, then the mode/MV cost LUT is filled inline.
 */
854 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
855                                     struct encode_state *encode_state,
856                                     struct intel_encoder_context *encoder_context)
858     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
859     struct gen6_vme_context *vme_context = encoder_context->vme_context;
860     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
861     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
862     int qp, m_cost, j, mv_count;
863     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
864     float lambda, m_costf;
866     int is_key_frame = !pic_param->pic_flags.bits.frame_type; /* frame_type 0 == key frame */
867     int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
869     if (vme_state_message == NULL)
872     if (encoder_context->rate_control_mode == VA_RC_CQP)
873         qp = q_matrix->quantization_index[0];
875         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
/* Map the VP8 q-index onto the H.264 QP scale for lambda derivation. */
877     lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
880     vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
883         vme_state_message[MODE_INTRA_16X16] = 0;
884         m_cost = lambda * 16;
885         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
887         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* MV costs grow logarithmically with MV range (entries MV0..). */
890         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
891         for (j = 1; j < 3; j++) {
892             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
893             m_cost = (int)m_costf;
894             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
897         for (j = 4; j <= 64; j *= 2) {
898             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
899             m_cost = (int)m_costf;
900             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
/* Fixed fallback mode costs (condition on a line not in this view). */
905         vme_state_message[MODE_INTRA_16X16] = 0x4a;
906         vme_state_message[MODE_INTRA_4X4] = 0x4a;
907         vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
908         vme_state_message[MODE_INTER_16X16] = 0x4a;
909         vme_state_message[MODE_INTER_16X8] = 0x4a;
910         vme_state_message[MODE_INTER_8X8] = 0x4a;
911         vme_state_message[MODE_INTER_4X4] = 0x4a;
912         vme_state_message[MODE_INTER_BWD] = 0;
915         m_costf = lambda * 10;
916         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
917         m_cost = lambda * 24;
918         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
920         m_costf = lambda * 3.5;
922         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
924         m_costf = lambda * 2.5;
926         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
927         m_costf = lambda * 4;
929         vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
930         m_costf = lambda * 1.5;
932         vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
933         m_costf = lambda * 5;
935         vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
936         /* BWD is not used in P-frame */
937         vme_state_message[MODE_INTER_BWD] = 0;
/* Hardware-scoreboard dependency mask bits used by the VME walkers below:
 * the current macroblock may depend on its neighbour A (left), B (top)
 * and C (top-right) macroblocks. */
941 #define MB_SCOREBOARD_A (1 << 0)
942 #define MB_SCOREBOARD_B (1 << 1)
943 #define MB_SCOREBOARD_C (1 << 2)
/*
 * Program the GPE scoreboard for VME so that each macroblock thread
 * stalls (SCOREBOARD_STALLING) until the neighbour macroblocks it
 * depends on have completed.
 */
945 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
947 vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
948 vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
949 vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
953 /* In VME prediction the current mb depends on the neighbour
954 * A/B/C macroblock. So the left/up/up-right dependency should
/* Dependency deltas relative to the current MB:
 * (-1, 0) = left (A), (0, -1) = top (B), (1, -1) = top-right (C). */
957 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
958 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
959 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
960 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
961 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
962 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
/* Clear the remaining scoreboard delta slots. */
964 vme_context->gpe_context.vfe_desc7.dword = 0;
968 /* check whether the mb of (x_index, y_index) is out of bound */
/* Bound test used by the wavefront walkers: the MB must lie inside the
 * picture (mb_width x mb_height) and inside the slice's MB range
 * [first_mb, first_mb + num_mb].
 * NOTE(review): the return statements are not visible in this excerpt;
 * by the call sites (!loop_in_bounds(...)) a nonzero result appears to
 * mean "out of bounds" — confirm against the full source. */
969 static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
972 if (x_index < 0 || x_index >= mb_width)
974 if (y_index < 0 || y_index >= mb_height)
977 mb_index = y_index * mb_width + x_index;
978 if (mb_index < first_mb || mb_index > (first_mb + num_mb))
/*
 * Fill the VME batch buffer with one MEDIA_OBJECT command per macroblock,
 * walking each H.264 slice in a diagonal (wavefront) order so that the
 * scoreboard dependencies (left / top / top-right neighbours) set up by
 * gen7_vme_scoreboard_init are always satisfied.  When ROI is enabled the
 * per-MB QP from vme_context->qp_per_mb is emitted with each command.
 */
984 gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
985 struct encode_state *encode_state,
986 int mb_width, int mb_height,
988 int transform_8x8_mode_flag,
989 struct intel_encoder_context *encoder_context)
991 struct gen6_vme_context *vme_context = encoder_context->vme_context;
994 unsigned int *command_ptr;
995 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
996 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
997 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
998 int qp,qp_mb,qp_index;
999 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* CQP: QP comes straight from the picture/slice parameters; otherwise use
 * the BRC-computed QP for the current temporal layer and slice type. */
1001 if (encoder_context->rate_control_mode == VA_RC_CQP)
1002 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1004 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1006 #define USE_SCOREBOARD (1 << 21)
1008 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1009 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
/* Walk every slice independently; each slice restarts the wavefront at
 * its first macroblock. */
1011 for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1012 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
1013 int first_mb = pSliceParameter->macroblock_address;
1014 int num_mb = pSliceParameter->num_macroblocks;
1015 unsigned int mb_intra_ub, score_dep;
1016 int x_outer, y_outer, x_inner, y_inner;
1017 int xtemp_outer = 0;
1019 x_outer = first_mb % mb_width;
1020 y_outer = first_mb / mb_width;
/* Phase 1: wavefronts that start on the top row, until the start column
 * reaches mb_width - 2. */
1023 for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1026 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
/* Build the intra-availability flags and scoreboard dependency mask for
 * this MB from its position inside the slice. */
1030 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1031 score_dep |= MB_SCOREBOARD_A;
1033 if (y_inner != mb_row) {
1034 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1035 score_dep |= MB_SCOREBOARD_B;
1037 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1038 if (x_inner != (mb_width -1)) {
1039 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1040 score_dep |= MB_SCOREBOARD_C;
/* Emit one 9-dword MEDIA_OBJECT command for this macroblock. */
1044 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1045 *command_ptr++ = kernel;
1046 *command_ptr++ = USE_SCOREBOARD;
1049 /* the (X, Y) term of scoreboard */
1050 *command_ptr++ = ((y_inner << 16) | x_inner);
1051 *command_ptr++ = score_dep;
1053 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1054 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1055 /* QP occupies one byte */
1056 if (vme_context->roi_enabled) {
1057 qp_index = y_inner * mb_width + x_inner;
1058 qp_mb = *(vme_context->qp_per_mb + qp_index);
1061 *command_ptr++ = qp_mb;
/* Phase 2: remaining wavefronts, starting from column mb_width - 2 and
 * moving down the right-hand side of the slice. */
1068 xtemp_outer = mb_width - 2;
1069 if (xtemp_outer < 0)
1071 x_outer = xtemp_outer;
1072 y_outer = first_mb / mb_width;
1073 for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1076 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1080 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1081 score_dep |= MB_SCOREBOARD_A;
1083 if (y_inner != mb_row) {
1084 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1085 score_dep |= MB_SCOREBOARD_B;
1087 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1089 if (x_inner != (mb_width -1)) {
1090 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1091 score_dep |= MB_SCOREBOARD_C;
1095 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1096 *command_ptr++ = kernel;
1097 *command_ptr++ = USE_SCOREBOARD;
1100 /* the (X, Y) term of scoreboard */
1101 *command_ptr++ = ((y_inner << 16) | x_inner);
1102 *command_ptr++ = score_dep;
1104 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1105 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1106 /* qp occupies one byte */
1107 if (vme_context->roi_enabled) {
1108 qp_index = y_inner * mb_width + x_inner;
1109 qp_mb = *(vme_context->qp_per_mb + qp_index);
1112 *command_ptr++ = qp_mb;
1118 if (x_outer >= mb_width) {
1120 x_outer = xtemp_outer;
/* Terminate the batch and release the CPU mapping. */
1126 *command_ptr++ = MI_BATCH_BUFFER_END;
1128 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
/*
 * Pack one per-reference byte in the format consumed by
 * MFX_AVC_REF_IDX_STATE:
 *   bit 6    : long-term reference flag
 *   bit 5    : (top XOR bottom XOR 1) — set for a frame (neither or both
 *              field flags present)
 *   bits 4:1 : DPB frame store id
 *   bit 0    : set when the picture is a bottom field only
 */
1132 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1134 unsigned int is_long_term =
1135 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1136 unsigned int is_top_field =
1137 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1138 unsigned int is_bottom_field =
1139 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1141 return ((is_long_term << 6) |
1142 ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1143 (frame_store_id << 1) |
1144 ((is_top_field ^ 1) & is_bottom_field));
/*
 * Emit the MFX_AVC_REF_IDX_STATE commands for reference lists L0 and L1.
 * All entries default to 0x80 ("invalid"); for P/B slices the single
 * reference actually chosen by VME (vme_context->used_references /
 * used_reference_objects) is looked up in the DPB and its packed state
 * byte is patched into the entry selected by ref_index_in_mb.
 */
1148 intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
1149 struct encode_state *encode_state,
1150 struct intel_encoder_context *encoder_context)
1152 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1153 struct intel_batchbuffer *batch = encoder_context->base.batch;
1155 struct object_surface *obj_surface;
1156 unsigned int fref_entry, bref_entry;
1158 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
/* 0x80 per byte marks an unused/invalid reference entry. */
1160 fref_entry = 0x80808080;
1161 bref_entry = 0x80808080;
1162 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* L0 reference: needed for both P and B slices. */
1164 if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
1165 int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
/* Only 4 per-dword entries exist; larger indices are clamped upstream. */
1167 if (ref_idx_l0 > 3) {
1168 WARN_ONCE("ref_idx_l0 is out of range\n");
/* Find the frame-store slot of the used L0 reference in the DPB. */
1172 obj_surface = vme_context->used_reference_objects[0];
1174 for (i = 0; i < 16; i++) {
1176 obj_surface == encode_state->reference_objects[i]) {
1181 if (frame_index == -1) {
1182 WARN_ONCE("RefPicList0 is not found in DPB!\n");
/* Patch the packed reference byte into the chosen entry slot. */
1184 int ref_idx_l0_shift = ref_idx_l0 * 8;
1185 fref_entry &= ~(0xFF << ref_idx_l0_shift);
1186 fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
/* L1 reference: B slices only. */
1190 if (slice_type == SLICE_TYPE_B) {
1191 int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
1193 if (ref_idx_l1 > 3) {
1194 WARN_ONCE("ref_idx_l1 is out of range\n");
1198 obj_surface = vme_context->used_reference_objects[1];
1200 for (i = 0; i < 16; i++) {
1202 obj_surface == encode_state->reference_objects[i]) {
1207 if (frame_index == -1) {
1208 WARN_ONCE("RefPicList1 is not found in DPB!\n");
1210 int ref_idx_l1_shift = ref_idx_l1 * 8;
1211 bref_entry &= ~(0xFF << ref_idx_l1_shift);
1212 bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
/* Emit the 10-dword L0 command: header, list select, then 8 entry dwords
 * (only the first carries a real reference). */
1216 BEGIN_BCS_BATCH(batch, 10);
1217 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1218 OUT_BCS_BATCH(batch, 0); //Select L0
1219 OUT_BCS_BATCH(batch, fref_entry); //Only 1 reference
1220 for(i = 0; i < 7; i++) {
1221 OUT_BCS_BATCH(batch, 0x80808080);
1223 ADVANCE_BCS_BATCH(batch);
/* Same shape for L1. */
1225 BEGIN_BCS_BATCH(batch, 10);
1226 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1227 OUT_BCS_BATCH(batch, 1); //Select L1
1228 OUT_BCS_BATCH(batch, bref_entry); //Only 1 reference
1229 for(i = 0; i < 7; i++) {
1230 OUT_BCS_BATCH(batch, 0x80808080);
1232 ADVANCE_BCS_BATCH(batch);
/*
 * Initialize the VME state message for MPEG-2 encoding: motion-vector
 * range derived from the MPEG-2 level, picture dimensions in MBs, and —
 * for non-intra pictures — per-mode and per-MV-length cost LUT entries
 * computed from the slice quantiser via intel_lambda_qp().
 */
1236 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1237 struct encode_state *encode_state,
1238 struct intel_encoder_context *encoder_context)
1240 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1241 uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1242 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1243 int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1244 int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1245 uint32_t mv_x, mv_y;
1246 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1247 VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1248 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
/* Pick the search/MV range according to the configured MPEG-2 level
 * (the mv_x/mv_y assignments are elided in this excerpt). */
1250 if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1253 } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1256 } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1260 WARN_ONCE("Incorrect Mpeg2 level setting!\n");
/* Mode/MV costs are only meaningful for P/B pictures. */
1265 pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1266 if (pic_param->picture_type != VAEncPictureTypeIntra) {
1267 int qp, m_cost, j, mv_count;
1268 float lambda, m_costf;
1269 slice_param = (VAEncSliceParameterBufferMPEG2 *)
1270 encode_state->slice_params_ext[0]->buffer;
1271 qp = slice_param->quantiser_scale_code;
1272 lambda = intel_lambda_qp(qp);
1273 /* No Intra prediction. So it is zero */
1274 vme_state_message[MODE_INTRA_8X8] = 0;
1275 vme_state_message[MODE_INTRA_4X4] = 0;
/* MV length costs: cost grows with log2 of the MV magnitude bucket. */
1276 vme_state_message[MODE_INTER_MV0] = 0;
1277 for (j = 1; j < 3; j++) {
1278 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1279 m_cost = (int)m_costf;
1280 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1283 for (j = 4; j <= 64; j *= 2) {
1284 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1285 m_cost = (int)m_costf;
1286 vme_state_message[MODE_INTER_MV0 + mv_count] =
1287 intel_format_lutvalue(m_cost, 0x6f);
1291 /* It can only perform the 16x16 search. So mode cost can be ignored for
1292 * the other mode. for example: 16x8/8x8
1294 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1295 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1297 vme_state_message[MODE_INTER_16X8] = 0;
1298 vme_state_message[MODE_INTER_8X8] = 0;
1299 vme_state_message[MODE_INTER_8X4] = 0;
1300 vme_state_message[MODE_INTER_4X4] = 0;
1301 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
/* Publish MV range (y in the high half-word) and picture size in MBs. */
1304 vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1306 vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
/*
 * MPEG-2 variant of the wavefront batch-buffer filler: the whole picture
 * is treated as a single slice (num_mb = mb_width * mb_height) and each
 * macroblock gets an 8-dword MEDIA_OBJECT with scoreboard dependencies
 * on its left / top / top-right neighbours.  Unlike the H.264 walker,
 * no per-MB QP dword is emitted.
 */
1311 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
1312 struct encode_state *encode_state,
1313 int mb_width, int mb_height,
1315 struct intel_encoder_context *encoder_context)
1317 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1318 unsigned int *command_ptr;
1320 #define MPEG2_SCOREBOARD (1 << 21)
1322 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1323 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1326 unsigned int mb_intra_ub, score_dep;
1327 int x_outer, y_outer, x_inner, y_inner;
1328 int xtemp_outer = 0;
1330 int num_mb = mb_width * mb_height;
/* Phase 1: wavefronts starting on the top row. */
1336 for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1339 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
/* Intra-availability flags and scoreboard dependencies for this MB. */
1343 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1344 score_dep |= MB_SCOREBOARD_A;
1347 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1348 score_dep |= MB_SCOREBOARD_B;
1351 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1353 if (x_inner != (mb_width -1)) {
1354 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1355 score_dep |= MB_SCOREBOARD_C;
/* One 8-dword MEDIA_OBJECT per macroblock. */
1359 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1360 *command_ptr++ = kernel;
1361 *command_ptr++ = MPEG2_SCOREBOARD;
1364 /* the (X, Y) term of scoreboard */
1365 *command_ptr++ = ((y_inner << 16) | x_inner);
1366 *command_ptr++ = score_dep;
1368 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1369 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
/* Phase 2: wavefronts starting from column mb_width - 2. */
1376 xtemp_outer = mb_width - 2;
1377 if (xtemp_outer < 0)
1379 x_outer = xtemp_outer;
1381 for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1384 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1388 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1389 score_dep |= MB_SCOREBOARD_A;
1392 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1393 score_dep |= MB_SCOREBOARD_B;
1396 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1398 if (x_inner != (mb_width -1)) {
1399 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1400 score_dep |= MB_SCOREBOARD_C;
1404 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1405 *command_ptr++ = kernel;
1406 *command_ptr++ = MPEG2_SCOREBOARD;
1409 /* the (X, Y) term of scoreboard */
1410 *command_ptr++ = ((y_inner << 16) | x_inner);
1411 *command_ptr++ = score_dep;
1413 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1414 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1420 if (x_outer >= mb_width) {
1422 x_outer = xtemp_outer;
/* Terminate the batch and release the CPU mapping. */
1428 *command_ptr++ = MI_BATCH_BUFFER_END;
1430 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
/*
 * Scan ref_list and pick the valid reference picture temporally closest
 * to the current picture, measured by TopFieldOrderCnt distance.
 * Invalid entries (VA_PICTURE_H264_INVALID flag or VA_INVALID_SURFACE
 * id) are skipped.  NOTE(review): lines handling the sign of the POC
 * difference (e.g. for backward/L1 search) are elided in this excerpt —
 * confirm against the full source.
 */
1435 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1436 VAPictureH264 *ref_list,
1440 int i, found = -1, min = 0x7FFFFFFF;
1442 for (i = 0; i < num_pictures; i++) {
1445 if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1446 (ref_list[i].picture_id == VA_INVALID_SURFACE))
1449 tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1454 if (tmp > 0 && tmp < min) {
/*
 * Select the single reference picture VME will use for reference list
 * list_index (0 = L0, 1 = L1) and bind its surface state via the
 * caller-provided vme_source_surface_state callback.  When the active
 * list holds exactly one entry it is used directly; otherwise the
 * temporally nearest valid picture is chosen with
 * avc_temporal_find_surface().  The chosen reference, its surface object
 * and its packed per-MB reference index are recorded in vme_context; on
 * failure all three are cleared.
 */
1464 intel_avc_vme_reference_state(VADriverContextP ctx,
1465 struct encode_state *encode_state,
1466 struct intel_encoder_context *encoder_context,
1469 void (* vme_source_surface_state)(
1470 VADriverContextP ctx,
1472 struct object_surface *obj_surface,
1473 struct intel_encoder_context *encoder_context))
1475 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1476 struct object_surface *obj_surface = NULL;
1477 struct i965_driver_data *i965 = i965_driver_data(ctx);
1478 VASurfaceID ref_surface_id;
1479 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1480 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1481 int max_num_references;
1482 VAPictureH264 *curr_pic;
1483 VAPictureH264 *ref_list;
/* Active reference count and list for the requested direction. */
1486 if (list_index == 0) {
1487 max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1488 ref_list = slice_param->RefPicList0;
1490 max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1491 ref_list = slice_param->RefPicList1;
/* Single-entry list: take RefPicList[0] directly, falling back to the
 * DPB reference object when the surface id is invalid. */
1494 if (max_num_references == 1) {
1495 if (list_index == 0) {
1496 ref_surface_id = slice_param->RefPicList0[0].picture_id;
1497 vme_context->used_references[0] = &slice_param->RefPicList0[0];
1499 ref_surface_id = slice_param->RefPicList1[0].picture_id;
1500 vme_context->used_references[1] = &slice_param->RefPicList1[0];
1503 if (ref_surface_id != VA_INVALID_SURFACE)
1504 obj_surface = SURFACE(ref_surface_id);
1508 obj_surface = encode_state->reference_objects[list_index];
1509 vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
/* Multi-entry list: choose the temporally nearest reference. */
1514 curr_pic = &pic_param->CurrPic;
1516 /* select the reference frame in temporal space */
1517 ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1518 ref_surface_id = ref_list[ref_idx].picture_id;
1520 if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1521 obj_surface = SURFACE(ref_surface_id);
1523 vme_context->used_reference_objects[list_index] = obj_surface;
1524 vme_context->used_references[list_index] = &ref_list[ref_idx];
/* Success path: bind the surface and record the packed ref index. */
1529 assert(ref_idx >= 0);
1530 vme_context->used_reference_objects[list_index] = obj_surface;
1531 vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1532 vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
/* Failure path: clear the recorded reference state. */
1537 vme_context->used_reference_objects[list_index] = NULL;
1538 vme_context->used_references[list_index] = NULL;
1539 vme_context->ref_index_in_mb[list_index] = 0;
/*
 * Insert the packed headers belonging to one slice into slice_batch:
 * first all raw packed data buffers attached to the slice (skipping any
 * packed slice-header, which must come last), then the slice header —
 * either the application-provided packed slice header, or one generated
 * by the driver with build_avc_slice_header() when none was supplied.
 */
1543 void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
1544 struct encode_state *encode_state,
1545 struct intel_encoder_context *encoder_context,
1547 struct intel_batchbuffer *slice_batch)
1549 int count, i, start_index;
1550 unsigned int length_in_bits;
1551 VAEncPackedHeaderParameterBuffer *param = NULL;
1552 unsigned int *header_data = NULL;
1553 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1554 int slice_header_index;
/* slice_header_index == 0 means no packed slice header was supplied. */
1556 if (encode_state->slice_header_index[slice_index] == 0)
1557 slice_header_index = -1;
1559 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1561 count = encode_state->slice_rawdata_count[slice_index];
1562 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
/* Pass 1: insert every raw packed buffer except the slice header. */
1564 for (i = 0; i < count; i++) {
1565 unsigned int skip_emul_byte_cnt;
1567 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1569 param = (VAEncPackedHeaderParameterBuffer *)
1570 (encode_state->packed_header_params_ext[start_index + i]->buffer);
1572 /* skip the slice header packed data type as it is lastly inserted */
1573 if (param->type == VAEncPackedHeaderSlice)
1576 length_in_bits = param->bit_length;
1578 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1580 /* as the slice header is still required, the last header flag is set to
1583 mfc_context->insert_object(ctx,
1586 ALIGN(length_in_bits, 32) >> 5,
1587 length_in_bits & 0x1f,
1591 !param->has_emulation_bytes,
/* Pass 2: insert the slice header itself, generated here if needed. */
1595 if (slice_header_index == -1) {
1596 unsigned char *slice_header = NULL;
1597 int slice_header_length_in_bits = 0;
1598 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1599 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1600 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1602 /* No slice header data is passed. And the driver needs to generate it */
1603 /* For the Normal H264 */
1604 slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
1608 mfc_context->insert_object(ctx, encoder_context,
1609 (unsigned int *)slice_header,
1610 ALIGN(slice_header_length_in_bits, 32) >> 5,
1611 slice_header_length_in_bits & 0x1f,
1612 5, /* first 5 bytes are start code + nal unit type */
1613 1, 0, 1, slice_batch);
/* Application supplied a packed slice header: insert it as-is. */
1617 unsigned int skip_emul_byte_cnt;
1619 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1621 param = (VAEncPackedHeaderParameterBuffer *)
1622 (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1623 length_in_bits = param->bit_length;
1625 /* as the slice header is the last header data for one slice,
1626 * the last header flag is set to one.
1628 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1630 mfc_context->insert_object(ctx,
1633 ALIGN(length_in_bits, 32) >> 5,
1634 length_in_bits & 0x1f,
1638 !param->has_emulation_bytes,
/*
 * Lazily build the per-QP MB/MV cost table buffer object for the current
 * slice type (one 32-byte cost row per QP, QP_MAX rows).  If the table
 * for this slice type already exists the function returns early; a new
 * bo is allocated, filled via intel_h264_calc_mbmvcost_qp() for every
 * QP, and cached in the slice-type-specific vme_context pointer.
 */
1646 intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
1647 struct encode_state *encode_state,
1648 struct intel_encoder_context *encoder_context)
1650 struct i965_driver_data *i965 = i965_driver_data(ctx);
1651 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1652 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1655 uint8_t *cost_table;
1657 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Reuse an already-built table for this slice type. */
1660 if (slice_type == SLICE_TYPE_I) {
1661 if (vme_context->i_qp_cost_table)
1663 } else if (slice_type == SLICE_TYPE_P) {
1664 if (vme_context->p_qp_cost_table)
1667 if (vme_context->b_qp_cost_table)
1671 /* It is enough to allocate 32 bytes for each qp. */
1672 bo = dri_bo_alloc(i965->intel.bufmgr,
1678 assert(bo->virtual);
1679 cost_table = (uint8_t *)(bo->virtual);
/* Fill one cost row per QP value. */
1680 for (qp = 0; qp < QP_MAX; qp++) {
1681 intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
/* Cache the bo in the per-slice-type slot. */
1687 if (slice_type == SLICE_TYPE_I) {
1688 vme_context->i_qp_cost_table = bo;
1689 } else if (slice_type == SLICE_TYPE_P) {
1690 vme_context->p_qp_cost_table = bo;
1692 vme_context->b_qp_cost_table = bo;
1695 vme_context->cost_table_size = QP_MAX * 32;
/*
 * Bind the previously built per-QP cost table (see
 * intel_h264_initialize_mbmv_cost) of the current slice type as a VME
 * buffer surface at the given binding table / surface state offsets.
 */
1700 intel_h264_setup_cost_surface(VADriverContextP ctx,
1701 struct encode_state *encode_state,
1702 struct intel_encoder_context *encoder_context,
1703 unsigned long binding_table_offset,
1704 unsigned long surface_state_offset)
1706 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1707 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1711 struct i965_buffer_surface cost_table;
1713 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Pick the cached bo matching the slice type (I/P/B). */
1716 if (slice_type == SLICE_TYPE_I) {
1717 bo = vme_context->i_qp_cost_table;
1718 } else if (slice_type == SLICE_TYPE_P) {
1719 bo = vme_context->p_qp_cost_table;
1721 bo = vme_context->b_qp_cost_table;
/* Describe the layout: QP_MAX blocks of 32 bytes, 16-byte pitch. */
1725 cost_table.num_blocks = QP_MAX;
1726 cost_table.pitch = 16;
1727 cost_table.size_block = 32;
1729 vme_context->vme_buffer_suface_setup(ctx,
1730 &vme_context->gpe_context,
1732 binding_table_offset,
1733 surface_state_offset);
1737 * the idea of conversion between qp and qstep comes from scaling process
1738 * of transform coeff for Luma component in H264 spec.
1740 * In order to avoid too small qstep, it is multiplied by 16.
/* Map an H.264 QP to a linear quantizer step: qstep = 2^(value/6 - 2)
 * after the adjustment below.  NOTE(review): the initialisation of
 * `value` is elided in this excerpt — presumably value starts from qp;
 * confirm against the full source. */
1742 static float intel_h264_qp_qstep(int qp)
1746 value = value / 6 - 2;
1747 qstep = powf(2, value);
/* Inverse of intel_h264_qp_qstep(): recover the QP from a linear
 * quantizer step via qp = 12 + 6 * log2(qstep). */
1751 static int intel_h264_qstep_qp(float qstep)
1755 qp = 12.0f + 6.0f * log2f(qstep);
1761 * Currently it is based on the following assumption:
1762 * SUM(roi_area * 1 / roi_qstep) + non_area * 1 / nonroi_qstep =
1763 * total_area * 1 / baseqp_qstep
1765 * qstep is the linearized quantizer of H264 quantizer
/* Per-ROI region geometry (in macroblock units) plus its assigned QP;
 * filled by intel_h264_enc_roi_cbr(). */
1768 int row_start_in_mb;
1770 int col_start_in_mb;
/*
 * CBR-mode ROI QP map computation.  Each ROI region gets
 * base_qp + roi_value (clipped to [1, 51]); the QP of the non-ROI area
 * is then solved from the bit-budget balance
 *   SUM(roi_mbs / roi_qstep) + nonroi_mbs / nonroi_qstep
 *     = total_mbs / base_qstep
 * so that overall rate stays close to the base QP target.  The result is
 * written into vme_context->qp_per_mb, one byte per macroblock.
 */
1780 intel_h264_enc_roi_cbr(VADriverContextP ctx,
1782 VAEncMiscParameterBufferROI *pMiscParamROI,
1783 struct encode_state *encode_state,
1784 struct intel_encoder_context *encoder_context)
1787 VAEncROI *region_roi;
1790 ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS];
1795 float qstep_nonroi, qstep_base;
1796 float roi_area, total_area, nonroi_area;
1799 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1800 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1801 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1802 int mbs_in_picture = width_in_mbs * height_in_mbs;
1804 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1805 VAStatus vaStatus = VA_STATUS_SUCCESS;
1807 if(pMiscParamROI != NULL)
/* Cap the region count at the driver maximum. */
1809 num_roi = (pMiscParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pMiscParamROI->num_roi;
1811 /* currently roi_value_is_qp_delta is the only supported mode of priority.
1813 * qp_delta set by user is added to base_qp, which is then clamped to
1814 * [base_qp-min_delta, base_qp+max_delta].
1816 ASSERT_RET(pMiscParamROI->roi_flags.bits.roi_value_is_qp_delta,VA_STATUS_ERROR_INVALID_PARAMETER);
1819 /* when the base_qp is lower than 12, the quality is quite good based
1820 * on the H264 test experience.
1821 * In such case it is unnecessary to adjust the quality for ROI region.
1823 if (base_qp <= 12) {
1824 nonroi_qp = base_qp;
/* Convert each ROI rectangle from pixels to MB units, assign its QP,
 * and accumulate the area/qstep sums for the balance equation. */
1831 for (i = 0; i < num_roi; i++) {
1832 int row_start, row_end, col_start, col_end;
1833 int roi_width_mbs, roi_height_mbs;
1838 region_roi = (VAEncROI *)pMiscParamROI->roi + i;
1840 col_start = region_roi->roi_rectangle.x;
1841 col_end = col_start + region_roi->roi_rectangle.width;
1842 row_start = region_roi->roi_rectangle.y;
1843 row_end = row_start + region_roi->roi_rectangle.height;
/* Round the rectangle outward to whole 16x16 macroblocks. */
1844 col_start = col_start / 16;
1845 col_end = (col_end + 15) / 16;
1846 row_start = row_start / 16;
1847 row_end = (row_end + 15) / 16;
1849 roi_width_mbs = col_end - col_start;
1850 roi_height_mbs = row_end - row_start;
1851 mbs_in_roi = roi_width_mbs * roi_height_mbs;
1853 param_regions[i].row_start_in_mb = row_start;
1854 param_regions[i].row_end_in_mb = row_end;
1855 param_regions[i].col_start_in_mb = col_start;
1856 param_regions[i].col_end_in_mb = col_end;
1857 param_regions[i].width_mbs = roi_width_mbs;
1858 param_regions[i].height_mbs = roi_height_mbs;
1860 roi_qp = base_qp + region_roi->roi_value;
1861 BRC_CLIP(roi_qp, 1, 51);
1863 param_regions[i].roi_qp = roi_qp;
1864 qstep_roi = intel_h264_qp_qstep(roi_qp);
1866 roi_area += mbs_in_roi;
1867 sum_roi += mbs_in_roi / qstep_roi;
/* Solve for the non-ROI qstep from the rate-balance equation and map it
 * back to a QP, clipped to the valid H.264 range. */
1870 total_area = mbs_in_picture;
1871 nonroi_area = total_area - roi_area;
1873 qstep_base = intel_h264_qp_qstep(base_qp);
1874 temp = (total_area / qstep_base - sum_roi);
1879 qstep_nonroi = nonroi_area / temp;
1880 nonroi_qp = intel_h264_qstep_qp(qstep_nonroi);
1883 BRC_CLIP(nonroi_qp, 1, 51);
/* Initialize the whole map with the non-ROI QP, then overwrite each ROI
 * rectangle row-by-row with its own QP. */
1886 memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture);
1890 for (i = 0; i < num_roi; i++) {
1891 for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) {
1892 qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb;
1893 memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs);
/*
 * Top-level ROI setup for H.264 encoding.  Validates the ROI misc
 * parameter, (re)allocates the per-MB QP map when the picture size
 * changes, and fills it: via intel_h264_enc_roi_cbr() for CBR, or
 * directly from pic_init_qp + slice_qp_delta plus per-region deltas for
 * CQP.  Other rate-control modes disable ROI.  On gen7, ROI forces the
 * soft batch path.
 */
1901 intel_h264_enc_roi_config(VADriverContextP ctx,
1902 struct encode_state *encode_state,
1903 struct intel_encoder_context *encoder_context)
1907 VAEncROI *region_roi;
1908 struct i965_driver_data *i965 = i965_driver_data(ctx);
1909 VAEncMiscParameterBuffer* pMiscParamROI;
1910 VAEncMiscParameterBufferROI *pParamROI = NULL;
1911 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1912 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1913 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1914 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1915 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1917 int row_start, row_end, col_start, col_end;
1920 vme_context->roi_enabled = 0;
1921 /* Restriction: Disable ROI when multi-slice is enabled */
1922 if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
1925 if (encode_state->misc_param[VAEncMiscParameterTypeROI][0] != NULL) {
1926 pMiscParamROI = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeROI][0]->buffer;
1927 pParamROI = (VAEncMiscParameterBufferROI *)pMiscParamROI->data;
1929 /* check whether number of ROI is correct */
1930 num_roi = (pParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pParamROI->num_roi;
1934 vme_context->roi_enabled = 1;
1936 if (!vme_context->roi_enabled)
/* Reallocate the per-MB QP map when the picture geometry changed. */
1939 if ((vme_context->saved_width_mbs != width_in_mbs) ||
1940 (vme_context->saved_height_mbs != height_in_mbs)) {
1941 free(vme_context->qp_per_mb);
1942 vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);
1944 vme_context->saved_width_mbs = width_in_mbs;
1945 vme_context->saved_height_mbs = height_in_mbs;
1946 assert(vme_context->qp_per_mb);
1948 if (encoder_context->rate_control_mode == VA_RC_CBR) {
1950 * TODO: More complex Qp adjust needs to be added.
1951 * Currently it is initialized to slice_qp.
1953 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1955 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* CBR: base QP comes from the BRC state; delegate the map fill. */
1957 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1958 intel_h264_enc_roi_cbr(ctx, qp, pParamROI,encode_state, encoder_context);
1960 } else if (encoder_context->rate_control_mode == VA_RC_CQP){
1961 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1962 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
/* CQP: fill the map with the slice QP, then patch each ROI rectangle
 * (iterated in reverse so earlier regions take precedence on overlap). */
1965 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1966 memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
1969 for (j = num_roi; j ; j--) {
1970 int qp_delta, qp_clip;
1972 region_roi = (VAEncROI *)pParamROI->roi + j - 1;
1974 col_start = region_roi->roi_rectangle.x;
1975 col_end = col_start + region_roi->roi_rectangle.width;
1976 row_start = region_roi->roi_rectangle.y;
1977 row_end = row_start + region_roi->roi_rectangle.height;
/* Round the rectangle outward to whole 16x16 macroblocks. */
1979 col_start = col_start / 16;
1980 col_end = (col_end + 15) / 16;
1981 row_start = row_start / 16;
1982 row_end = (row_end + 15) / 16;
1984 qp_delta = region_roi->roi_value;
1985 qp_clip = qp + qp_delta;
1987 BRC_CLIP(qp_clip, 1, 51);
1989 for (i = row_start; i < row_end; i++) {
1990 qp_ptr = vme_context->qp_per_mb + (i * width_in_mbs) + col_start;
1991 memset(qp_ptr, qp_clip, (col_end - col_start));
1996 * TODO: Disable it for non CBR-CQP.
1998 vme_context->roi_enabled = 0;
/* Gen7 needs the CPU-built (soft) batch path for per-MB QP delivery. */
2001 if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info))
2002 encoder_context->soft_batch_force = 1;
/*
 * HEVC counterpart of avc_temporal_find_surface(): scan ref_list and
 * pick the valid reference picture with the smallest positive
 * pic_order_cnt distance from the current picture, skipping invalid
 * entries.  NOTE(review): lines handling the sign of the POC difference
 * (backward/L1 search) are elided in this excerpt — confirm against the
 * full source.
 */
2009 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
2010 VAPictureHEVC *ref_list,
2014 int i, found = -1, min = 0x7FFFFFFF;
2016 for (i = 0; i < num_pictures; i++) {
2019 if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
2020 (ref_list[i].picture_id == VA_INVALID_SURFACE))
2023 tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
2028 if (tmp > 0 && tmp < min) {
2037 intel_hevc_vme_reference_state(VADriverContextP ctx,
2038 struct encode_state *encode_state,
2039 struct intel_encoder_context *encoder_context,
2042 void (* vme_source_surface_state)(
2043 VADriverContextP ctx,
2045 struct object_surface *obj_surface,
2046 struct intel_encoder_context *encoder_context))
2048 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2049 struct object_surface *obj_surface = NULL;
2050 struct i965_driver_data *i965 = i965_driver_data(ctx);
2051 VASurfaceID ref_surface_id;
2052 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2053 VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2054 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2055 int max_num_references;
2056 VAPictureHEVC *curr_pic;
2057 VAPictureHEVC *ref_list;
2059 unsigned int is_hevc10 = 0;
2060 GenHevcSurface *hevc_encoder_surface = NULL;
2062 if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
2063 || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
2066 if (list_index == 0) {
2067 max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
2068 ref_list = slice_param->ref_pic_list0;
2070 max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
2071 ref_list = slice_param->ref_pic_list1;
2074 if (max_num_references == 1) {
2075 if (list_index == 0) {
2076 ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
2077 vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
2079 ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
2080 vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
2083 if (ref_surface_id != VA_INVALID_SURFACE)
2084 obj_surface = SURFACE(ref_surface_id);
2088 obj_surface = encode_state->reference_objects[list_index];
2089 vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
2094 curr_pic = &pic_param->decoded_curr_pic;
2096 /* select the reference frame in temporal space */
2097 ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
2098 ref_surface_id = ref_list[ref_idx].picture_id;
2100 if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
2101 obj_surface = SURFACE(ref_surface_id);
2103 vme_context->used_reference_objects[list_index] = obj_surface;
2104 vme_context->used_references[list_index] = &ref_list[ref_idx];
2109 assert(ref_idx >= 0);
2110 vme_context->used_reference_objects[list_index] = obj_surface;
2113 hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2114 assert(hevc_encoder_surface);
2115 obj_surface = hevc_encoder_surface->nv12_surface_obj;
2117 vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
2118 vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
2123 vme_context->used_reference_objects[list_index] = NULL;
2124 vme_context->used_references[list_index] = NULL;
2125 vme_context->ref_index_in_mb[list_index] = 0;
2129 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
2130 struct encode_state *encode_state,
2131 struct intel_encoder_context *encoder_context)
2133 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2134 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2135 VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2136 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2137 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2138 int qp, m_cost, j, mv_count;
2139 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
2140 float lambda, m_costf;
2142 /* here no SI SP slice for HEVC, do not need slice fixup */
2143 int slice_type = slice_param->slice_type;
2146 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2148 if(encoder_context->rate_control_mode == VA_RC_CBR)
2150 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
2151 if(slice_type == HEVC_SLICE_B) {
2152 if(pSequenceParameter->ip_period == 1)
2154 slice_type = HEVC_SLICE_P;
2155 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2157 }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
2158 slice_type = HEVC_SLICE_P;
2159 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2165 if (vme_state_message == NULL)
2168 assert(qp <= QP_MAX);
2169 lambda = intel_lambda_qp(qp);
2170 if (slice_type == HEVC_SLICE_I) {
2171 vme_state_message[MODE_INTRA_16X16] = 0;
2172 m_cost = lambda * 4;
2173 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2174 m_cost = lambda * 16;
2175 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2176 m_cost = lambda * 3;
2177 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2180 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
2181 for (j = 1; j < 3; j++) {
2182 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2183 m_cost = (int)m_costf;
2184 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2187 for (j = 4; j <= 64; j *= 2) {
2188 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2189 m_cost = (int)m_costf;
2190 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
2195 vme_state_message[MODE_INTRA_16X16] = 0x4a;
2196 vme_state_message[MODE_INTRA_8X8] = 0x4a;
2197 vme_state_message[MODE_INTRA_4X4] = 0x4a;
2198 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2199 vme_state_message[MODE_INTER_16X16] = 0x4a;
2200 vme_state_message[MODE_INTER_16X8] = 0x4a;
2201 vme_state_message[MODE_INTER_8X8] = 0x4a;
2202 vme_state_message[MODE_INTER_8X4] = 0x4a;
2203 vme_state_message[MODE_INTER_4X4] = 0x4a;
2204 vme_state_message[MODE_INTER_BWD] = 0x2a;
2207 m_costf = lambda * 10;
2208 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2209 m_cost = lambda * 14;
2210 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2211 m_cost = lambda * 24;
2212 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2213 m_costf = lambda * 3.5;
2215 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2216 if (slice_type == HEVC_SLICE_P) {
2217 m_costf = lambda * 2.5;
2219 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2220 m_costf = lambda * 4;
2222 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2223 m_costf = lambda * 1.5;
2225 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2226 m_costf = lambda * 3;
2228 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2229 m_costf = lambda * 5;
2231 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2232 /* BWD is not used in P-frame */
2233 vme_state_message[MODE_INTER_BWD] = 0;
2235 m_costf = lambda * 2.5;
2237 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2238 m_costf = lambda * 5.5;
2240 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2241 m_costf = lambda * 3.5;
2243 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2244 m_costf = lambda * 5.0;
2246 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2247 m_costf = lambda * 6.5;
2249 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2250 m_costf = lambda * 1.5;
2252 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);