2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
45 #include "intel_media.h"
48 #define log2f(x) (logf(x)/(float)M_LN2)
/*
 * Normalize a VA slice_type for H.264 encoding: SP is folded into P and
 * SI into I; any remaining value that is not B triggers a one-time
 * warning and is forced to B.
 * NOTE(review): several original lines (braces, the else branch header
 * and the return statement) are absent from this capture; the code
 * tokens below are kept byte-identical.
 */
51 int intel_avc_enc_slice_type_fixup(int slice_type)
53 if (slice_type == SLICE_TYPE_SP ||
54 slice_type == SLICE_TYPE_P)
55 slice_type = SLICE_TYPE_P;
56 else if (slice_type == SLICE_TYPE_SI ||
57 slice_type == SLICE_TYPE_I)
58 slice_type = SLICE_TYPE_I;
/* Fallback: anything else is warned about once and treated as a B slice. */
60 if (slice_type != SLICE_TYPE_B)
61 WARN_ONCE("Invalid slice type for H.264 encoding!\n");
63 slice_type = SLICE_TYPE_B;
/*
 * Seed the three per-slice-type bit-rate-control contexts (index 0..2 —
 * presumably I/P/B, matching SLICE_TYPE_* usage elsewhere; confirm against
 * the struct definition) with the default QP grow/shrink tuning values.
 * NOTE(review): the return type line, opening brace and the `int i;`
 * declaration are missing from this capture; only part of the Correct[]
 * table (indices 0-5) is visible.
 */
70 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
71 struct intel_encoder_context *encoder_context)
73 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
76 for(i = 0 ; i < 3; i++) {
/* Clamp per-update QP movement to +/-6 and set growth/shrink damping. */
77 mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
78 mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
79 mfc_context->bit_rate_control_context[i].GrowInit = 6;
80 mfc_context->bit_rate_control_context[i].GrowResistance = 4;
81 mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
82 mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
/* Symmetric correction ladder 8/4/2 .. 2/4/8 — presumably stronger
 * correction the further the frame size deviates; TODO confirm against
 * the MFC BRC documentation. */
84 mfc_context->bit_rate_control_context[i].Correct[0] = 8;
85 mfc_context->bit_rate_control_context[i].Correct[1] = 4;
86 mfc_context->bit_rate_control_context[i].Correct[2] = 2;
87 mfc_context->bit_rate_control_context[i].Correct[3] = 2;
88 mfc_context->bit_rate_control_context[i].Correct[4] = 4;
89 mfc_context->bit_rate_control_context[i].Correct[5] = 8;
/*
 * (Re)initialize the software bit-rate controller: HRD buffer model,
 * per-layer GOP structure, per-slice-type target frame sizes, and a
 * first-guess QP derived from bits-per-frame.
 * NOTE(review): this capture omits some original lines (braces, `int i;`,
 * `double bpf, factor;`, else-branch headers); code tokens are kept
 * byte-identical.
 */
93 static void intel_mfc_brc_init(struct encode_state *encode_state,
94 struct intel_encoder_context* encoder_context)
96 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
97 double bitrate, framerate;
/* 8 * 3/2 bits per pixel = size of one uncompressed 4:2:0 frame in bits. */
98 double frame_per_bits = 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
/* Heuristic compressed-frame sizes at the QP extremes: 10% of raw at QP=1,
 * 0.1% of raw at QP=51. Used to interpolate the initial QP below. */
99 double qp1_size = 0.1 * frame_per_bits;
100 double qp51_size = 0.001 * frame_per_bits;
102 int inum = encoder_context->brc.num_iframes_in_gop,
103 pnum = encoder_context->brc.num_pframes_in_gop,
104 bnum = encoder_context->brc.num_bframes_in_gop; /* GOP structure: number of I, P, B frames in the GOP. */
105 int intra_period = encoder_context->brc.gop_size;
/* Temporal-layer encodes assume slightly larger QP=1 frames. */
108 if (encoder_context->layer.num_layers > 1)
109 qp1_size = 0.15 * frame_per_bits;
111 mfc_context->brc.mode = encoder_context->rate_control_mode;
113 mfc_context->hrd.buffer_size = encoder_context->brc.hrd_buffer_size;
/* NOTE(review): the (double) cast applies to the *comparison result*, not
 * to the fullness value — the expression still works as a boolean selector,
 * but the cast is misleading; likely intended (double)fullness < size. */
114 mfc_context->hrd.current_buffer_fullness =
115 (double)(encoder_context->brc.hrd_initial_buffer_fullness < mfc_context->hrd.buffer_size) ?
116 encoder_context->brc.hrd_initial_buffer_fullness : mfc_context->hrd.buffer_size / 2.;
117 mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
/* buffer_capacity: HRD size expressed in "largest frame" (QP=1) units. */
118 mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
119 mfc_context->hrd.violation_noted = 0;
121 for (i = 0; i < encoder_context->layer.num_layers; i++) {
/* Neutral starting QP for all slice types. */
122 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = 26;
123 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 26;
124 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = 26;
/* Layer 0 uses absolute rate; higher layers use the delta to the layer
 * below (bits_per_second[] / framerate_per_100s[] are cumulative). */
127 bitrate = encoder_context->brc.bits_per_second[0];
128 framerate = (double)encoder_context->brc.framerate_per_100s[0] / 100.0;
130 bitrate = (encoder_context->brc.bits_per_second[i] - encoder_context->brc.bits_per_second[i - 1]);
131 framerate = (double)(encoder_context->brc.framerate_per_100s[i] - encoder_context->brc.framerate_per_100s[i - 1]) / 100.0;
134 if (i == encoder_context->layer.num_layers - 1)
/* Scale the GOP description down to this layer's share of the frame rate. */
137 factor = (double)encoder_context->brc.framerate_per_100s[i] / encoder_context->brc.framerate_per_100s[encoder_context->layer.num_layers - 1];
139 if (encoder_context->layer.num_layers > 1) {
141 intra_period = (int)(encoder_context->brc.gop_size * factor);
143 pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor);
144 bnum = intra_period - inum - pnum;
/* Subsequent layers count only the frames added on top of lower layers. */
146 intra_period = (int)(encoder_context->brc.gop_size * factor) - intra_period;
148 pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor) - pnum;
149 bnum = intra_period - inum - pnum;
153 mfc_context->brc.gop_nums[i][SLICE_TYPE_I] = inum;
154 mfc_context->brc.gop_nums[i][SLICE_TYPE_P] = pnum;
155 mfc_context->brc.gop_nums[i][SLICE_TYPE_B] = bnum;
/* Split the GOP bit budget across slice types with fixed I:P:B weights. */
157 mfc_context->brc.target_frame_size[i][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
158 (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
159 mfc_context->brc.target_frame_size[i][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
160 mfc_context->brc.target_frame_size[i][SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
162 bpf = mfc_context->brc.bits_per_frame[i] = bitrate/framerate;
/* Linearly interpolate the initial P QP between QP=51 and QP=1 frame
 * sizes, clamping at the extremes. */
164 if ((bpf > qp51_size) && (bpf < qp1_size)) {
165 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
167 else if (bpf >= qp1_size)
168 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 1;
169 else if (bpf <= qp51_size)
170 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51;
/* I and B start from the same QP as P. */
172 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P];
173 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I];
175 BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I], 1, 51);
176 BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P], 1, 51);
177 BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B], 1, 51);
/*
 * Simulate one step of the HRD leaky-bucket model: drain the bits just
 * produced for this frame, then refill by the per-layer bits_per_frame.
 * On underflow the previous fullness is restored and BRC_UNDERFLOW is
 * returned so the caller can re-encode at a higher QP; on overflow VBR
 * simply clamps to the buffer size.
 * NOTE(review): the `int frame_bits` parameter line, braces and the
 * overflow-return path (original lines ~199-202) are missing from this
 * capture.
 */
181 int intel_mfc_update_hrd(struct encode_state *encode_state,
182 struct intel_encoder_context *encoder_context,
185 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
186 double prev_bf = mfc_context->hrd.current_buffer_fullness;
188 mfc_context->hrd.current_buffer_fullness -= frame_bits;
/* Underflow: frame was too big — roll back and report. */
190 if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
191 mfc_context->hrd.current_buffer_fullness = prev_bf;
192 return BRC_UNDERFLOW;
/* Refill at the nominal rate for the current temporal layer. */
195 mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame[encoder_context->layer.curr_frame_layer_id];
196 if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
/* VBR may let the buffer saturate; otherwise roll back (overflow). */
197 if (mfc_context->brc.mode == VA_RC_VBR)
198 mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
200 mfc_context->hrd.current_buffer_fullness = prev_bf;
204 return BRC_NO_HRD_VIOLATION;
/*
 * Post-encode BRC step: given the actual size of the frame just packed
 * (frame_bits), update the HRD model, predict the QP for the next frame
 * of the same slice type, apply rounding compensation, rate-of-change
 * limiting, an HRD-fullness-based delta, and finally keep the I/P/B QPs
 * within their conventional distance of each other. Returns a
 * gen6_brc_status telling the caller whether a re-encode is needed.
 * NOTE(review): this capture omits some original lines (the frame_bits
 * parameter, braces, else-branches, qpi/qpp/qpb and x/y declarations,
 * the qpn += lines of the rounding compensation, and the max/min QP
 * checks of the under/overflow paths); code tokens kept byte-identical.
 */
207 int intel_mfc_brc_postpack(struct encode_state *encode_state,
208 struct intel_encoder_context *encoder_context,
211 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
212 gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
213 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
214 int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
215 int curr_frame_layer_id, next_frame_layer_id;
217 int qp; // quantizer of previously encoded slice of current type
218 int qpn; // predicted quantizer for next frame of current type in integer format
219 double qpf; // predicted quantizer for next frame of current type in float format
220 double delta_qp; // QP correction
221 int target_frame_size, frame_size_next;
223 * x - how far we are from HRD buffer borders
224 * y - how far we are from target HRD buffer fullness
227 double frame_size_alpha;
/* Single-layer encode: everything lives in layer 0. */
229 if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0) {
230 curr_frame_layer_id = 0;
231 next_frame_layer_id = 0;
233 curr_frame_layer_id = encoder_context->layer.curr_frame_layer_id;
234 next_frame_layer_id = encoder_context->layer.frame_layer_ids[encoder_context->num_frames_in_sequence % encoder_context->layer.size_frame_layer_ids];
237 /* check whether the frame is HRD-compliant first */
238 sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
/* On a violation we re-encode the *current* frame, so predict for it. */
240 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
243 next_frame_layer_id = curr_frame_layer_id;
/* Carry the per-layer history: swap in the stats of the layer we are
 * about to predict for. */
246 mfc_context->brc.bits_prev_frame[curr_frame_layer_id] = frame_bits;
247 frame_bits = mfc_context->brc.bits_prev_frame[next_frame_layer_id];
249 mfc_context->brc.prev_slice_type[curr_frame_layer_id] = slicetype;
250 slicetype = mfc_context->brc.prev_slice_type[next_frame_layer_id];
252 /* 0 means the next frame is the first frame of next layer */
256 qpi = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I];
257 qpp = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P];
258 qpb = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B];
260 qp = mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype];
262 target_frame_size = mfc_context->brc.target_frame_size[next_frame_layer_id][slicetype];
/* alpha damps the correction: larger GOP share => slower adaptation;
 * tiny HRD buffers (< 5 max-size frames) get no damping at all. */
263 if (mfc_context->hrd.buffer_capacity < 5)
264 frame_size_alpha = 0;
266 frame_size_alpha = (double)mfc_context->brc.gop_nums[next_frame_layer_id][slicetype];
267 if (frame_size_alpha > 30) frame_size_alpha = 30;
268 frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
269 (double)(frame_size_alpha + 1.);
271 /* frame_size_next: avoiding negative number and too small value */
272 if ((double)frame_size_next < (double)(target_frame_size * 0.25))
273 frame_size_next = (int)((double)target_frame_size * 0.25);
/* QP scales roughly inversely with frame size: shrink the desired size,
 * grow the QP proportionally. */
275 qpf = (double)qp * target_frame_size / frame_size_next;
276 qpn = (int)(qpf + 0.5);
279 /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
280 mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] += qpf - qpn;
281 if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] > 1.0) {
283 mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
284 } else if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] < -1.0) {
286 mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
289 /* making sure that QP is not changing too fast */
290 if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
291 else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
292 /* making sure that with QP predictions we do not leave the QP range */
293 BRC_CLIP(qpn, 1, 51);
295 /* calculating QP delta as some function*/
/* x in [-1,1]: signed distance from target fullness (normalized to the
 * nearer buffer border); y: distance to that border. The exp/sin shape
 * pushes QP harder the closer the buffer is to under/overflowing. */
296 x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
298 x /= mfc_context->hrd.target_buffer_fullness;
299 y = mfc_context->hrd.current_buffer_fullness;
302 x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
303 y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
305 if (y < 0.01) y = 0.01;
307 else if (x < -1) x = -1;
309 delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
310 qpn = (int)(qpn + delta_qp + 0.5);
312 /* making sure that with QP predictions we do not leave the QP range */
313 BRC_CLIP(qpn, 1, 51);
315 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
316 /* correcting QPs of slices of other types */
/* Keep the I/P/B QPs within their conventional offsets of each other,
 * nudging the other two types halfway/quarter-way toward the expected
 * distance from the type just updated. */
317 if (slicetype == SLICE_TYPE_P) {
318 if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
319 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
320 if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
321 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
322 } else if (slicetype == SLICE_TYPE_I) {
323 if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
324 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
325 if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
326 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
327 } else { // SLICE_TYPE_B
328 if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
329 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
330 if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
331 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
333 BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I], 1, 51);
334 BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P], 1, 51);
335 BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B], 1, 51);
336 } else if (sts == BRC_UNDERFLOW) { // underflow
/* Frame too big: force at least one QP step up before re-encode. */
337 if (qpn <= qp) qpn = qp + 1;
340 sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
342 } else if (sts == BRC_OVERFLOW) {
343 if (qpn >= qp) qpn = qp - 1;
344 if (qpn < 1) { // < 0 (?) overflow with minQP
346 sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
350 mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype] = qpn;
/*
 * Initialize the VUI/SEI HRD timing state used when emitting buffering
 * period / picture timing SEI messages. Values are only programmed for
 * CBR; the delay-length fields (24 bits each) are set unconditionally
 * further down (their placement relative to the missing closing brace
 * cannot be confirmed from this capture).
 */
355 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
356 struct intel_encoder_context *encoder_context)
358 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
359 unsigned int rate_control_mode = encoder_context->rate_control_mode;
/* Top layer carries the cumulative target bitrate. */
360 int target_bit_rate = encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
362 // Currently only CBR mode is supported.
363 if (rate_control_mode == VA_RC_CBR) {
/* bit_rate_value is expressed in 1024-bit units (>> 10). */
364 mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
/* Initial CPB removal delay in 90 kHz clock ticks (90000), targeting
 * half-full CPB at the first access unit. */
365 mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
366 mfc_context->vui_hrd.i_cpb_removal_delay = 2;
367 mfc_context->vui_hrd.i_frame_number = 0;
369 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
370 mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
371 mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
/*
 * Advance the HRD frame counter after a frame is encoded; the counter
 * scales the CPB removal delay written into timing SEI messages
 * (see its use in intel_mfc_avc_pipeline_header_programing).
 * NOTE(review): the return-type line and braces are missing here.
 */
377 intel_mfc_hrd_context_update(struct encode_state *encode_state,
378 struct gen6_mfc_context *mfc_context)
380 mfc_context->vui_hrd.i_frame_number++;
/*
 * Sum the macroblocks covered by all submitted slices and compare with
 * the full-frame macroblock count derived from the surface dimensions.
 * The visible comparison suggests "slices cover the whole progressive
 * frame" is the pass condition; the actual return statements (and the
 * mbCount declaration) fall on lines missing from this capture —
 * confirm the return values against the full source.
 */
383 int intel_mfc_interlace_check(VADriverContextP ctx,
384 struct encode_state *encode_state,
385 struct intel_encoder_context *encoder_context)
387 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
388 VAEncSliceParameterBufferH264 *pSliceParameter;
/* Round pixel dimensions up to whole 16x16 macroblocks. */
391 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
392 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
394 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
395 pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer;
396 mbCount += pSliceParameter->num_macroblocks;
399 if ( mbCount == ( width_in_mbs * height_in_mbs ) )
/*
 * Per-sequence BRC entry point: for H.264/MVC in CBR mode, rebuild the
 * bit-rate-control, BRC and HRD contexts when the middleware requested
 * a rate-control reset (brc.need_reset). Other codecs/modes return
 * early (the early return sits on a line missing from this capture).
 */
405 void intel_mfc_brc_prepare(struct encode_state *encode_state,
406 struct intel_encoder_context *encoder_context)
408 unsigned int rate_control_mode = encoder_context->rate_control_mode;
410 if (encoder_context->codec != CODEC_H264 &&
411 encoder_context->codec != CODEC_H264_MVC)
414 if (rate_control_mode == VA_RC_CBR) {
415 /* Programming bit rate control */
416 if (encoder_context->brc.need_reset) {
417 intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
418 intel_mfc_brc_init(encode_state, encoder_context);
421 /* Programming HRD control */
422 if (encoder_context->brc.need_reset)
423 intel_mfc_hrd_context_init(encode_state, encoder_context);
/*
 * Insert the packed SPS, PPS and SEI headers supplied by the middleware
 * into the slice batch via mfc_context->insert_object. The three
 * sections follow an identical pattern: fetch packed data + parameter
 * buffer, compute the emulation-prevention skip count, then emit
 * length-in-dwords plus trailing bits. If no SEI was supplied and the
 * mode is CBR, a timing SEI is synthesized from the vui_hrd state
 * instead.
 * NOTE(review): several insert_object() arguments (data pointer, flags)
 * sit on lines missing from this capture, as do braces; tokens below
 * are kept byte-identical.
 */
427 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
428 struct encode_state *encode_state,
429 struct intel_encoder_context *encoder_context,
430 struct intel_batchbuffer *slice_batch)
432 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
433 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
434 unsigned int rate_control_mode = encoder_context->rate_control_mode;
435 unsigned int skip_emul_byte_cnt;
/* --- SPS --- */
437 if (encode_state->packed_header_data[idx]) {
438 VAEncPackedHeaderParameterBuffer *param = NULL;
439 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
440 unsigned int length_in_bits;
442 assert(encode_state->packed_header_param[idx]);
443 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
444 length_in_bits = param->bit_length;
446 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
447 mfc_context->insert_object(ctx,
/* length in dwords, then the leftover bit count of the last dword */
450 ALIGN(length_in_bits, 32) >> 5,
451 length_in_bits & 0x1f,
/* hardware inserts emulation bytes only if the header has none */
455 !param->has_emulation_bytes,
/* --- PPS --- */
459 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
461 if (encode_state->packed_header_data[idx]) {
462 VAEncPackedHeaderParameterBuffer *param = NULL;
463 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
464 unsigned int length_in_bits;
466 assert(encode_state->packed_header_param[idx]);
467 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
468 length_in_bits = param->bit_length;
470 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
472 mfc_context->insert_object(ctx,
475 ALIGN(length_in_bits, 32) >> 5,
476 length_in_bits & 0x1f,
480 !param->has_emulation_bytes,
/* --- SEI (middleware-provided, else synthesized for CBR) --- */
484 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
486 if (encode_state->packed_header_data[idx]) {
487 VAEncPackedHeaderParameterBuffer *param = NULL;
488 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
489 unsigned int length_in_bits;
491 assert(encode_state->packed_header_param[idx]);
492 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
493 length_in_bits = param->bit_length;
495 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
496 mfc_context->insert_object(ctx,
499 ALIGN(length_in_bits, 32) >> 5,
500 length_in_bits & 0x1f,
504 !param->has_emulation_bytes,
506 } else if (rate_control_mode == VA_RC_CBR) {
508 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
510 unsigned char *sei_data = NULL;
/* Build a buffering-period/picture-timing SEI from the HRD state;
 * cpb_removal_delay grows with the frame counter. */
512 int length_in_bits = build_avc_sei_buffer_timing(
513 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
514 mfc_context->vui_hrd.i_initial_cpb_removal_delay,
516 mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
517 mfc_context->vui_hrd.i_dpb_output_delay_length,
520 mfc_context->insert_object(ctx,
522 (unsigned int *)sei_data,
523 ALIGN(length_in_bits, 32) >> 5,
524 length_in_bits & 0x1f,
/*
 * Bind all input/output buffer objects for one H.264 encode:
 *  - allocate/attach per-surface direct-MV (DMV) buffers for the
 *    reconstructed frame and every active reference frame,
 *  - route the reconstructed surface to the pre- or post-deblocking
 *    output depending on whether any slice enables the in-loop
 *    deblocking filter (enable_avc_ildb),
 *  - record surface geometry, the raw input YUV bo, and the coded
 *    (PAK/BSE) output bo with its header offset.
 * Returns vaStatus (VA_STATUS_SUCCESS on the visible path).
 * NOTE(review): this capture omits numerous original lines (braces,
 * else-branches, dri_bo_alloc name/alignment arguments, the
 * enable_avc_ildb assignment, dri_bo_map/unmap around the coded-buffer
 * segment access, and the return); tokens below are byte-identical.
 */
534 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
535 struct encode_state *encode_state,
536 struct intel_encoder_context *encoder_context)
538 struct i965_driver_data *i965 = i965_driver_data(ctx);
539 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
540 struct object_surface *obj_surface;
541 struct object_buffer *obj_buffer;
542 GenAvcSurface *gen6_avc_surface;
544 VAStatus vaStatus = VA_STATUS_SUCCESS;
545 int i, j, enable_avc_ildb = 0;
546 VAEncSliceParameterBufferH264 *slice_param;
547 struct i965_coded_buffer_segment *coded_buffer_segment;
548 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
549 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
550 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
552 if (IS_GEN6(i965->intel.device_info)) {
553 /* On the SNB it should be fixed to 128 for the DMV buffer */
/* Scan slices until one enables deblocking (idc != 1 means the in-loop
 * filter runs for that slice). */
557 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
558 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
559 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
561 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
562 assert((slice_param->slice_type == SLICE_TYPE_I) ||
563 (slice_param->slice_type == SLICE_TYPE_SI) ||
564 (slice_param->slice_type == SLICE_TYPE_P) ||
565 (slice_param->slice_type == SLICE_TYPE_SP) ||
566 (slice_param->slice_type == SLICE_TYPE_B));
568 if (slice_param->disable_deblocking_filter_idc != 1) {
577 /*Setup all the input&output object*/
579 /* Setup current frame and current direct mv buffer*/
580 obj_surface = encode_state->reconstructed_object;
581 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Lazily attach top/bottom DMV buffers (68 bytes per MB each) to the
 * reconstructed surface. NOTE(review): calloc argument order here is
 * (size, 1) rather than the conventional (1, size) — harmless but odd. */
583 if ( obj_surface->private_data == NULL) {
584 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
585 assert(gen6_avc_surface);
586 gen6_avc_surface->dmv_top =
587 dri_bo_alloc(i965->intel.bufmgr,
589 68 * width_in_mbs * height_in_mbs,
591 gen6_avc_surface->dmv_bottom =
592 dri_bo_alloc(i965->intel.bufmgr,
594 68 * width_in_mbs * height_in_mbs,
596 assert(gen6_avc_surface->dmv_top);
597 assert(gen6_avc_surface->dmv_bottom);
598 obj_surface->private_data = (void *)gen6_avc_surface;
599 obj_surface->free_private_data = (void *)gen_free_avc_surface;
601 gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
/* The last two DMV slots are reserved for the current frame. */
602 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
603 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
604 dri_bo_reference(gen6_avc_surface->dmv_top);
605 dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* Reconstructed output goes post-deblocking when the filter is on. */
607 if (enable_avc_ildb) {
608 mfc_context->post_deblocking_output.bo = obj_surface->bo;
609 dri_bo_reference(mfc_context->post_deblocking_output.bo);
611 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
612 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
615 mfc_context->surface_state.width = obj_surface->orig_width;
616 mfc_context->surface_state.height = obj_surface->orig_height;
617 mfc_context->surface_state.w_pitch = obj_surface->width;
618 mfc_context->surface_state.h_pitch = obj_surface->height;
620 /* Setup reference frames and direct mv buffers*/
621 for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
622 obj_surface = encode_state->reference_objects[i];
624 if (obj_surface && obj_surface->bo) {
625 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
626 dri_bo_reference(obj_surface->bo);
628 /* Check DMV buffer */
629 if ( obj_surface->private_data == NULL) {
631 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
632 assert(gen6_avc_surface);
633 gen6_avc_surface->dmv_top =
634 dri_bo_alloc(i965->intel.bufmgr,
636 68 * width_in_mbs * height_in_mbs,
638 gen6_avc_surface->dmv_bottom =
639 dri_bo_alloc(i965->intel.bufmgr,
641 68 * width_in_mbs * height_in_mbs,
643 assert(gen6_avc_surface->dmv_top);
644 assert(gen6_avc_surface->dmv_bottom);
645 obj_surface->private_data = gen6_avc_surface;
646 obj_surface->free_private_data = gen_free_avc_surface;
649 gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
650 /* Setup DMV buffer */
/* Reference i occupies DMV slots 2i (top) and 2i+1 (bottom). */
651 mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
652 mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
653 dri_bo_reference(gen6_avc_surface->dmv_top);
654 dri_bo_reference(gen6_avc_surface->dmv_bottom);
660 mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
661 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* Coded buffer: bitstream starts after the driver's header segment and
 * is capped below the last 4 KiB page. */
663 obj_buffer = encode_state->coded_buf_object;
664 bo = obj_buffer->buffer_store->bo;
665 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
666 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
667 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
668 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
/* Reset the status segment at the head of the coded buffer (bo->virtual
 * implies a map/unmap pair on lines missing from this capture). */
671 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
672 coded_buffer_segment->mapped = 0;
673 coded_buffer_segment->codec = encoder_context->codec;
679 * The LUT uses the pair of 4-bit units: (shift, base) structure.
681 * So it is necessary to convert one cost into the nearest LUT format.
683 * 2^K *x = 2^n * (1 + deltaX)
684 * k + log2(x) = n + log2(1 + deltaX)
685 * log2(x) = n - k + log2(1 + deltaX)
686 * As X is in the range of [1, 15]
687 * 4 > n - k + log2(1 + deltaX) >= 0
688 * => n + log2(1 + deltaX) >= k > n - 4 + log2(1 + deltaX)
689 * Then we can derive the corresponding K and get the nearest LUT format.
/*
 * Encode `value` into the hardware LUT cost format (shift << 4) | base,
 * i.e. value ~= base * 2^shift, choosing the (shift, base) pair with the
 * smallest absolute error over candidate shifts logvalue-3..logvalue.
 * `max` (same LUT format) caps the result: the decoded candidate is
 * compared against the decoded max (temp1 vs temp2; the comparison and
 * returns fall on lines missing from this capture).
 * NOTE(review): braces, small-value early-out, error initialization and
 * loop interior lines are also missing here.
 */
691 int intel_format_lutvalue(int value, int max)
694 int logvalue, temp1, temp2;
699 logvalue = (int)(log2f((float)value));
703 int error, temp_value, base, j, temp_err;
705 j = logvalue - 4 + 1;
707 for(; j <= logvalue; j++) {
/* Round value up to a multiple of 2^j to get the candidate base. */
711 base = (value + (1 << (j - 1)) - 1) >> j;
716 temp_value = base << j;
717 temp_err = abs(value - temp_value);
718 if (temp_err < error) {
720 ret = (j << 4) | base;
/* Decode candidate and cap for comparison: base << shift. */
726 temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
727 temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
736 #define VP8_QP_MAX 128
/*
 * Lagrange multiplier heuristic for mode/MV cost weighting:
 * lambda = round(2^(qp/6 - 2)). The line assigning `value` from qp
 * (and the clamp/return lines) are missing from this capture —
 * `value = value / 6 - 2` below reads `value`, which presumably was
 * set to qp on the missing preceding line; TODO confirm.
 */
739 static float intel_lambda_qp(int qp)
741 float value, lambdaf;
743 value = value / 6 - 2;
746 lambdaf = roundf(powf(2, value));
/*
 * Fill the VME state message's macroblock-mode and motion-vector cost
 * table for the given QP and slice type. Costs are lambda-scaled
 * empirical weights converted to the hardware LUT format via
 * intel_format_lutvalue(); MV costs grow logarithmically with MV range
 * ((log2(j+1) + 1.718) * lambda). For very low QP (the threshold sits on
 * a missing line) fixed 0x4a/0x2a defaults are used instead.
 * NOTE(review): the slice_type parameter line, braces, several
 * `m_cost = ...` / `m_cost = m_costf;` rounding lines and the mv_count
 * updates are missing from this capture; some visible assignments
 * therefore appear to read m_cost set on an unseen line.
 */
751 void intel_h264_calc_mbmvcost_qp(int qp,
753 uint8_t *vme_state_message)
755 int m_cost, j, mv_count;
756 float lambda, m_costf;
758 assert(qp <= QP_MAX);
759 lambda = intel_lambda_qp(qp);
762 vme_state_message[MODE_CHROMA_INTRA] = 0;
763 vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
/* I slices: intra-mode costs only, plus the MV cost ladder. */
765 if (slice_type == SLICE_TYPE_I) {
766 vme_state_message[MODE_INTRA_16X16] = 0;
768 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
769 m_cost = lambda * 16;
770 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
772 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* MV cost table: entries for small ranges 1..3, then powers of two. */
775 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
776 for (j = 1; j < 3; j++) {
777 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
778 m_cost = (int)m_costf;
779 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
782 for (j = 4; j <= 64; j *= 2) {
783 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
784 m_cost = (int)m_costf;
785 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
/* Low-QP path: fixed hardware defaults, no lambda scaling. */
790 vme_state_message[MODE_INTRA_16X16] = 0x4a;
791 vme_state_message[MODE_INTRA_8X8] = 0x4a;
792 vme_state_message[MODE_INTRA_4X4] = 0x4a;
793 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
794 vme_state_message[MODE_INTER_16X16] = 0x4a;
795 vme_state_message[MODE_INTER_16X8] = 0x4a;
796 vme_state_message[MODE_INTER_8X8] = 0x4a;
797 vme_state_message[MODE_INTER_8X4] = 0x4a;
798 vme_state_message[MODE_INTER_4X4] = 0x4a;
799 vme_state_message[MODE_INTER_BWD] = 0x2a;
/* Normal path: lambda-weighted intra costs... */
802 m_costf = lambda * 10;
803 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
804 m_cost = lambda * 14;
805 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
806 m_cost = lambda * 24;
807 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
808 m_costf = lambda * 3.5;
810 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* ...and inter costs, cheaper partitions for P than for B slices. */
811 if (slice_type == SLICE_TYPE_P) {
812 m_costf = lambda * 2.5;
814 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
815 m_costf = lambda * 4;
817 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
818 m_costf = lambda * 1.5;
820 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
821 m_costf = lambda * 3;
823 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
824 m_costf = lambda * 5;
826 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
827 /* BWD is not used in P-frame */
828 vme_state_message[MODE_INTER_BWD] = 0;
830 m_costf = lambda * 2.5;
832 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
833 m_costf = lambda * 5.5;
835 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
836 m_costf = lambda * 3.5;
838 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
839 m_costf = lambda * 5.0;
841 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
842 m_costf = lambda * 6.5;
844 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
845 m_costf = lambda * 1.5;
847 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
/*
 * Refresh the H.264 VME cost table for the current frame: pick the QP
 * either directly from the picture/slice parameters (CQP) or from the
 * BRC state for the current temporal layer, then delegate to
 * intel_h264_calc_mbmvcost_qp(). Bails out if the VME state message
 * buffer is absent (the early return sits on a missing line).
 * NOTE(review): the `int qp;` declaration, braces and the `else` of the
 * CQP branch are missing from this capture.
 */
853 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
854 struct encode_state *encode_state,
855 struct intel_encoder_context *encoder_context)
857 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
858 struct gen6_vme_context *vme_context = encoder_context->vme_context;
859 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
860 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
862 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
864 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
866 if (encoder_context->rate_control_mode == VA_RC_CQP)
867 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
869 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
871 if (vme_state_message == NULL)
874 intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
/*
 * VP8 variant of the VME cost-table update. Key frames are treated as
 * I slices, inter frames as P. The VP8 quantization index (0..127,
 * VP8_QP_MAX) is rescaled to the H.264 QP range before computing
 * lambda; the rest mirrors intel_h264_calc_mbmvcost_qp without the
 * 8x8 intra / B-slice entries VP8 does not use.
 * NOTE(review): braces, early returns, several `m_cost = ...` rounding
 * lines and the mv_count bookkeeping sit on lines missing from this
 * capture; tokens below are byte-identical.
 */
877 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
878 struct encode_state *encode_state,
879 struct intel_encoder_context *encoder_context)
881 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
882 struct gen6_vme_context *vme_context = encoder_context->vme_context;
883 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
884 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
885 int qp, m_cost, j, mv_count;
886 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
887 float lambda, m_costf;
/* frame_type 0 is a VP8 key frame. */
889 int is_key_frame = !pic_param->pic_flags.bits.frame_type;
890 int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
892 if (vme_state_message == NULL)
895 if (encoder_context->rate_control_mode == VA_RC_CQP)
896 qp = q_matrix->quantization_index[0];
898 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
/* Rescale VP8 q-index (0..VP8_QP_MAX) onto the H.264 QP scale. */
900 lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
903 vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
/* Key-frame (intra) costs plus the MV cost ladder. */
906 vme_state_message[MODE_INTRA_16X16] = 0;
907 m_cost = lambda * 16;
908 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
910 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
913 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
914 for (j = 1; j < 3; j++) {
915 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
916 m_cost = (int)m_costf;
917 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
920 for (j = 4; j <= 64; j *= 2) {
921 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
922 m_cost = (int)m_costf;
923 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
/* Low-QP path: fixed defaults. */
928 vme_state_message[MODE_INTRA_16X16] = 0x4a;
929 vme_state_message[MODE_INTRA_4X4] = 0x4a;
930 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
931 vme_state_message[MODE_INTER_16X16] = 0x4a;
932 vme_state_message[MODE_INTER_16X8] = 0x4a;
933 vme_state_message[MODE_INTER_8X8] = 0x4a;
934 vme_state_message[MODE_INTER_4X4] = 0x4a;
935 vme_state_message[MODE_INTER_BWD] = 0;
/* Normal path: lambda-weighted intra and P-style inter costs. */
938 m_costf = lambda * 10;
939 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
940 m_cost = lambda * 24;
941 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
943 m_costf = lambda * 3.5;
945 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
947 m_costf = lambda * 2.5;
949 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
950 m_costf = lambda * 4;
952 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
953 m_costf = lambda * 1.5;
955 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
956 m_costf = lambda * 5;
958 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
959 /* BWD is not used in P-frame */
960 vme_state_message[MODE_INTER_BWD] = 0;
964 #define MB_SCOREBOARD_A (1 << 0)
965 #define MB_SCOREBOARD_B (1 << 1)
966 #define MB_SCOREBOARD_C (1 << 2)
968 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
970 vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
971 vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
972 vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
976 /* In VME prediction the current mb depends on the neighbour
977 * A/B/C macroblock. So the left/up/up-right dependency should
980 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
981 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
982 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
983 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
984 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
985 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
987 vme_context->gpe_context.vfe_desc7.dword = 0;
/* Check whether the MB at (x_index, y_index) lies outside the slice window.
 * Returns 0 when the MB is inside the picture AND its raster index falls in
 * [first_mb, first_mb + num_mb]; returns -1 otherwise. Callers test the
 * in-bounds case with !loop_in_bounds(...).
 */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    int mb_index;

    if (x_index < 0 || x_index >= mb_width)
        return -1;

    if (y_index < 0 || y_index >= mb_height)
        return -1;

    mb_index = y_index * mb_width + x_index;

    if (mb_index < first_mb || mb_index > (first_mb + num_mb))
        return -1;

    return 0;
}
/* Emit one CMD_MEDIA_OBJECT per macroblock into the VME batch buffer for
 * every H.264 slice, walking MBs in a 45-degree wavefront order so that the
 * hardware scoreboard dependencies (left/top/top-right) are honoured.
 * Each command carries the MB position, its scoreboard dependency mask, the
 * intra-availability flags and (optionally) a per-MB QP when ROI is enabled.
 * NOTE(review): interleaved lines are missing from this view; comments
 * describe only the visible statements.
 */
1007 gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
1008 struct encode_state *encode_state,
1009 int mb_width, int mb_height,
1011 int transform_8x8_mode_flag,
1012 struct intel_encoder_context *encoder_context)
1014 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1017 unsigned int *command_ptr;
1018 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1019 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1020 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1021 int qp,qp_mb,qp_index;
1022 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* CQP: derive QP from picture/slice params; otherwise use the BRC QP */
1024 if (encoder_context->rate_control_mode == VA_RC_CQP)
1025 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1027 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1029 #define USE_SCOREBOARD (1 << 21)
1031 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1032 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
/* one pass per slice */
1034 for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1035 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
1036 int first_mb = pSliceParameter->macroblock_address;
1037 int num_mb = pSliceParameter->num_macroblocks;
1038 unsigned int mb_intra_ub, score_dep;
1039 int x_outer, y_outer, x_inner, y_inner;
1040 int xtemp_outer = 0;
1042 x_outer = first_mb % mb_width;
1043 y_outer = first_mb / mb_width;
/* first wavefront sweep: columns up to mb_width - 2 */
1046 for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1049 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
/* accumulate intra-availability flags and scoreboard deps for neighbours */
1053 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1054 score_dep |= MB_SCOREBOARD_A;
1056 if (y_inner != mb_row) {
1057 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1058 score_dep |= MB_SCOREBOARD_B;
1060 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1061 if (x_inner != (mb_width -1)) {
1062 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1063 score_dep |= MB_SCOREBOARD_C;
/* 9-dword MEDIA_OBJECT: kernel id, scoreboard enable, (X,Y), dep mask,
 * MB geometry, flags and per-MB QP */
1067 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1068 *command_ptr++ = kernel;
1069 *command_ptr++ = USE_SCOREBOARD;
1072 /* the (X, Y) term of scoreboard */
1073 *command_ptr++ = ((y_inner << 16) | x_inner);
1074 *command_ptr++ = score_dep;
1076 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1077 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1078 /* QP occupies one byte */
1079 if (vme_context->roi_enabled) {
1080 qp_index = y_inner * mb_width + x_inner;
1081 qp_mb = *(vme_context->qp_per_mb + qp_index);
1084 *command_ptr++ = qp_mb;
/* second sweep: remaining columns starting at mb_width - 2 */
1091 xtemp_outer = mb_width - 2;
1092 if (xtemp_outer < 0)
1094 x_outer = xtemp_outer;
1095 y_outer = first_mb / mb_width;
1096 for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1099 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1103 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1104 score_dep |= MB_SCOREBOARD_A;
1106 if (y_inner != mb_row) {
1107 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1108 score_dep |= MB_SCOREBOARD_B;
1110 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1112 if (x_inner != (mb_width -1)) {
1113 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1114 score_dep |= MB_SCOREBOARD_C;
1118 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1119 *command_ptr++ = kernel;
1120 *command_ptr++ = USE_SCOREBOARD;
1123 /* the (X, Y) term of scoreboard */
1124 *command_ptr++ = ((y_inner << 16) | x_inner);
1125 *command_ptr++ = score_dep;
1127 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1128 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1129 /* qp occupies one byte */
1130 if (vme_context->roi_enabled) {
1131 qp_index = y_inner * mb_width + x_inner;
1132 qp_mb = *(vme_context->qp_per_mb + qp_index);
1135 *command_ptr++ = qp_mb;
1141 if (x_outer >= mb_width) {
1143 x_outer = xtemp_outer;
/* terminate the batch and release the mapping */
1149 *command_ptr++ = MI_BATCH_BUFFER_END;
1151 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1155 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1157 unsigned int is_long_term =
1158 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1159 unsigned int is_top_field =
1160 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1161 unsigned int is_bottom_field =
1162 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1164 return ((is_long_term << 6) |
1165 ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1166 (frame_store_id << 1) |
1167 ((is_top_field ^ 1) & is_bottom_field));
/* Emit the MFX_AVC_REF_IDX_STATE commands for list L0 and L1.
 * The entries default to 0x80808080 (invalid) and, for P/B slices, the slot
 * selected by the VME stage is patched with the packed reference descriptor
 * produced by intel_get_ref_idx_state_1() at the frame-store index found by
 * matching the used reference surface against the DPB.
 * NOTE(review): interleaved lines are missing from this view; comments
 * describe only the visible statements.
 */
1171 intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
1172 struct encode_state *encode_state,
1173 struct intel_encoder_context *encoder_context)
1175 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1176 struct intel_batchbuffer *batch = encoder_context->base.batch;
1178 struct object_surface *obj_surface;
1179 unsigned int fref_entry, bref_entry;
1181 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
/* 0x80 per byte marks an unused reference slot */
1183 fref_entry = 0x80808080;
1184 bref_entry = 0x80808080;
1185 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* L0 entry: only P and B slices carry a forward reference */
1187 if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
1188 int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
/* hardware slot index is limited to 0..3 */
1190 if (ref_idx_l0 > 3) {
1191 WARN_ONCE("ref_idx_l0 is out of range\n");
/* locate the used L0 surface in the DPB to get its frame-store index */
1195 obj_surface = vme_context->used_reference_objects[0];
1197 for (i = 0; i < 16; i++) {
1199 obj_surface == encode_state->reference_objects[i]) {
1204 if (frame_index == -1) {
1205 WARN_ONCE("RefPicList0 is not found in DPB!\n");
1207 int ref_idx_l0_shift = ref_idx_l0 * 8;
1208 fref_entry &= ~(0xFF << ref_idx_l0_shift);
1209 fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
/* L1 entry: backward reference, B slices only */
1213 if (slice_type == SLICE_TYPE_B) {
1214 int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
1216 if (ref_idx_l1 > 3) {
1217 WARN_ONCE("ref_idx_l1 is out of range\n");
1221 obj_surface = vme_context->used_reference_objects[1];
1223 for (i = 0; i < 16; i++) {
1225 obj_surface == encode_state->reference_objects[i]) {
1230 if (frame_index == -1) {
1231 WARN_ONCE("RefPicList1 is not found in DPB!\n");
1233 int ref_idx_l1_shift = ref_idx_l1 * 8;
1234 bref_entry &= ~(0xFF << ref_idx_l1_shift);
1235 bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
/* emit the 10-dword REF_IDX_STATE for list 0, then list 1 */
1239 BEGIN_BCS_BATCH(batch, 10);
1240 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1241 OUT_BCS_BATCH(batch, 0); //Select L0
1242 OUT_BCS_BATCH(batch, fref_entry); //Only 1 reference
1243 for(i = 0; i < 7; i++) {
1244 OUT_BCS_BATCH(batch, 0x80808080);
1246 ADVANCE_BCS_BATCH(batch);
1248 BEGIN_BCS_BATCH(batch, 10);
1249 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1250 OUT_BCS_BATCH(batch, 1); //Select L1
1251 OUT_BCS_BATCH(batch, bref_entry); //Only 1 reference
1252 for(i = 0; i < 7; i++) {
1253 OUT_BCS_BATCH(batch, 0x80808080);
1255 ADVANCE_BCS_BATCH(batch);
/* Set up the MPEG-2 VME state message: MV search range (derived from the
 * MPEG-2 level), picture dimensions in MBs, and — for non-intra pictures —
 * the lambda-weighted mode/MV cost LUT based on the slice quantiser scale.
 * NOTE(review): interleaved lines are missing from this view (e.g. the
 * per-level mv_x/mv_y assignments); comments describe only what is visible.
 */
1259 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1260 struct encode_state *encode_state,
1261 struct intel_encoder_context *encoder_context)
1263 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1264 uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1265 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1266 int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1267 int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1268 uint32_t mv_x, mv_y;
1269 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1270 VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1271 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
/* MV range depends on the configured MPEG-2 level (assignments elided here) */
1273 if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1276 } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1279 } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1283 WARN_ONCE("Incorrect Mpeg2 level setting!\n");
1288 pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
/* cost LUT is only needed for P/B pictures */
1289 if (pic_param->picture_type != VAEncPictureTypeIntra) {
1290 int qp, m_cost, j, mv_count;
1291 float lambda, m_costf;
1292 slice_param = (VAEncSliceParameterBufferMPEG2 *)
1293 encode_state->slice_params_ext[0]->buffer;
1294 qp = slice_param->quantiser_scale_code;
1295 lambda = intel_lambda_qp(qp);
1296 /* No Intra prediction. So it is zero */
1297 vme_state_message[MODE_INTRA_8X8] = 0;
1298 vme_state_message[MODE_INTRA_4X4] = 0;
1299 vme_state_message[MODE_INTER_MV0] = 0;
/* MV cost grows with log2 of the MV-magnitude bucket */
1300 for (j = 1; j < 3; j++) {
1301 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1302 m_cost = (int)m_costf;
1303 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1306 for (j = 4; j <= 64; j *= 2) {
1307 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1308 m_cost = (int)m_costf;
1309 vme_state_message[MODE_INTER_MV0 + mv_count] =
1310 intel_format_lutvalue(m_cost, 0x6f);
1314 /* It can only perform the 16x16 search. So mode cost can be ignored for
1315 * the other mode. for example: 16x8/8x8
1317 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1318 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1320 vme_state_message[MODE_INTER_16X8] = 0;
1321 vme_state_message[MODE_INTER_8X8] = 0;
1322 vme_state_message[MODE_INTER_8X4] = 0;
1323 vme_state_message[MODE_INTER_4X4] = 0;
1324 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
/* pack MV range and picture size into the state message */
1327 vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1329 vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
/* MPEG-2 variant of the wavefront batch-buffer filler: emits one 8-dword
 * CMD_MEDIA_OBJECT per macroblock, covering the whole picture as a single
 * slice window, with hardware-scoreboard dependencies on the A/B/C
 * neighbours. Structure mirrors gen7_vme_walker_fill_vme_batchbuffer().
 * NOTE(review): interleaved lines are missing from this view; comments
 * describe only the visible statements.
 */
1334 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
1335 struct encode_state *encode_state,
1336 int mb_width, int mb_height,
1338 struct intel_encoder_context *encoder_context)
1340 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1341 unsigned int *command_ptr;
1343 #define MPEG2_SCOREBOARD (1 << 21)
1345 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1346 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1349 unsigned int mb_intra_ub, score_dep;
1350 int x_outer, y_outer, x_inner, y_inner;
1351 int xtemp_outer = 0;
/* the whole picture is processed as one window */
1353 int num_mb = mb_width * mb_height;
/* first wavefront sweep: columns up to mb_width - 2 */
1359 for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1362 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
/* accumulate intra-availability flags and scoreboard deps */
1366 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1367 score_dep |= MB_SCOREBOARD_A;
1370 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1371 score_dep |= MB_SCOREBOARD_B;
1374 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1376 if (x_inner != (mb_width -1)) {
1377 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1378 score_dep |= MB_SCOREBOARD_C;
/* 8-dword MEDIA_OBJECT (no per-MB QP dword, unlike the H.264 walker) */
1382 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1383 *command_ptr++ = kernel;
1384 *command_ptr++ = MPEG2_SCOREBOARD;
1387 /* the (X, Y) term of scoreboard */
1388 *command_ptr++ = ((y_inner << 16) | x_inner);
1389 *command_ptr++ = score_dep;
1391 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1392 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
/* second sweep: remaining columns starting at mb_width - 2 */
1399 xtemp_outer = mb_width - 2;
1400 if (xtemp_outer < 0)
1402 x_outer = xtemp_outer;
1404 for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1407 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1411 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1412 score_dep |= MB_SCOREBOARD_A;
1415 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1416 score_dep |= MB_SCOREBOARD_B;
1419 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1421 if (x_inner != (mb_width -1)) {
1422 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1423 score_dep |= MB_SCOREBOARD_C;
1427 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1428 *command_ptr++ = kernel;
1429 *command_ptr++ = MPEG2_SCOREBOARD;
1432 /* the (X, Y) term of scoreboard */
1433 *command_ptr++ = ((y_inner << 16) | x_inner);
1434 *command_ptr++ = score_dep;
1436 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1437 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1443 if (x_outer >= mb_width) {
1445 x_outer = xtemp_outer;
/* terminate the batch and release the mapping */
1451 *command_ptr++ = MI_BATCH_BUFFER_END;
1453 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
/* Pick the reference in ref_list temporally closest to curr_pic, using
 * TopFieldOrderCnt distance; invalid entries are skipped.
 * NOTE(review): part of the parameter list and the loop tail are missing
 * from this view -- the visible `tmp > 0 && tmp < min` test tracks the
 * closest valid candidate so far; confirm the direction handling against
 * the elided lines.
 */
1458 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1459 VAPictureH264 *ref_list,
1463 int i, found = -1, min = 0x7FFFFFFF;
1465 for (i = 0; i < num_pictures; i++) {
/* skip invalid / unset reference entries */
1468 if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1469 (ref_list[i].picture_id == VA_INVALID_SURFACE))
/* POC distance between the current picture and this candidate */
1472 tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1477 if (tmp > 0 && tmp < min) {
/* Select the reference surface for one reference list (L0/L1), record it in
 * vme_context (used_references / used_reference_objects / ref_index_in_mb)
 * and program the VME source surface state via the supplied callback.
 * With a single active reference the list's first entry is used directly;
 * otherwise the temporally closest picture is chosen by
 * avc_temporal_find_surface().
 * NOTE(review): interleaved lines are missing from this view; comments
 * describe only the visible statements.
 */
1487 intel_avc_vme_reference_state(VADriverContextP ctx,
1488 struct encode_state *encode_state,
1489 struct intel_encoder_context *encoder_context,
1492 void (* vme_source_surface_state)(
1493 VADriverContextP ctx,
1495 struct object_surface *obj_surface,
1496 struct intel_encoder_context *encoder_context))
1498 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1499 struct object_surface *obj_surface = NULL;
1500 struct i965_driver_data *i965 = i965_driver_data(ctx);
1501 VASurfaceID ref_surface_id;
1502 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1503 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1504 int max_num_references;
1505 VAPictureH264 *curr_pic;
1506 VAPictureH264 *ref_list;
/* choose list-specific active-reference count and RefPicList */
1509 if (list_index == 0) {
1510 max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1511 ref_list = slice_param->RefPicList0;
1513 max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1514 ref_list = slice_param->RefPicList1;
/* single reference: take the first list entry directly */
1517 if (max_num_references == 1) {
1518 if (list_index == 0) {
1519 ref_surface_id = slice_param->RefPicList0[0].picture_id;
1520 vme_context->used_references[0] = &slice_param->RefPicList0[0];
1522 ref_surface_id = slice_param->RefPicList1[0].picture_id;
1523 vme_context->used_references[1] = &slice_param->RefPicList1[0];
1526 if (ref_surface_id != VA_INVALID_SURFACE)
1527 obj_surface = SURFACE(ref_surface_id);
/* fall back to the DPB entry / picture-param reference frame */
1531 obj_surface = encode_state->reference_objects[list_index];
1532 vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1537 curr_pic = &pic_param->CurrPic;
1539 /* select the reference frame in temporal space */
1540 ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1541 ref_surface_id = ref_list[ref_idx].picture_id;
1543 if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1544 obj_surface = SURFACE(ref_surface_id);
1546 vme_context->used_reference_objects[list_index] = obj_surface;
1547 vme_context->used_references[list_index] = &ref_list[ref_idx];
/* publish the selection and program the VME surface state */
1552 assert(ref_idx >= 0);
1553 vme_context->used_reference_objects[list_index] = obj_surface;
1554 vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1555 vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
/* no usable reference: clear the per-list bookkeeping */
1560 vme_context->used_reference_objects[list_index] = NULL;
1561 vme_context->used_references[list_index] = NULL;
1562 vme_context->ref_index_in_mb[list_index] = 0;
/* Insert the packed header data attached to one slice into slice_batch:
 * first all raw (non-slice-header) packed buffers, then the slice header
 * itself -- either the application-supplied packed slice header or, when
 * none was passed, one generated by build_avc_slice_header().
 * NOTE(review): interleaved lines (notably some insert_object() arguments)
 * are missing from this view; comments describe only what is visible.
 */
1566 void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
1567 struct encode_state *encode_state,
1568 struct intel_encoder_context *encoder_context,
1570 struct intel_batchbuffer *slice_batch)
1572 int count, i, start_index;
1573 unsigned int length_in_bits;
1574 VAEncPackedHeaderParameterBuffer *param = NULL;
1575 unsigned int *header_data = NULL;
1576 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1577 int slice_header_index;
/* index 0 means "no packed slice header supplied for this slice" */
1579 if (encode_state->slice_header_index[slice_index] == 0)
1580 slice_header_index = -1;
1582 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1584 count = encode_state->slice_rawdata_count[slice_index];
1585 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
/* first pass: emit all raw packed data attached to this slice */
1587 for (i = 0; i < count; i++) {
1588 unsigned int skip_emul_byte_cnt;
1590 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1592 param = (VAEncPackedHeaderParameterBuffer *)
1593 (encode_state->packed_header_params_ext[start_index + i]->buffer);
1595 /* skip the slice header packed data type as it is lastly inserted */
1596 if (param->type == VAEncPackedHeaderSlice)
1599 length_in_bits = param->bit_length;
1601 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1603 /* as the slice header is still required, the last header flag is set to
1606 mfc_context->insert_object(ctx,
1609 ALIGN(length_in_bits, 32) >> 5,
1610 length_in_bits & 0x1f,
1614 !param->has_emulation_bytes,
/* no packed slice header: generate one from the SPS/PPS/slice params */
1618 if (slice_header_index == -1) {
1619 unsigned char *slice_header = NULL;
1620 int slice_header_length_in_bits = 0;
1621 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1622 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1623 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1625 /* No slice header data is passed. And the driver needs to generate it */
1626 /* For the Normal H264 */
1627 slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
1631 mfc_context->insert_object(ctx, encoder_context,
1632 (unsigned int *)slice_header,
1633 ALIGN(slice_header_length_in_bits, 32) >> 5,
1634 slice_header_length_in_bits & 0x1f,
1635 5, /* first 5 bytes are start code + nal unit type */
1636 1, 0, 1, slice_batch);
/* application-supplied packed slice header: insert it last */
1640 unsigned int skip_emul_byte_cnt;
1642 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1644 param = (VAEncPackedHeaderParameterBuffer *)
1645 (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1646 length_in_bits = param->bit_length;
1648 /* as the slice header is the last header data for one slice,
1649 * the last header flag is set to one.
1651 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1653 mfc_context->insert_object(ctx,
1656 ALIGN(length_in_bits, 32) >> 5,
1657 length_in_bits & 0x1f,
1661 !param->has_emulation_bytes,
/* Lazily build the per-slice-type MB/MV cost table BO: 32 bytes per QP for
 * every QP in [0, QP_MAX), computed by intel_h264_calc_mbmvcost_qp().
 * The table is cached per slice type (I/P/B) in vme_context, so an existing
 * table is reused.
 * NOTE(review): interleaved lines (early returns, dri_bo_alloc arguments)
 * are missing from this view; comments describe only what is visible.
 */
1669 intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
1670 struct encode_state *encode_state,
1671 struct intel_encoder_context *encoder_context)
1673 struct i965_driver_data *i965 = i965_driver_data(ctx);
1674 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1675 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1678 uint8_t *cost_table;
1680 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* reuse the cached table for this slice type when it already exists */
1683 if (slice_type == SLICE_TYPE_I) {
1684 if (vme_context->i_qp_cost_table)
1686 } else if (slice_type == SLICE_TYPE_P) {
1687 if (vme_context->p_qp_cost_table)
1690 if (vme_context->b_qp_cost_table)
1694 /* It is enough to allocate 32 bytes for each qp. */
1695 bo = dri_bo_alloc(i965->intel.bufmgr,
1701 assert(bo->virtual);
1702 cost_table = (uint8_t *)(bo->virtual);
/* fill one 32-byte cost entry per QP */
1703 for (qp = 0; qp < QP_MAX; qp++) {
1704 intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
/* cache the BO under the matching slice type */
1710 if (slice_type == SLICE_TYPE_I) {
1711 vme_context->i_qp_cost_table = bo;
1712 } else if (slice_type == SLICE_TYPE_P) {
1713 vme_context->p_qp_cost_table = bo;
1715 vme_context->b_qp_cost_table = bo;
1718 vme_context->cost_table_size = QP_MAX * 32;
/* Bind the per-slice-type QP cost table (built by
 * intel_h264_initialize_mbmv_cost) as a VME buffer surface at the given
 * binding-table / surface-state offsets. The table is presented as QP_MAX
 * blocks of 32 bytes with a 16-byte pitch.
 * NOTE(review): interleaved lines are missing from this view; comments
 * describe only what is visible.
 */
1723 intel_h264_setup_cost_surface(VADriverContextP ctx,
1724 struct encode_state *encode_state,
1725 struct intel_encoder_context *encoder_context,
1726 unsigned long binding_table_offset,
1727 unsigned long surface_state_offset)
1729 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1730 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1734 struct i965_buffer_surface cost_table;
1736 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* pick the cached table matching the slice type */
1739 if (slice_type == SLICE_TYPE_I) {
1740 bo = vme_context->i_qp_cost_table;
1741 } else if (slice_type == SLICE_TYPE_P) {
1742 bo = vme_context->p_qp_cost_table;
1744 bo = vme_context->b_qp_cost_table;
/* describe the BO layout: one 32-byte block per QP */
1748 cost_table.num_blocks = QP_MAX;
1749 cost_table.pitch = 16;
1750 cost_table.size_block = 32;
1752 vme_context->vme_buffer_suface_setup(ctx,
1753 &vme_context->gpe_context,
1755 binding_table_offset,
1756 surface_state_offset);
1760 * the idea of conversion between qp and qstep comes from scaling process
1761 * of transform coeff for Luma component in H264 spec.
1763 * In order to avoid too small qstep, it is multiplied by 16.
1765 static float intel_h264_qp_qstep(int qp)
1769 value = value / 6 - 2;
1770 qstep = powf(2, value);
1774 static int intel_h264_qstep_qp(float qstep)
1778 qp = 12.0f + 6.0f * log2f(qstep);
1784 * Currently it is based on the following assumption:
1785 * SUM(roi_area * 1 / roi_qstep) + non_area * 1 / nonroi_qstep =
1786 * total_area * 1 / baseqp_qstep
1788 * qstep is the linearized quantizer of H264 quantizer
1791 int row_start_in_mb;
1793 int col_start_in_mb;
/* CBR ROI handling: given the BRC base QP and the user ROI regions, compute
 * a per-MB QP map. Each ROI gets base_qp + its qp delta (clamped to [1,51]);
 * the non-ROI area gets a QP chosen so that the overall linearized qstep
 * budget matches the base QP (see the equation in the comment above this
 * function). The result is written into vme_context->qp_per_mb.
 * NOTE(review): interleaved lines are missing from this view; comments
 * describe only what is visible.
 */
1803 intel_h264_enc_roi_cbr(VADriverContextP ctx,
1805 VAEncMiscParameterBufferROI *pMiscParamROI,
1806 struct encode_state *encode_state,
1807 struct intel_encoder_context *encoder_context)
1810 VAEncROI *region_roi;
1813 ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS];
1818 float qstep_nonroi, qstep_base;
1819 float roi_area, total_area, nonroi_area;
1822 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1823 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1824 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1825 int mbs_in_picture = width_in_mbs * height_in_mbs;
1827 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1828 VAStatus vaStatus = VA_STATUS_SUCCESS;
1830 if(pMiscParamROI != NULL)
/* cap the region count at the driver maximum */
1832 num_roi = (pMiscParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pMiscParamROI->num_roi;
1834 /* currently roi_value_is_qp_delta is the only supported mode of priority.
1836 * qp_delta set by user is added to base_qp, which is then clamped by
1837 * [base_qp-min_delta, base_qp+max_delta].
1839 ASSERT_RET(pMiscParamROI->roi_flags.bits.roi_value_is_qp_delta,VA_STATUS_ERROR_INVALID_PARAMETER);
1842 /* when the base_qp is lower than 12, the quality is quite good based
1843 * on the H264 test experience.
1844 * In such case it is unnecessary to adjust the quality for ROI region.
1846 if (base_qp <= 12) {
1847 nonroi_qp = base_qp;
/* convert each ROI rectangle from pixels to an inclusive MB-aligned box */
1854 for (i = 0; i < num_roi; i++) {
1855 int row_start, row_end, col_start, col_end;
1856 int roi_width_mbs, roi_height_mbs;
1861 region_roi = (VAEncROI *)pMiscParamROI->roi + i;
1863 col_start = region_roi->roi_rectangle.x;
1864 col_end = col_start + region_roi->roi_rectangle.width;
1865 row_start = region_roi->roi_rectangle.y;
1866 row_end = row_start + region_roi->roi_rectangle.height;
/* floor the start, round the end up to the next MB boundary */
1867 col_start = col_start / 16;
1868 col_end = (col_end + 15) / 16;
1869 row_start = row_start / 16;
1870 row_end = (row_end + 15) / 16;
1872 roi_width_mbs = col_end - col_start;
1873 roi_height_mbs = row_end - row_start;
1874 mbs_in_roi = roi_width_mbs * roi_height_mbs;
1876 param_regions[i].row_start_in_mb = row_start;
1877 param_regions[i].row_end_in_mb = row_end;
1878 param_regions[i].col_start_in_mb = col_start;
1879 param_regions[i].col_end_in_mb = col_end;
1880 param_regions[i].width_mbs = roi_width_mbs;
1881 param_regions[i].height_mbs = roi_height_mbs;
/* ROI QP = base + user delta, clamped to the valid H.264 range */
1883 roi_qp = base_qp + region_roi->roi_value;
1884 BRC_CLIP(roi_qp, 1, 51);
1886 param_regions[i].roi_qp = roi_qp;
1887 qstep_roi = intel_h264_qp_qstep(roi_qp);
/* accumulate area and 1/qstep budget spent on ROIs */
1889 roi_area += mbs_in_roi;
1890 sum_roi += mbs_in_roi / qstep_roi;
1893 total_area = mbs_in_picture;
1894 nonroi_area = total_area - roi_area;
/* solve for the non-ROI qstep that keeps the total budget constant */
1896 qstep_base = intel_h264_qp_qstep(base_qp);
1897 temp = (total_area / qstep_base - sum_roi);
1902 qstep_nonroi = nonroi_area / temp;
1903 nonroi_qp = intel_h264_qstep_qp(qstep_nonroi);
1906 BRC_CLIP(nonroi_qp, 1, 51);
/* paint the whole map with the non-ROI QP, then overwrite ROI rows */
1909 memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture);
1913 for (i = 0; i < num_roi; i++) {
1914 for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) {
1915 qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb;
1916 memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs);
/* Configure per-MB QP for ROI encoding. Validates the ROI misc parameter,
 * (re)allocates the qp_per_mb map when the picture size changed, then fills
 * it: under CBR via intel_h264_enc_roi_cbr(), under CQP by painting the
 * slice QP and overwriting each ROI rectangle with its clamped QP; other RC
 * modes disable ROI. On Gen7 ROI forces a software batch.
 * NOTE(review): interleaved lines are missing from this view; comments
 * describe only what is visible.
 */
1924 intel_h264_enc_roi_config(VADriverContextP ctx,
1925 struct encode_state *encode_state,
1926 struct intel_encoder_context *encoder_context)
1930 VAEncROI *region_roi;
1931 struct i965_driver_data *i965 = i965_driver_data(ctx);
1932 VAEncMiscParameterBuffer* pMiscParamROI;
1933 VAEncMiscParameterBufferROI *pParamROI = NULL;
1934 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1935 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1936 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1937 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1938 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1940 int row_start, row_end, col_start, col_end;
1943 vme_context->roi_enabled = 0;
1944 /* Restriction: Disable ROI when multi-slice is enabled */
1945 if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
1948 if (encode_state->misc_param[VAEncMiscParameterTypeROI][0] != NULL) {
1949 pMiscParamROI = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeROI][0]->buffer;
1950 pParamROI = (VAEncMiscParameterBufferROI *)pMiscParamROI->data;
1952 /* check whether number of ROI is correct */
1953 num_roi = (pParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pParamROI->num_roi;
1957 vme_context->roi_enabled = 1;
1959 if (!vme_context->roi_enabled)
/* reallocate the per-MB QP map when the picture geometry changed */
1962 if ((vme_context->saved_width_mbs != width_in_mbs) ||
1963 (vme_context->saved_height_mbs != height_in_mbs)) {
1964 free(vme_context->qp_per_mb);
1965 vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);
1967 vme_context->saved_width_mbs = width_in_mbs;
1968 vme_context->saved_height_mbs = height_in_mbs;
1969 assert(vme_context->qp_per_mb);
/* CBR: derive base QP from BRC state and delegate to the CBR ROI helper */
1971 if (encoder_context->rate_control_mode == VA_RC_CBR) {
1973 * TODO: More complex Qp adjust needs to be added.
1974 * Currently it is initialized to slice_qp.
1976 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1978 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1980 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1981 intel_h264_enc_roi_cbr(ctx, qp, pParamROI,encode_state, encoder_context);
/* CQP: paint the slice QP everywhere, then stamp each ROI rectangle */
1983 } else if (encoder_context->rate_control_mode == VA_RC_CQP){
1984 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1985 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1988 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1989 memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
/* iterate regions in reverse so lower-indexed ROIs win on overlap */
1992 for (j = num_roi; j ; j--) {
1993 int qp_delta, qp_clip;
1995 region_roi = (VAEncROI *)pParamROI->roi + j - 1;
1997 col_start = region_roi->roi_rectangle.x;
1998 col_end = col_start + region_roi->roi_rectangle.width;
1999 row_start = region_roi->roi_rectangle.y;
2000 row_end = row_start + region_roi->roi_rectangle.height;
/* pixel rectangle -> MB rectangle (end rounded up) */
2002 col_start = col_start / 16;
2003 col_end = (col_end + 15) / 16;
2004 row_start = row_start / 16;
2005 row_end = (row_end + 15) / 16;
2007 qp_delta = region_roi->roi_value;
2008 qp_clip = qp + qp_delta;
2010 BRC_CLIP(qp_clip, 1, 51);
2012 for (i = row_start; i < row_end; i++) {
2013 qp_ptr = vme_context->qp_per_mb + (i * width_in_mbs) + col_start;
2014 memset(qp_ptr, qp_clip, (col_end - col_start));
2019 * TODO: Disable it for non CBR-CQP.
2021 vme_context->roi_enabled = 0;
/* Gen7 cannot handle per-MB QP in the hardware batch path */
2024 if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info))
2025 encoder_context->soft_batch_force = 1;
/* HEVC counterpart of avc_temporal_find_surface(): pick the reference in
 * ref_list temporally closest to curr_pic by pic_order_cnt distance,
 * skipping invalid entries.
 * NOTE(review): part of the parameter list and the loop tail are missing
 * from this view -- the visible `tmp > 0 && tmp < min` test tracks the
 * closest valid candidate so far; confirm direction handling against the
 * elided lines.
 */
2032 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
2033 VAPictureHEVC *ref_list,
2037 int i, found = -1, min = 0x7FFFFFFF;
2039 for (i = 0; i < num_pictures; i++) {
/* skip invalid / unset reference entries */
2042 if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
2043 (ref_list[i].picture_id == VA_INVALID_SURFACE))
/* POC distance between the current picture and this candidate */
2046 tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
2051 if (tmp > 0 && tmp < min) {
/*
 * Resolve the reference surface the HEVC VME kernel should use for one
 * reference list, record it in the VME context, and program its surface
 * state through the supplied vme_source_surface_state() callback.
 * NOTE(review): original lines are missing from this excerpt (the
 * "list_index"/"surface_index" parameter declarations around line 2063,
 * the "else" keywords, closing braces, and the tail of the
 * ref_index_in_mb expression at line 2141) — comments describe only the
 * visible statements; control-flow pairing of if/else is inferred and
 * should be confirmed against the full file.
 */
2060 intel_hevc_vme_reference_state(VADriverContextP ctx,
2061 struct encode_state *encode_state,
2062 struct intel_encoder_context *encoder_context,
/* Callback that binds obj_surface into the VME binding table at the
 * given surface index. */
2065 void (* vme_source_surface_state)(
2066 VADriverContextP ctx,
2068 struct object_surface *obj_surface,
2069 struct intel_encoder_context *encoder_context))
2071 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2072 struct object_surface *obj_surface = NULL;
2073 struct i965_driver_data *i965 = i965_driver_data(ctx);
2074 VASurfaceID ref_surface_id;
2075 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2076 VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2077 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2078 int max_num_references;
2079 VAPictureHEVC *curr_pic;
2080 VAPictureHEVC *ref_list;
2082 unsigned int is_hevc10 = 0;
2083 GenHevcSurface *hevc_encoder_surface = NULL;
/* Any non-zero bit-depth-minus8 means 10-bit (or deeper) content; the
 * VME kernels then need the 8-bit NV12 proxy surface (see line 2138). */
2085 if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
2086 || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
/* Pick the active reference list (L0 forward / L1 backward) and its
 * advertised maximum size from the picture parameters. */
2089 if (list_index == 0) {
2090 max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
2091 ref_list = slice_param->ref_pic_list0;
2093 max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
2094 ref_list = slice_param->ref_pic_list1;
/* Fast path: a single allowed reference — take entry 0 of the slice's
 * list directly, no temporal search needed. */
2097 if (max_num_references == 1) {
2098 if (list_index == 0) {
2099 ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
2100 vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
2101 ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
2103 vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
2106 if (ref_surface_id != VA_INVALID_SURFACE)
2107 obj_surface = SURFACE(ref_surface_id);
/* Fallback (presumably when the slice entry was unusable — the guard
 * is on a missing line): use the driver's tracked reference object and
 * the picture-parameter reference_frames entry instead. */
2111 obj_surface = encode_state->reference_objects[list_index];
2112 vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
2117 curr_pic = &pic_param->decoded_curr_pic;
2119 /* select the reference frame in temporal space */
/* dir flag (list_index == 1) flips the POC-distance sign so L1 prefers
 * future pictures — see hevc_temporal_find_surface(). */
2120 ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
2121 ref_surface_id = ref_list[ref_idx].picture_id;
2123 if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
2124 obj_surface = SURFACE(ref_surface_id);
2126 vme_context->used_reference_objects[list_index] = obj_surface;
2127 vme_context->used_references[list_index] = &ref_list[ref_idx];
/* A valid reference object was resolved: remember it, then (for 10-bit
 * streams) swap in the NV12 shadow surface the encoder keeps alongside
 * the P010 one, since the VME kernels sample 8-bit data. */
2132 assert(ref_idx >= 0);
2133 vme_context->used_reference_objects[list_index] = obj_surface;
2136 hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2137 assert(hevc_encoder_surface);
2138 obj_surface = hevc_encoder_surface->nv12_surface_obj;
2140 vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
/* Pack the chosen reference index for the per-MB record; the rest of
 * this expression is on lines not shown in this excerpt. */
2141 vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
/* No usable reference for this list: clear all bookkeeping so later
 * stages see an empty list. */
2146 vme_context->used_reference_objects[list_index] = NULL;
2147 vme_context->used_references[list_index] = NULL;
2148 vme_context->ref_index_in_mb[list_index] = 0;
2152 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
2153 struct encode_state *encode_state,
2154 struct intel_encoder_context *encoder_context)
2156 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2157 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2158 VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2159 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2160 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2161 int qp, m_cost, j, mv_count;
2162 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
2163 float lambda, m_costf;
2165 /* here no SI SP slice for HEVC, do not need slice fixup */
2166 int slice_type = slice_param->slice_type;
2169 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2171 if(encoder_context->rate_control_mode == VA_RC_CBR)
2173 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
2174 if(slice_type == HEVC_SLICE_B) {
2175 if(pSequenceParameter->ip_period == 1)
2177 slice_type = HEVC_SLICE_P;
2178 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2180 }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
2181 slice_type = HEVC_SLICE_P;
2182 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2188 if (vme_state_message == NULL)
2191 assert(qp <= QP_MAX);
2192 lambda = intel_lambda_qp(qp);
2193 if (slice_type == HEVC_SLICE_I) {
2194 vme_state_message[MODE_INTRA_16X16] = 0;
2195 m_cost = lambda * 4;
2196 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2197 m_cost = lambda * 16;
2198 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2199 m_cost = lambda * 3;
2200 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2203 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
2204 for (j = 1; j < 3; j++) {
2205 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2206 m_cost = (int)m_costf;
2207 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2210 for (j = 4; j <= 64; j *= 2) {
2211 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2212 m_cost = (int)m_costf;
2213 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
2218 vme_state_message[MODE_INTRA_16X16] = 0x4a;
2219 vme_state_message[MODE_INTRA_8X8] = 0x4a;
2220 vme_state_message[MODE_INTRA_4X4] = 0x4a;
2221 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2222 vme_state_message[MODE_INTER_16X16] = 0x4a;
2223 vme_state_message[MODE_INTER_16X8] = 0x4a;
2224 vme_state_message[MODE_INTER_8X8] = 0x4a;
2225 vme_state_message[MODE_INTER_8X4] = 0x4a;
2226 vme_state_message[MODE_INTER_4X4] = 0x4a;
2227 vme_state_message[MODE_INTER_BWD] = 0x2a;
2230 m_costf = lambda * 10;
2231 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2232 m_cost = lambda * 14;
2233 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2234 m_cost = lambda * 24;
2235 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2236 m_costf = lambda * 3.5;
2238 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2239 if (slice_type == HEVC_SLICE_P) {
2240 m_costf = lambda * 2.5;
2242 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2243 m_costf = lambda * 4;
2245 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2246 m_costf = lambda * 1.5;
2248 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2249 m_costf = lambda * 3;
2251 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2252 m_costf = lambda * 5;
2254 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2255 /* BWD is not used in P-frame */
2256 vme_state_message[MODE_INTER_BWD] = 0;
2258 m_costf = lambda * 2.5;
2260 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2261 m_costf = lambda * 5.5;
2263 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2264 m_costf = lambda * 3.5;
2266 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2267 m_costf = lambda * 5.0;
2269 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2270 m_costf = lambda * 6.5;
2272 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2273 m_costf = lambda * 1.5;
2275 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);