2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
45 #include "intel_media.h"
48 #define log2f(x) (logf(x)/(float)M_LN2)
51 int intel_avc_enc_slice_type_fixup(int slice_type)
53 if (slice_type == SLICE_TYPE_SP ||
54 slice_type == SLICE_TYPE_P)
55 slice_type = SLICE_TYPE_P;
56 else if (slice_type == SLICE_TYPE_SI ||
57 slice_type == SLICE_TYPE_I)
58 slice_type = SLICE_TYPE_I;
60 if (slice_type != SLICE_TYPE_B)
61 WARN_ONCE("Invalid slice type for H.264 encoding!\n");
63 slice_type = SLICE_TYPE_B;
70 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
71 struct intel_encoder_context *encoder_context)
73 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
76 for(i = 0 ; i < 3; i++) {
77 mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
78 mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
79 mfc_context->bit_rate_control_context[i].GrowInit = 6;
80 mfc_context->bit_rate_control_context[i].GrowResistance = 4;
81 mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
82 mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
84 mfc_context->bit_rate_control_context[i].Correct[0] = 8;
85 mfc_context->bit_rate_control_context[i].Correct[1] = 4;
86 mfc_context->bit_rate_control_context[i].Correct[2] = 2;
87 mfc_context->bit_rate_control_context[i].Correct[3] = 2;
88 mfc_context->bit_rate_control_context[i].Correct[4] = 4;
89 mfc_context->bit_rate_control_context[i].Correct[5] = 8;
/*
 * intel_mfc_brc_init: (re)initialize the bit rate control state for every
 * temporal layer: per-layer bitrate/framerate (layer 0 absolute, higher
 * layers as deltas over the previous layer), the HRD buffer model (size,
 * fullness, capacity), per-GOP I/P/B frame counts, weighted target frame
 * sizes, and an initial QP estimate derived from bits-per-frame.
 *
 * NOTE(review): this extract is missing physical source lines (e.g. the
 * 'int i;' declaration and several braces/else lines); the code below is
 * left byte-identical to the original.
 */
93 static void intel_mfc_brc_init(struct encode_state *encode_state,
94 struct intel_encoder_context* encoder_context)
96 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
97 double bitrate, framerate;
/* Bits in one uncompressed NV12 frame: 8 bpp * 3/2 planes. */
98 double frame_per_bits = 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
/* Rough expected frame sizes at QP 1 (largest) and QP 51 (smallest). */
99 double qp1_size = 0.1 * frame_per_bits;
100 double qp51_size = 0.001 * frame_per_bits;
101 int min_qp = MAX(1, encoder_context->brc.min_qp);
102 double bpf, factor, hrd_factor;
103 int inum = encoder_context->brc.num_iframes_in_gop,
104 pnum = encoder_context->brc.num_pframes_in_gop,
105 bnum = encoder_context->brc.num_bframes_in_gop; /* Gop structure: number of I, P, B frames in the Gop. */
106 int intra_period = encoder_context->brc.gop_size;
107 
109 if (encoder_context->layer.num_layers > 1)
110 qp1_size = 0.15 * frame_per_bits;
112 mfc_context->brc.mode = encoder_context->rate_control_mode;
114 mfc_context->hrd.violation_noted = 0;
/* Start every layer at a neutral QP of 26 for all three slice types. */
116 for (i = 0; i < encoder_context->layer.num_layers; i++) {
117 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = 26;
118 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 26;
119 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = 26;
/* Layer 0 uses the absolute bitrate/framerate; higher layers use the
 * delta over the previous layer (the if/else lines around these
 * assignments are missing from this extract). */
122 bitrate = encoder_context->brc.bits_per_second[0];
123 framerate = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
125 bitrate = (encoder_context->brc.bits_per_second[i] - encoder_context->brc.bits_per_second[i - 1]);
126 framerate = ((double)encoder_context->brc.framerate[i].num / (double)encoder_context->brc.framerate[i].den) -
127 ((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
130 if (i == encoder_context->layer.num_layers - 1)
/* Frame-rate ratio of this layer relative to the previous one. */
133 factor = ((double)encoder_context->brc.framerate[i].num / (double)encoder_context->brc.framerate[i].den) /
134 ((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
/* Share of the total (top-layer) bitrate that this layer carries. */
137 hrd_factor = (double)bitrate / encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
139 mfc_context->hrd.buffer_size[i] = (unsigned int)(encoder_context->brc.hrd_buffer_size * hrd_factor);
140 mfc_context->hrd.current_buffer_fullness[i] =
141 (double)(encoder_context->brc.hrd_initial_buffer_fullness < encoder_context->brc.hrd_buffer_size) ?
142 encoder_context->brc.hrd_initial_buffer_fullness : encoder_context->brc.hrd_buffer_size / 2.;
143 mfc_context->hrd.current_buffer_fullness[i] *= hrd_factor;
144 mfc_context->hrd.target_buffer_fullness[i] = (double)encoder_context->brc.hrd_buffer_size * hrd_factor / 2.;
145 mfc_context->hrd.buffer_capacity[i] = (double)encoder_context->brc.hrd_buffer_size * hrd_factor / qp1_size;
/* Scale the GOP composition to this layer's frame-rate share. */
147 if (encoder_context->layer.num_layers > 1) {
149 intra_period = (int)(encoder_context->brc.gop_size * factor);
151 pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor);
152 bnum = intra_period - inum - pnum;
154 intra_period = (int)(encoder_context->brc.gop_size * factor) - intra_period;
156 pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor) - pnum;
157 bnum = intra_period - inum - pnum;
161 mfc_context->brc.gop_nums[i][SLICE_TYPE_I] = inum;
162 mfc_context->brc.gop_nums[i][SLICE_TYPE_P] = pnum;
163 mfc_context->brc.gop_nums[i][SLICE_TYPE_B] = bnum;
/* Distribute the GOP bit budget using the I/P/B weights. */
165 mfc_context->brc.target_frame_size[i][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
166 (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
167 mfc_context->brc.target_frame_size[i][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
168 mfc_context->brc.target_frame_size[i][SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
170 bpf = mfc_context->brc.bits_per_frame[i] = bitrate/framerate;
/* Either take the application-provided initial QP, or estimate one by
 * linear interpolation between the QP-1 and QP-51 expected frame sizes. */
172 if (encoder_context->brc.initial_qp) {
173 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = encoder_context->brc.initial_qp;
174 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = encoder_context->brc.initial_qp;
175 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = encoder_context->brc.initial_qp;
177 if ((bpf > qp51_size) && (bpf < qp1_size)) {
178 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
180 else if (bpf >= qp1_size)
181 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 1;
182 else if (bpf <= qp51_size)
183 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51;
185 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P];
186 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I];
/* Keep all three per-type QPs inside [min_qp, 51]. */
189 BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I], min_qp, 51);
190 BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P], min_qp, 51);
191 BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B], min_qp, 51);
195 int intel_mfc_update_hrd(struct encode_state *encode_state,
196 struct intel_encoder_context *encoder_context,
199 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
200 int layer_id = encoder_context->layer.curr_frame_layer_id;
201 double prev_bf = mfc_context->hrd.current_buffer_fullness[layer_id];
203 mfc_context->hrd.current_buffer_fullness[layer_id] -= frame_bits;
205 if (mfc_context->hrd.buffer_size[layer_id] > 0 && mfc_context->hrd.current_buffer_fullness[layer_id] <= 0.) {
206 mfc_context->hrd.current_buffer_fullness[layer_id] = prev_bf;
207 return BRC_UNDERFLOW;
210 mfc_context->hrd.current_buffer_fullness[layer_id] += mfc_context->brc.bits_per_frame[layer_id];
211 if (mfc_context->hrd.buffer_size[layer_id] > 0 && mfc_context->hrd.current_buffer_fullness[layer_id] > mfc_context->hrd.buffer_size[layer_id]) {
212 if (mfc_context->brc.mode == VA_RC_VBR)
213 mfc_context->hrd.current_buffer_fullness[layer_id] = mfc_context->hrd.buffer_size[layer_id];
215 mfc_context->hrd.current_buffer_fullness[layer_id] = prev_bf;
219 return BRC_NO_HRD_VIOLATION;
/*
 * intel_mfc_brc_postpack: per-frame bit rate control step, run after a
 * frame has been packed.  Checks the encoded size against the HRD model
 * (intel_mfc_update_hrd), predicts the next QP of the current slice type
 * from the target-vs-actual frame size ratio, applies a smooth HRD-
 * fullness-based correction, and propagates QP adjustments to the other
 * slice types.  Returns the (possibly upgraded) gen6_brc_status.
 *
 * NOTE(review): this extract is missing physical source lines (e.g. the
 * declarations of qpi/qpp/qpb, x, y and several braces); code is left
 * byte-identical to the original.
 */
222 int intel_mfc_brc_postpack(struct encode_state *encode_state,
223 struct intel_encoder_context *encoder_context,
226 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
227 gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
228 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
229 int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
230 int curr_frame_layer_id, next_frame_layer_id;
232 int qp; // quantizer of previously encoded slice of current type
233 int qpn; // predicted quantizer for next frame of current type in integer format
234 double qpf; // predicted quantizer for next frame of current type in float format
235 double delta_qp; // QP correction
236 int min_qp = MAX(1, encoder_context->brc.min_qp);
237 int target_frame_size, frame_size_next;
239 * x - how far we are from HRD buffer borders
240 * y - how far we are from target HRD buffer fullness
243 double frame_size_alpha;
/* Single-layer encoding always operates on layer 0. */
245 if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0) {
246 curr_frame_layer_id = 0;
247 next_frame_layer_id = 0;
249 curr_frame_layer_id = encoder_context->layer.curr_frame_layer_id;
250 next_frame_layer_id = encoder_context->layer.frame_layer_ids[encoder_context->num_frames_in_sequence % encoder_context->layer.size_frame_layer_ids];
253 /* checking wthether HRD compliance first */
254 sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
/* On a violation, keep predicting for the current layer so the frame can
 * be re-encoded; otherwise switch state over to the next frame's layer. */
256 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
259 next_frame_layer_id = curr_frame_layer_id;
262 mfc_context->brc.bits_prev_frame[curr_frame_layer_id] = frame_bits;
263 frame_bits = mfc_context->brc.bits_prev_frame[next_frame_layer_id];
265 mfc_context->brc.prev_slice_type[curr_frame_layer_id] = slicetype;
266 slicetype = mfc_context->brc.prev_slice_type[next_frame_layer_id];
268 /* 0 means the next frame is the first frame of next layer */
272 qpi = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I];
273 qpp = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P];
274 qpb = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B];
276 qp = mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype];
278 target_frame_size = mfc_context->brc.target_frame_size[next_frame_layer_id][slicetype];
/* frame_size_alpha damps the correction when many frames of this type
 * remain in the GOP; small HRD buffers get no damping. */
279 if (mfc_context->hrd.buffer_capacity[next_frame_layer_id] < 5)
280 frame_size_alpha = 0;
282 frame_size_alpha = (double)mfc_context->brc.gop_nums[next_frame_layer_id][slicetype];
283 if (frame_size_alpha > 30) frame_size_alpha = 30;
284 frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
285 (double)(frame_size_alpha + 1.);
287 /* frame_size_next: avoiding negative number and too small value */
288 if ((double)frame_size_next < (double)(target_frame_size * 0.25))
289 frame_size_next = (int)((double)target_frame_size * 0.25);
/* Predict the next QP proportionally to how far off-target we are. */
291 qpf = (double)qp * target_frame_size / frame_size_next;
292 qpn = (int)(qpf + 0.5);
295 /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
296 mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] += qpf - qpn;
297 if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] > 1.0) {
299 mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
300 } else if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] < -1.0) {
302 mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
305 /* making sure that QP is not changing too fast */
306 if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
307 else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
308 /* making sure that with QP predictions we did do not leave QPs range */
309 BRC_CLIP(qpn, 1, 51);
311 /* calculating QP delta as some function*/
312 x = mfc_context->hrd.target_buffer_fullness[next_frame_layer_id] - mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
314 x /= mfc_context->hrd.target_buffer_fullness[next_frame_layer_id];
315 y = mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
318 x /= (mfc_context->hrd.buffer_size[next_frame_layer_id] - mfc_context->hrd.target_buffer_fullness[next_frame_layer_id]);
319 y = mfc_context->hrd.buffer_size[next_frame_layer_id] - mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
321 if (y < 0.01) y = 0.01;
323 else if (x < -1) x = -1;
/* Smooth correction: sin() gives the direction/magnitude from the
 * distance to the buffer middle, exp() attenuates it near the borders. */
325 delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
326 qpn = (int)(qpn + delta_qp + 0.5);
328 /* making sure that with QP predictions we did do not leave QPs range */
329 BRC_CLIP(qpn, min_qp, 51);
331 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
332 /* correcting QPs of slices of other types */
333 if (slicetype == SLICE_TYPE_P) {
334 if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
335 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
336 if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
337 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
338 } else if (slicetype == SLICE_TYPE_I) {
339 if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
340 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
341 if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
342 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
343 } else { // SLICE_TYPE_B
344 if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
345 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
346 if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
347 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
349 BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I], min_qp, 51);
350 BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P], min_qp, 51);
351 BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B], min_qp, 51);
352 } else if (sts == BRC_UNDERFLOW) { // underflow
/* Raise QP to shrink the re-encoded frame; if QP is already maxed the
 * status is upgraded so the caller can react (e.g. skip/panic mode). */
353 if (qpn <= qp) qpn = qp + 1;
356 sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
358 } else if (sts == BRC_OVERFLOW) {
359 if (qpn >= qp) qpn = qp - 1;
360 if (qpn < min_qp) { // overflow with minQP
362 sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
366 mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype] = qpn;
371 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
372 struct intel_encoder_context *encoder_context)
374 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
375 unsigned int rate_control_mode = encoder_context->rate_control_mode;
376 int target_bit_rate = encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
378 // current we only support CBR mode.
379 if (rate_control_mode == VA_RC_CBR) {
380 mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
381 mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
382 mfc_context->vui_hrd.i_cpb_removal_delay = 2;
383 mfc_context->vui_hrd.i_frame_number = 0;
385 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
386 mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
387 mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
393 intel_mfc_hrd_context_update(struct encode_state *encode_state,
394 struct gen6_mfc_context *mfc_context)
396 mfc_context->vui_hrd.i_frame_number++;
399 int intel_mfc_interlace_check(VADriverContextP ctx,
400 struct encode_state *encode_state,
401 struct intel_encoder_context *encoder_context)
403 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
404 VAEncSliceParameterBufferH264 *pSliceParameter;
407 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
408 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
410 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
411 pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer;
412 mbCount += pSliceParameter->num_macroblocks;
415 if ( mbCount == ( width_in_mbs * height_in_mbs ) )
421 void intel_mfc_brc_prepare(struct encode_state *encode_state,
422 struct intel_encoder_context *encoder_context)
424 unsigned int rate_control_mode = encoder_context->rate_control_mode;
426 if (encoder_context->codec != CODEC_H264 &&
427 encoder_context->codec != CODEC_H264_MVC)
430 if (rate_control_mode == VA_RC_CBR) {
431 /*Programing bit rate control */
432 if (encoder_context->brc.need_reset) {
433 intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
434 intel_mfc_brc_init(encode_state, encoder_context);
437 /*Programing HRD control */
438 if (encoder_context->brc.need_reset)
439 intel_mfc_hrd_context_init(encode_state, encoder_context);
/*
 * Emit the packed SPS/PPS/SEI headers into the slice batch through
 * mfc_context->insert_object().  Each header the application supplied via
 * VAEncPackedHeader* buffers is inserted with emulation-prevention
 * insertion controlled by !param->has_emulation_bytes; when no SEI was
 * packed and rate control is CBR, a timing SEI is built locally with
 * build_avc_sei_buffer_timing() from the vui_hrd state.
 *
 * NOTE(review): several argument lines of the insert_object() calls are
 * missing from this extract; code is left byte-identical.
 */
443 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
444 struct encode_state *encode_state,
445 struct intel_encoder_context *encoder_context,
446 struct intel_batchbuffer *slice_batch)
448 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
449 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
450 unsigned int rate_control_mode = encoder_context->rate_control_mode;
451 unsigned int skip_emul_byte_cnt;
/* --- SPS --- */
453 if (encode_state->packed_header_data[idx]) {
454 VAEncPackedHeaderParameterBuffer *param = NULL;
455 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
456 unsigned int length_in_bits;
458 assert(encode_state->packed_header_param[idx]);
459 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
460 length_in_bits = param->bit_length;
462 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
463 mfc_context->insert_object(ctx,
466 ALIGN(length_in_bits, 32) >> 5,
467 length_in_bits & 0x1f,
471 !param->has_emulation_bytes,
/* --- PPS --- */
475 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
477 if (encode_state->packed_header_data[idx]) {
478 VAEncPackedHeaderParameterBuffer *param = NULL;
479 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
480 unsigned int length_in_bits;
482 assert(encode_state->packed_header_param[idx]);
483 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
484 length_in_bits = param->bit_length;
486 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
488 mfc_context->insert_object(ctx,
491 ALIGN(length_in_bits, 32) >> 5,
492 length_in_bits & 0x1f,
496 !param->has_emulation_bytes,
/* --- SEI: application-provided, or generated for CBR --- */
500 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
502 if (encode_state->packed_header_data[idx]) {
503 VAEncPackedHeaderParameterBuffer *param = NULL;
504 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
505 unsigned int length_in_bits;
507 assert(encode_state->packed_header_param[idx]);
508 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
509 length_in_bits = param->bit_length;
511 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
512 mfc_context->insert_object(ctx,
515 ALIGN(length_in_bits, 32) >> 5,
516 length_in_bits & 0x1f,
520 !param->has_emulation_bytes,
522 } else if (rate_control_mode == VA_RC_CBR) {
524 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
526 unsigned char *sei_data = NULL;
/* Build a buffering-period/picture-timing SEI from the HRD state. */
528 int length_in_bits = build_avc_sei_buffer_timing(
529 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
530 mfc_context->vui_hrd.i_initial_cpb_removal_delay,
532 mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
533 mfc_context->vui_hrd.i_dpb_output_delay_length,
536 mfc_context->insert_object(ctx,
538 (unsigned int *)sei_data,
539 ALIGN(length_in_bits, 32) >> 5,
540 length_in_bits & 0x1f,
/*
 * intel_mfc_avc_prepare: bind every input/output buffer object needed by
 * the AVC PAK stage: the reconstructed surface and its top/bottom direct
 * MV (DMV) buffers, each reference surface with its DMV buffers, the
 * uncompressed source surface, and the coded (indirect PAK-BSE) buffer
 * whose segment header is reset.  Deblocking (disable_deblocking_filter_idc
 * != 1 on any slice) decides whether the reconstructed surface becomes
 * the post- or pre-deblocking output.
 *
 * NOTE(review): lines are missing from this extract (dri_bo_alloc
 * name/alignment arguments, else arms, bo mapping calls); the code is
 * left byte-identical to the original.
 */
550 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
551 struct encode_state *encode_state,
552 struct intel_encoder_context *encoder_context)
554 struct i965_driver_data *i965 = i965_driver_data(ctx);
555 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
556 struct object_surface *obj_surface;
557 struct object_buffer *obj_buffer;
558 GenAvcSurface *gen6_avc_surface;
560 VAStatus vaStatus = VA_STATUS_SUCCESS;
561 int i, j, enable_avc_ildb = 0;
562 VAEncSliceParameterBufferH264 *slice_param;
563 struct i965_coded_buffer_segment *coded_buffer_segment;
564 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
565 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
566 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
568 if (IS_GEN6(i965->intel.device_info)) {
569 /* On the SNB it should be fixed to 128 for the DMV buffer */
/* Scan all slices: any slice with deblocking enabled turns on ILDB. */
573 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
574 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
575 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
577 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
578 assert((slice_param->slice_type == SLICE_TYPE_I) ||
579 (slice_param->slice_type == SLICE_TYPE_SI) ||
580 (slice_param->slice_type == SLICE_TYPE_P) ||
581 (slice_param->slice_type == SLICE_TYPE_SP) ||
582 (slice_param->slice_type == SLICE_TYPE_B));
584 if (slice_param->disable_deblocking_filter_idc != 1) {
593 /*Setup all the input&output object*/
595 /* Setup current frame and current direct mv buffer*/
596 obj_surface = encode_state->reconstructed_object;
597 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Lazily allocate the per-surface DMV buffers (68 bytes per MB each). */
599 if ( obj_surface->private_data == NULL) {
600 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
601 assert(gen6_avc_surface);
602 gen6_avc_surface->dmv_top =
603 dri_bo_alloc(i965->intel.bufmgr,
605 68 * width_in_mbs * height_in_mbs,
607 gen6_avc_surface->dmv_bottom =
608 dri_bo_alloc(i965->intel.bufmgr,
610 68 * width_in_mbs * height_in_mbs,
612 assert(gen6_avc_surface->dmv_top);
613 assert(gen6_avc_surface->dmv_bottom);
614 obj_surface->private_data = (void *)gen6_avc_surface;
615 obj_surface->free_private_data = (void *)gen_free_avc_surface;
617 gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
/* The current frame's DMV buffers occupy the last two slots. */
618 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
619 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
620 dri_bo_reference(gen6_avc_surface->dmv_top);
621 dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* With in-loop deblocking the reconstructed surface receives the
 * post-deblocking output, otherwise the pre-deblocking output. */
623 if (enable_avc_ildb) {
624 mfc_context->post_deblocking_output.bo = obj_surface->bo;
625 dri_bo_reference(mfc_context->post_deblocking_output.bo);
627 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
628 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
631 mfc_context->surface_state.width = obj_surface->orig_width;
632 mfc_context->surface_state.height = obj_surface->orig_height;
633 mfc_context->surface_state.w_pitch = obj_surface->width;
634 mfc_context->surface_state.h_pitch = obj_surface->height;
636 /* Setup reference frames and direct mv buffers*/
637 for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
638 obj_surface = encode_state->reference_objects[i];
640 if (obj_surface && obj_surface->bo) {
641 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
642 dri_bo_reference(obj_surface->bo);
644 /* Check DMV buffer */
645 if ( obj_surface->private_data == NULL) {
647 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
648 assert(gen6_avc_surface);
649 gen6_avc_surface->dmv_top =
650 dri_bo_alloc(i965->intel.bufmgr,
652 68 * width_in_mbs * height_in_mbs,
654 gen6_avc_surface->dmv_bottom =
655 dri_bo_alloc(i965->intel.bufmgr,
657 68 * width_in_mbs * height_in_mbs,
659 assert(gen6_avc_surface->dmv_top);
660 assert(gen6_avc_surface->dmv_bottom);
661 obj_surface->private_data = gen6_avc_surface;
662 obj_surface->free_private_data = gen_free_avc_surface;
665 gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
666 /* Setup DMV buffer */
667 mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
668 mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
669 dri_bo_reference(gen6_avc_surface->dmv_top);
670 dri_bo_reference(gen6_avc_surface->dmv_bottom);
676 mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
677 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* Bind the coded buffer: payload starts after the driver's header. */
679 obj_buffer = encode_state->coded_buf_object;
680 bo = obj_buffer->buffer_store->bo;
681 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
682 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
683 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
684 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
/* Reset the coded buffer segment header for this frame. */
687 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
688 coded_buffer_segment->mapped = 0;
689 coded_buffer_segment->codec = encoder_context->codec;
695 * The LUT uses the pair of 4-bit units: (shift, base) structure.
697 * So it is necessary to convert one cost into the nearest LUT format.
699 * 2^K *x = 2^n * (1 + deltaX)
700 * k + log2(x) = n + log2(1 + deltaX)
701 * log2(x) = n - k + log2(1 + deltaX)
702 * As X is in the range of [1, 15]
703 * 4 > n - k + log2(1 + deltaX) >= 0
704 * => n + log2(1 + deltaX) >= k > n - 4 + log2(1 + deltaX)
705 * Then we can derive the corresponding K and get the nearest LUT format.
707 int intel_format_lutvalue(int value, int max)
710 int logvalue, temp1, temp2;
715 logvalue = (int)(log2f((float)value));
719 int error, temp_value, base, j, temp_err;
721 j = logvalue - 4 + 1;
723 for(; j <= logvalue; j++) {
727 base = (value + (1 << (j - 1)) - 1) >> j;
732 temp_value = base << j;
733 temp_err = abs(value - temp_value);
734 if (temp_err < error) {
736 ret = (j << 4) | base;
742 temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
743 temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
752 #define VP8_QP_MAX 128
755 static float intel_lambda_qp(int qp)
757 float value, lambdaf;
759 value = value / 6 - 2;
762 lambdaf = roundf(powf(2, value));
/*
 * Fill the VME state message cost table for the given QP and slice type:
 * intra/inter mode costs and motion-vector costs, all scaled by
 * lambda = intel_lambda_qp(qp) and converted to the hardware (shift,base)
 * LUT format via intel_format_lutvalue() with per-entry caps 0x8f / 0x6f.
 * QP 0 (lossless-ish) uses a fixed table of 0x4a entries instead.
 *
 * NOTE(review): many 'm_cost = ...' assignment lines and the mv_count
 * bookkeeping lines are missing from this extract; code left
 * byte-identical.
 */
767 void intel_h264_calc_mbmvcost_qp(int qp,
769 uint8_t *vme_state_message)
771 int m_cost, j, mv_count;
772 float lambda, m_costf;
774 assert(qp <= QP_MAX);
775 lambda = intel_lambda_qp(qp);
778 vme_state_message[MODE_CHROMA_INTRA] = 0;
779 vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
/* --- I slice: intra mode costs plus MV costs --- */
781 if (slice_type == SLICE_TYPE_I) {
782 vme_state_message[MODE_INTRA_16X16] = 0;
784 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
785 m_cost = lambda * 16;
786 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
788 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* MV costs grow logarithmically with the MV magnitude bucket. */
791 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
792 for (j = 1; j < 3; j++) {
793 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
794 m_cost = (int)m_costf;
795 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
798 for (j = 4; j <= 64; j *= 2) {
799 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
800 m_cost = (int)m_costf;
801 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
/* --- QP 0: fixed cost table (presumably the near-lossless case) --- */
806 vme_state_message[MODE_INTRA_16X16] = 0x4a;
807 vme_state_message[MODE_INTRA_8X8] = 0x4a;
808 vme_state_message[MODE_INTRA_4X4] = 0x4a;
809 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
810 vme_state_message[MODE_INTER_16X16] = 0x4a;
811 vme_state_message[MODE_INTER_16X8] = 0x4a;
812 vme_state_message[MODE_INTER_8X8] = 0x4a;
813 vme_state_message[MODE_INTER_8X4] = 0x4a;
814 vme_state_message[MODE_INTER_4X4] = 0x4a;
815 vme_state_message[MODE_INTER_BWD] = 0x2a;
/* --- P/B slices: lambda-scaled intra costs ... --- */
818 m_costf = lambda * 10;
819 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
820 m_cost = lambda * 14;
821 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
822 m_cost = lambda * 24;
823 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
824 m_costf = lambda * 3.5;
826 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* ... then inter partition costs, cheaper for P than B. */
827 if (slice_type == SLICE_TYPE_P) {
828 m_costf = lambda * 2.5;
830 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
831 m_costf = lambda * 4;
833 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
834 m_costf = lambda * 1.5;
836 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
837 m_costf = lambda * 3;
839 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
840 m_costf = lambda * 5;
842 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
843 /* BWD is not used in P-frame */
844 vme_state_message[MODE_INTER_BWD] = 0;
846 m_costf = lambda * 2.5;
848 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
849 m_costf = lambda * 5.5;
851 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
852 m_costf = lambda * 3.5;
854 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
855 m_costf = lambda * 5.0;
857 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
858 m_costf = lambda * 6.5;
860 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
861 m_costf = lambda * 1.5;
863 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
869 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
870 struct encode_state *encode_state,
871 struct intel_encoder_context *encoder_context)
873 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
874 struct gen6_vme_context *vme_context = encoder_context->vme_context;
875 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
876 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
878 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
880 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
882 if (encoder_context->rate_control_mode == VA_RC_CQP)
883 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
885 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
887 if (vme_state_message == NULL)
890 intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
/* Fill the VME mode/MV cost table for VP8 encoding.  The VP8 QP index
 * (0..VP8_QP_MAX) is rescaled to the H.264-style QP range before the
 * lambda computation so the shared cost model can be reused.  Key frames
 * get intra-only costs; inter frames additionally get MV costs that grow
 * logarithmically with MV range (the log2f(j+1) + 1.718 term).
 * NOTE(review): elided lines (numbering gaps) hide some m_cost
 * assignments and the if/else structure between the key-frame and
 * inter-frame branches. */
893 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
894 struct encode_state *encode_state,
895 struct intel_encoder_context *encoder_context)
897 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
898 struct gen6_vme_context *vme_context = encoder_context->vme_context;
899 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
900 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
901 int qp, m_cost, j, mv_count;
902 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
903 float lambda, m_costf;
/* VP8 frame_type == 0 means key (intra) frame. */
905 int is_key_frame = !pic_param->pic_flags.bits.frame_type;
906 int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
908 if (vme_state_message == NULL)
/* CQP uses the first quantization index; BRC supplies a per-layer QP. */
911 if (encoder_context->rate_control_mode == VA_RC_CQP)
912 qp = q_matrix->quantization_index[0];
914 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
/* Rescale the VP8 QP index into the common QP range for lambda. */
916 lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
919 vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
922 vme_state_message[MODE_INTRA_16X16] = 0;
923 m_cost = lambda * 16;
924 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
926 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* MV cost grows ~log2 with the MV magnitude bucket. */
929 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
930 for (j = 1; j < 3; j++) {
931 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
932 m_cost = (int)m_costf;
933 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
/* Coarser buckets: 4, 8, 16, 32, 64 (presumably indexed via mv_count,
 * whose increment is elided from this listing — verify in full source). */
936 for (j = 4; j <= 64; j *= 2) {
937 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
938 m_cost = (int)m_costf;
939 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
/* Key-frame branch: fixed intra costs, inter modes effectively disabled. */
944 vme_state_message[MODE_INTRA_16X16] = 0x4a;
945 vme_state_message[MODE_INTRA_4X4] = 0x4a;
946 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
947 vme_state_message[MODE_INTER_16X16] = 0x4a;
948 vme_state_message[MODE_INTER_16X8] = 0x4a;
949 vme_state_message[MODE_INTER_8X8] = 0x4a;
950 vme_state_message[MODE_INTER_4X4] = 0x4a;
951 vme_state_message[MODE_INTER_BWD] = 0;
/* Inter-frame branch: lambda-scaled costs per partition size. */
954 m_costf = lambda * 10;
955 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
956 m_cost = lambda * 24;
957 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
959 m_costf = lambda * 3.5;
961 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
963 m_costf = lambda * 2.5;
965 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
966 m_costf = lambda * 4;
968 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
969 m_costf = lambda * 1.5;
971 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
972 m_costf = lambda * 5;
974 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
975 /* BWD is not used in P-frame */
976 vme_state_message[MODE_INTER_BWD] = 0;
980 #define MB_SCOREBOARD_A (1 << 0)
981 #define MB_SCOREBOARD_B (1 << 1)
982 #define MB_SCOREBOARD_C (1 << 2)
/* Configure the Gen7 hardware scoreboard so VME macroblock threads stall
 * on their spatial neighbours.  Each (delta_x, delta_y) pair names one
 * dependency relative to the current MB:
 *   (-1, 0) left (A), (0, -1) above (B), (1, -1) above-right (C). */
984 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
986 vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
987 vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
988 vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
992 /* In VME prediction the current mb depends on the neighbour
993 * A/B/C macroblock. So the left/up/up-right dependency should
996 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
997 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
998 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
999 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
1000 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
1001 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
/* No extra dependencies beyond the three above. */
1003 vme_context->gpe_context.vfe_desc7.dword = 0;
1007 /* check whether the mb of (x_index, y_index) is out of bound */
/* Bounds test used by the walker loops: checks the MB coordinate against
 * the picture dimensions and against the [first_mb, first_mb + num_mb]
 * slice window.  NOTE(review): the return statements are elided from this
 * listing; callers use it as `!loop_in_bounds(...)` meaning "in bounds",
 * so out-of-bounds presumably returns non-zero — confirm in full source. */
1008 static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
1011 if (x_index < 0 || x_index >= mb_width)
1013 if (y_index < 0 || y_index >= mb_height)
1016 mb_index = y_index * mb_width + x_index;
1017 if (mb_index < first_mb || mb_index > (first_mb + num_mb))
/* Fill the VME batchbuffer with one MEDIA_OBJECT per macroblock, walking
 * each H.264 slice in the 26-degree wavefront order required by the
 * scoreboard dependencies set up in gen7_vme_scoreboard_init().  Two loop
 * nests cover the picture: the first handles columns up to mb_width-2,
 * the second the right-edge remainder.  Each MEDIA_OBJECT carries the
 * (x, y) scoreboard term, the dependency mask, the intra-availability
 * flags for neighbours A/B/C/D, and (when ROI is enabled) the per-MB QP.
 * NOTE(review): many lines are elided from this listing (loop-variable
 * updates, qp_mb fallback when ROI is off, inner-loop exit conditions);
 * consult the full source before modifying. */
1023 gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
1024 struct encode_state *encode_state,
1025 int mb_width, int mb_height,
1027 int transform_8x8_mode_flag,
1028 struct intel_encoder_context *encoder_context)
1030 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1033 unsigned int *command_ptr;
1034 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1035 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1036 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1037 int qp,qp_mb,qp_index;
1038 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Same QP-source selection as intel_vme_update_mbmv_cost(). */
1040 if (encoder_context->rate_control_mode == VA_RC_CQP)
1041 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1043 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1045 #define USE_SCOREBOARD (1 << 21)
/* Map the batchbuffer BO writable and emit commands directly into it. */
1047 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1048 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1050 for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1051 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
1052 int first_mb = pSliceParameter->macroblock_address;
1053 int num_mb = pSliceParameter->num_macroblocks;
1054 unsigned int mb_intra_ub, score_dep;
1055 int x_outer, y_outer, x_inner, y_inner;
1056 int xtemp_outer = 0;
1058 x_outer = first_mb % mb_width;
1059 y_outer = first_mb / mb_width;
/* First pass: diagonal wavefront over all columns except the last two. */
1062 for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1065 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
/* Neighbour availability: A = left, B = above, C = above-right, D = above-left. */
1069 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1070 score_dep |= MB_SCOREBOARD_A;
1072 if (y_inner != mb_row) {
1073 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1074 score_dep |= MB_SCOREBOARD_B;
1076 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1077 if (x_inner != (mb_width -1)) {
1078 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1079 score_dep |= MB_SCOREBOARD_C;
/* 9-dword MEDIA_OBJECT: length field is (9 - 2) per command encoding. */
1083 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1084 *command_ptr++ = kernel;
1085 *command_ptr++ = USE_SCOREBOARD;
1088 /* the (X, Y) term of scoreboard */
1089 *command_ptr++ = ((y_inner << 16) | x_inner);
1090 *command_ptr++ = score_dep;
/* Inline data: picture width in MBs plus this MB's coordinates. */
1092 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1093 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1094 /* QP occupies one byte */
1095 if (vme_context->roi_enabled) {
1096 qp_index = y_inner * mb_width + x_inner;
1097 qp_mb = *(vme_context->qp_per_mb + qp_index);
1100 *command_ptr++ = qp_mb;
/* Second pass: the last-two-column remainder of the wavefront. */
1107 xtemp_outer = mb_width - 2;
1108 if (xtemp_outer < 0)
1110 x_outer = xtemp_outer;
1111 y_outer = first_mb / mb_width;
1112 for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1115 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1119 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1120 score_dep |= MB_SCOREBOARD_A;
1122 if (y_inner != mb_row) {
1123 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1124 score_dep |= MB_SCOREBOARD_B;
1126 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1128 if (x_inner != (mb_width -1)) {
1129 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1130 score_dep |= MB_SCOREBOARD_C;
1134 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1135 *command_ptr++ = kernel;
1136 *command_ptr++ = USE_SCOREBOARD;
1139 /* the (X, Y) term of scoreboard */
1140 *command_ptr++ = ((y_inner << 16) | x_inner);
1141 *command_ptr++ = score_dep;
1143 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1144 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1145 /* qp occupies one byte */
1146 if (vme_context->roi_enabled) {
1147 qp_index = y_inner * mb_width + x_inner;
1148 qp_mb = *(vme_context->qp_per_mb + qp_index);
1151 *command_ptr++ = qp_mb;
1157 if (x_outer >= mb_width) {
1159 x_outer = xtemp_outer;
/* Terminate the batch and release the CPU mapping. */
1165 *command_ptr++ = MI_BATCH_BUFFER_END;
1167 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
/* Pack one reference-picture entry for the MFX_AVC_REF_IDX_STATE command:
 *   bit 6    : long-term reference flag
 *   bit 5    : non-field flag (set when neither or both field flags are set)
 *   bits 4:1 : frame store index in the DPB
 *   bit 0    : bottom-field flag (bottom and not top) */
1171 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1173 unsigned int is_long_term =
1174 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1175 unsigned int is_top_field =
1176 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1177 unsigned int is_bottom_field =
1178 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1180 return ((is_long_term << 6) |
1181 ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1182 (frame_store_id << 1) |
1183 ((is_top_field ^ 1) & is_bottom_field));
/* Emit the two MFX_AVC_REF_IDX_STATE commands (list L0 and L1) for the
 * current slice.  Only the single reference actually used by VME is
 * programmed; every other entry stays at the 0x80808080 "invalid" filler.
 * The used reference is located in the DPB (encode_state->reference_objects)
 * to recover its frame store index.
 * NOTE(review): elided lines hide the ref_idx clamping fallback, the
 * frame_index declaration/initialisation and the DPB-match assignment;
 * verify against the full source before changing lookup logic. */
1187 intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
1188 struct encode_state *encode_state,
1189 struct intel_encoder_context *encoder_context)
1191 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1192 struct intel_batchbuffer *batch = encoder_context->base.batch;
1194 struct object_surface *obj_surface;
1195 unsigned int fref_entry, bref_entry;
1197 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
/* 0x80 per byte marks an unused reference slot. */
1199 fref_entry = 0x80808080;
1200 bref_entry = 0x80808080;
1201 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Forward list (L0) applies to both P and B slices. */
1203 if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
1204 int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
1206 if (ref_idx_l0 > 3) {
1207 WARN_ONCE("ref_idx_l0 is out of range\n");
/* Find the used L0 reference's frame store index in the DPB. */
1211 obj_surface = vme_context->used_reference_objects[0];
1213 for (i = 0; i < 16; i++) {
1215 obj_surface == encode_state->reference_objects[i]) {
1220 if (frame_index == -1) {
1221 WARN_ONCE("RefPicList0 is not found in DPB!\n");
1223 int ref_idx_l0_shift = ref_idx_l0 * 8;
1224 fref_entry &= ~(0xFF << ref_idx_l0_shift);
1225 fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
/* Backward list (L1) only exists for B slices. */
1229 if (slice_type == SLICE_TYPE_B) {
1230 int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
1232 if (ref_idx_l1 > 3) {
1233 WARN_ONCE("ref_idx_l1 is out of range\n");
1237 obj_surface = vme_context->used_reference_objects[1];
1239 for (i = 0; i < 16; i++) {
1241 obj_surface == encode_state->reference_objects[i]) {
1246 if (frame_index == -1) {
1247 WARN_ONCE("RefPicList1 is not found in DPB!\n");
1249 int ref_idx_l1_shift = ref_idx_l1 * 8;
1250 bref_entry &= ~(0xFF << ref_idx_l1_shift);
1251 bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
/* Emit L0 state: one valid dword plus seven filler dwords. */
1255 BEGIN_BCS_BATCH(batch, 10);
1256 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1257 OUT_BCS_BATCH(batch, 0); //Select L0
1258 OUT_BCS_BATCH(batch, fref_entry); //Only 1 reference
1259 for(i = 0; i < 7; i++) {
1260 OUT_BCS_BATCH(batch, 0x80808080);
1262 ADVANCE_BCS_BATCH(batch);
/* Emit L1 state with the same layout. */
1264 BEGIN_BCS_BATCH(batch, 10);
1265 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1266 OUT_BCS_BATCH(batch, 1); //Select L1
1267 OUT_BCS_BATCH(batch, bref_entry); //Only 1 reference
1268 for(i = 0; i < 7; i++) {
1269 OUT_BCS_BATCH(batch, 0x80808080);
1271 ADVANCE_BCS_BATCH(batch);
/* Initialise the VME state message for MPEG-2 encoding: MV search range
 * by level (low/main/high), mode costs for the 16x16-only search, MV
 * costs for non-intra pictures, and the picture dimensions in MBs.
 * NOTE(review): elided lines hide the per-level mv_x/mv_y assignments,
 * the fallback range after the WARN_ONCE, and the mv_count bookkeeping
 * in the second MV-cost loop. */
1275 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1276 struct encode_state *encode_state,
1277 struct intel_encoder_context *encoder_context)
1279 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1280 uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1281 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1282 int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1283 int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1284 uint32_t mv_x, mv_y;
1285 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1286 VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1287 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
/* MV range depends on the MPEG-2 level; values themselves are elided here. */
1289 if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1292 } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1295 } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1299 WARN_ONCE("Incorrect Mpeg2 level setting!\n");
1304 pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
/* Non-intra picture: program lambda-based MV costs from the slice quantiser. */
1305 if (pic_param->picture_type != VAEncPictureTypeIntra) {
1306 int qp, m_cost, j, mv_count;
1307 float lambda, m_costf;
1308 slice_param = (VAEncSliceParameterBufferMPEG2 *)
1309 encode_state->slice_params_ext[0]->buffer;
1310 qp = slice_param->quantiser_scale_code;
1311 lambda = intel_lambda_qp(qp);
1312 /* No Intra prediction. So it is zero */
1313 vme_state_message[MODE_INTRA_8X8] = 0;
1314 vme_state_message[MODE_INTRA_4X4] = 0;
1315 vme_state_message[MODE_INTER_MV0] = 0;
1316 for (j = 1; j < 3; j++) {
1317 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1318 m_cost = (int)m_costf;
1319 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1322 for (j = 4; j <= 64; j *= 2) {
1323 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1324 m_cost = (int)m_costf;
1325 vme_state_message[MODE_INTER_MV0 + mv_count] =
1326 intel_format_lutvalue(m_cost, 0x6f);
1330 /* It can only perform the 16x16 search. So mode cost can be ignored for
1331 * the other mode. for example: 16x8/8x8
1333 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1334 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1336 vme_state_message[MODE_INTER_16X8] = 0;
1337 vme_state_message[MODE_INTER_8X8] = 0;
1338 vme_state_message[MODE_INTER_8X4] = 0;
1339 vme_state_message[MODE_INTER_4X4] = 0;
1340 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
/* Pack MV range and picture size (in MBs) into the state message. */
1343 vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1345 vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
/* MPEG-2 variant of the VME batchbuffer walker: same two-pass 26-degree
 * wavefront as gen7_vme_walker_fill_vme_batchbuffer(), but the whole
 * picture is treated as one slice (first_mb/num_mb derived from the
 * picture size), MEDIA_OBJECT is 8 dwords, and there is no per-MB QP or
 * transform_8x8 flag in the inline data.
 * NOTE(review): loop-variable updates and several initialisations are
 * elided from this listing. */
1350 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
1351 struct encode_state *encode_state,
1352 int mb_width, int mb_height,
1354 struct intel_encoder_context *encoder_context)
1356 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1357 unsigned int *command_ptr;
1359 #define MPEG2_SCOREBOARD (1 << 21)
1361 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1362 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1365 unsigned int mb_intra_ub, score_dep;
1366 int x_outer, y_outer, x_inner, y_inner;
1367 int xtemp_outer = 0;
/* Single "slice" covering the entire picture. */
1369 int num_mb = mb_width * mb_height;
/* First pass: wavefront over all columns except the last two. */
1375 for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1378 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1382 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1383 score_dep |= MB_SCOREBOARD_A;
1386 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1387 score_dep |= MB_SCOREBOARD_B;
1390 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1392 if (x_inner != (mb_width -1)) {
1393 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1394 score_dep |= MB_SCOREBOARD_C;
/* 8-dword MEDIA_OBJECT for MPEG-2 (length = 8 - 2). */
1398 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1399 *command_ptr++ = kernel;
1400 *command_ptr++ = MPEG2_SCOREBOARD;
1403 /* the (X, Y) term of scoreboard */
1404 *command_ptr++ = ((y_inner << 16) | x_inner);
1405 *command_ptr++ = score_dep;
1407 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1408 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
/* Second pass: the last-two-column remainder. */
1415 xtemp_outer = mb_width - 2;
1416 if (xtemp_outer < 0)
1418 x_outer = xtemp_outer;
1420 for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1423 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1427 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1428 score_dep |= MB_SCOREBOARD_A;
1431 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1432 score_dep |= MB_SCOREBOARD_B;
1435 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1437 if (x_inner != (mb_width -1)) {
1438 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1439 score_dep |= MB_SCOREBOARD_C;
1443 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1444 *command_ptr++ = kernel;
1445 *command_ptr++ = MPEG2_SCOREBOARD;
1448 /* the (X, Y) term of scoreboard */
1449 *command_ptr++ = ((y_inner << 16) | x_inner);
1450 *command_ptr++ = score_dep;
1452 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1453 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1459 if (x_outer >= mb_width) {
1461 x_outer = xtemp_outer;
1467 *command_ptr++ = MI_BATCH_BUFFER_END;
1469 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
/* Pick the reference from ref_list that is temporally closest to
 * curr_pic, by TopFieldOrderCnt distance; invalid entries are skipped.
 * NOTE(review): the listing elides the tail of this function (the list1
 * direction handling, the min/found updates and the return), so the exact
 * selection rule for backward references must be checked in full source. */
1474 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1475 VAPictureH264 *ref_list,
1479 int i, found = -1, min = 0x7FFFFFFF;
1481 for (i = 0; i < num_pictures; i++) {
/* Skip invalid or unset DPB entries. */
1484 if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1485 (ref_list[i].picture_id == VA_INVALID_SURFACE))
1488 tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1493 if (tmp > 0 && tmp < min) {
/* Select the reference surface VME will actually use for the given list
 * (0 = forward/L0, 1 = backward/L1) and bind it via the supplied
 * vme_source_surface_state callback.  With a single active reference the
 * first RefPicList entry is used directly; with multiple references the
 * temporally closest one is chosen by avc_temporal_find_surface().  The
 * chosen picture, surface object and packed ref index are cached on the
 * vme_context for later MFX programming; on failure all three are cleared.
 * NOTE(review): elided lines hide some branch boundaries and the low bits
 * of ref_index_in_mb — verify in full source. */
1503 intel_avc_vme_reference_state(VADriverContextP ctx,
1504 struct encode_state *encode_state,
1505 struct intel_encoder_context *encoder_context,
1508 void (* vme_source_surface_state)(
1509 VADriverContextP ctx,
1511 struct object_surface *obj_surface,
1512 struct intel_encoder_context *encoder_context))
1514 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1515 struct object_surface *obj_surface = NULL;
1516 struct i965_driver_data *i965 = i965_driver_data(ctx);
1517 VASurfaceID ref_surface_id;
1518 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1519 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1520 int max_num_references;
1521 VAPictureH264 *curr_pic;
1522 VAPictureH264 *ref_list;
/* Pick the active-reference count and list for the requested direction. */
1525 if (list_index == 0) {
1526 max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1527 ref_list = slice_param->RefPicList0;
1529 max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1530 ref_list = slice_param->RefPicList1;
/* Single-reference fast path: use RefPicList[0] directly. */
1533 if (max_num_references == 1) {
1534 if (list_index == 0) {
1535 ref_surface_id = slice_param->RefPicList0[0].picture_id;
1536 vme_context->used_references[0] = &slice_param->RefPicList0[0];
1538 ref_surface_id = slice_param->RefPicList1[0].picture_id;
1539 vme_context->used_references[1] = &slice_param->RefPicList1[0];
1542 if (ref_surface_id != VA_INVALID_SURFACE)
1543 obj_surface = SURFACE(ref_surface_id);
/* Fallback when the list entry is unusable: take the DPB entry. */
1547 obj_surface = encode_state->reference_objects[list_index];
1548 vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1553 curr_pic = &pic_param->CurrPic;
1555 /* select the reference frame in temporal space */
1556 ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1557 ref_surface_id = ref_list[ref_idx].picture_id;
1559 if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1560 obj_surface = SURFACE(ref_surface_id);
1562 vme_context->used_reference_objects[list_index] = obj_surface;
1563 vme_context->used_references[list_index] = &ref_list[ref_idx];
/* Success path: bind the surface and record the packed reference index. */
1568 assert(ref_idx >= 0);
1569 vme_context->used_reference_objects[list_index] = obj_surface;
1570 vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1571 vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
/* Failure path: clear every cached reference field for this list. */
1576 vme_context->used_reference_objects[list_index] = NULL;
1577 vme_context->used_references[list_index] = NULL;
1578 vme_context->ref_index_in_mb[list_index] = 0;
/* Insert the packed header data belonging to one slice into the slice
 * batch: first every raw-data packed buffer attached to the slice (in
 * submission order, skipping the slice-header type), then the slice
 * header itself — either the application-supplied packed slice header, or
 * one generated by build_avc_slice_header() when none was provided.
 * NOTE(review): elided lines hide several insert_object() arguments
 * (header pointer, skip counts, last-header flags) — consult the full
 * source before changing the emission order. */
1582 void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
1583 struct encode_state *encode_state,
1584 struct intel_encoder_context *encoder_context,
1586 struct intel_batchbuffer *slice_batch)
1588 int count, i, start_index;
1589 unsigned int length_in_bits;
1590 VAEncPackedHeaderParameterBuffer *param = NULL;
1591 unsigned int *header_data = NULL;
1592 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1593 int slice_header_index;
/* Index 0 means "no packed slice header supplied for this slice". */
1595 if (encode_state->slice_header_index[slice_index] == 0)
1596 slice_header_index = -1;
1598 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1600 count = encode_state->slice_rawdata_count[slice_index];
1601 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
/* Pass 1: emit every non-slice-header packed buffer for this slice. */
1603 for (i = 0; i < count; i++) {
1604 unsigned int skip_emul_byte_cnt;
1606 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1608 param = (VAEncPackedHeaderParameterBuffer *)
1609 (encode_state->packed_header_params_ext[start_index + i]->buffer);
1611 /* skip the slice header packed data type as it is lastly inserted */
1612 if (param->type == VAEncPackedHeaderSlice)
1615 length_in_bits = param->bit_length;
1617 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1619 /* as the slice header is still required, the last header flag is set to
1622 mfc_context->insert_object(ctx,
1625 ALIGN(length_in_bits, 32) >> 5,
1626 length_in_bits & 0x1f,
1630 !param->has_emulation_bytes,
/* Pass 2a: no packed slice header — generate one from SPS/PPS/slice params. */
1634 if (slice_header_index == -1) {
1635 unsigned char *slice_header = NULL;
1636 int slice_header_length_in_bits = 0;
1637 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1638 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1639 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1641 /* No slice header data is passed. And the driver needs to generate it */
1642 /* For the Normal H264 */
1643 slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
1647 mfc_context->insert_object(ctx, encoder_context,
1648 (unsigned int *)slice_header,
1649 ALIGN(slice_header_length_in_bits, 32) >> 5,
1650 slice_header_length_in_bits & 0x1f,
1651 5, /* first 5 bytes are start code + nal unit type */
1652 1, 0, 1, slice_batch);
/* Pass 2b: application-supplied packed slice header, emitted last. */
1656 unsigned int skip_emul_byte_cnt;
1658 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1660 param = (VAEncPackedHeaderParameterBuffer *)
1661 (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1662 length_in_bits = param->bit_length;
1664 /* as the slice header is the last header data for one slice,
1665 * the last header flag is set to one.
1667 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1669 mfc_context->insert_object(ctx,
1672 ALIGN(length_in_bits, 32) >> 5,
1673 length_in_bits & 0x1f,
1677 !param->has_emulation_bytes,
/* Lazily build the per-QP mode/MV cost table BO for the current slice
 * type (I/P/B).  Each QP gets a 32-byte entry filled by
 * intel_h264_calc_mbmvcost_qp(); the resulting BO is cached on the
 * vme_context so it is computed only once per slice type.
 * NOTE(review): elided lines hide the early returns when a cached table
 * exists, the dri_bo_alloc size/name arguments, the map call and the
 * per-QP cost_table pointer advance. */
1685 intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
1686 struct encode_state *encode_state,
1687 struct intel_encoder_context *encoder_context)
1689 struct i965_driver_data *i965 = i965_driver_data(ctx);
1690 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1691 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1694 uint8_t *cost_table;
1696 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Already built for this slice type? (early return elided from listing) */
1699 if (slice_type == SLICE_TYPE_I) {
1700 if (vme_context->i_qp_cost_table)
1702 } else if (slice_type == SLICE_TYPE_P) {
1703 if (vme_context->p_qp_cost_table)
1706 if (vme_context->b_qp_cost_table)
1710 /* It is enough to allocate 32 bytes for each qp. */
1711 bo = dri_bo_alloc(i965->intel.bufmgr,
1717 assert(bo->virtual);
1718 cost_table = (uint8_t *)(bo->virtual);
/* Fill one 32-byte cost entry per QP value. */
1719 for (qp = 0; qp < QP_MAX; qp++) {
1720 intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
/* Cache the finished BO on the slot matching the slice type. */
1726 if (slice_type == SLICE_TYPE_I) {
1727 vme_context->i_qp_cost_table = bo;
1728 } else if (slice_type == SLICE_TYPE_P) {
1729 vme_context->p_qp_cost_table = bo;
1731 vme_context->b_qp_cost_table = bo;
1734 vme_context->cost_table_size = QP_MAX * 32;
/* Bind the per-QP cost table (built by intel_h264_initialize_mbmv_cost)
 * as a buffer surface in the VME binding table, choosing the BO that
 * matches the current slice type.  Layout: QP_MAX blocks of 32 bytes,
 * pitch 16.
 * NOTE(review): the cost_table.bo assignment and some vme_buffer_suface_setup
 * arguments are elided from this listing. */
1739 intel_h264_setup_cost_surface(VADriverContextP ctx,
1740 struct encode_state *encode_state,
1741 struct intel_encoder_context *encoder_context,
1742 unsigned long binding_table_offset,
1743 unsigned long surface_state_offset)
1745 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1746 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1750 struct i965_buffer_surface cost_table;
1752 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Pick the cached per-slice-type cost table BO. */
1755 if (slice_type == SLICE_TYPE_I) {
1756 bo = vme_context->i_qp_cost_table;
1757 } else if (slice_type == SLICE_TYPE_P) {
1758 bo = vme_context->p_qp_cost_table;
1760 bo = vme_context->b_qp_cost_table;
/* Describe the BO layout: one 32-byte block per QP, pitch 16. */
1764 cost_table.num_blocks = QP_MAX;
1765 cost_table.pitch = 16;
1766 cost_table.size_block = 32;
1768 vme_context->vme_buffer_suface_setup(ctx,
1769 &vme_context->gpe_context,
1771 binding_table_offset,
1772 surface_state_offset);
1776 * the idea of conversion between qp and qstep comes from scaling process
1777 * of transform coeff for Luma component in H264 spec.
1779 * In order to avoid too small qstep, it is multiplied by 16.
/* Map an H.264 QP to its (scaled) linear quantiser step: qstep =
 * 2^(qp/6 - 2), i.e. the standard "qstep doubles every 6 QP" relation
 * with the x16 scale folded in.  NOTE(review): the declarations of
 * `value`/`qstep`, the assignment value = qp, and the return are elided
 * from this listing. */
1781 static float intel_h264_qp_qstep(int qp)
1785 value = value / 6 - 2;
1786 qstep = powf(2, value);
/* Inverse of intel_h264_qp_qstep(): recover the QP from a (scaled)
 * quantiser step via qp = 12 + 6*log2(qstep); the +12 compensates the
 * x16 scale (-2 in the exponent) used by the forward mapping.
 * NOTE(review): rounding/clamping and the return are elided here. */
1790 static int intel_h264_qstep_qp(float qstep)
1794 qp = 12.0f + 6.0f * log2f(qstep);
1800 * Currently it is based on the following assumption:
1801 * SUM(roi_area * 1 / roi_qstep) + non_area * 1 / nonroi_qstep =
1802 * total_aread * 1 / baseqp_qstep
1804 * qstep is the linearized quantizer of H264 quantizer
/* Per-ROI rectangle in macroblock units (remaining fields — row_end_in_mb,
 * col_end_in_mb, width_mbs, height_mbs, roi_qp — are elided from this
 * listing but referenced by intel_h264_enc_roi_cbr below). */
1807 int row_start_in_mb;
1809 int col_start_in_mb;
/* Derive the per-MB QP map for CBR encoding with ROI regions.  Each ROI
 * gets base_qp + its qp_delta (clipped to [min_qp, 51]); the non-ROI QP
 * is then solved from the qstep-budget identity documented above the
 * ROIRegionParam struct, so the overall bit budget matches what base_qp
 * alone would have produced.  Low base QPs (<= 12) skip the adjustment
 * entirely since quality is already high.  Finally qp_per_mb is filled:
 * non-ROI QP everywhere, then each ROI rectangle overwritten row by row.
 * NOTE(review): elided lines hide num_roi clamping, accumulator
 * initialisation (roi_area/sum_roi), the temp<=0 guard branch and the
 * function's return; verify before altering the math. */
1819 intel_h264_enc_roi_cbr(VADriverContextP ctx,
1821 struct encode_state *encode_state,
1822 struct intel_encoder_context *encoder_context)
1825 int min_qp = MAX(1, encoder_context->brc.min_qp);
1828 ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS];
1833 float qstep_nonroi, qstep_base;
1834 float roi_area, total_area, nonroi_area;
1837 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1838 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1839 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1840 int mbs_in_picture = width_in_mbs * height_in_mbs;
1842 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1843 VAStatus vaStatus = VA_STATUS_SUCCESS;
1845 /* currently roi_value_is_qp_delta is the only supported mode of priority.
1847 * qp_delta set by user is added to base_qp, which is then clapped by
1848 * [base_qp-min_delta, base_qp+max_delta].
1850 ASSERT_RET(encoder_context->brc.roi_value_is_qp_delta, VA_STATUS_ERROR_INVALID_PARAMETER);
1852 num_roi = encoder_context->brc.num_roi;
1854 /* when the base_qp is lower than 12, the quality is quite good based
1855 * on the H264 test experience.
1856 * In such case it is unnecessary to adjust the quality for ROI region.
1858 if (base_qp <= 12) {
1859 nonroi_qp = base_qp;
/* Convert each ROI rectangle from pixels to MB units and accumulate the
 * area-weighted 1/qstep sum used by the budget identity. */
1866 for (i = 0; i < num_roi; i++) {
1867 int row_start, row_end, col_start, col_end;
1868 int roi_width_mbs, roi_height_mbs;
1873 col_start = encoder_context->brc.roi[i].left;
1874 col_end = encoder_context->brc.roi[i].right;
1875 row_start = encoder_context->brc.roi[i].top;
1876 row_end = encoder_context->brc.roi[i].bottom;
/* Pixel -> MB: floor the start edge, ceil the end edge. */
1878 col_start = col_start / 16;
1879 col_end = (col_end + 15) / 16;
1880 row_start = row_start / 16;
1881 row_end = (row_end + 15) / 16;
1883 roi_width_mbs = col_end - col_start;
1884 roi_height_mbs = row_end - row_start;
1885 mbs_in_roi = roi_width_mbs * roi_height_mbs;
1887 param_regions[i].row_start_in_mb = row_start;
1888 param_regions[i].row_end_in_mb = row_end;
1889 param_regions[i].col_start_in_mb = col_start;
1890 param_regions[i].col_end_in_mb = col_end;
1891 param_regions[i].width_mbs = roi_width_mbs;
1892 param_regions[i].height_mbs = roi_height_mbs;
1894 roi_qp = base_qp + encoder_context->brc.roi[i].value;
1895 BRC_CLIP(roi_qp, min_qp, 51);
1897 param_regions[i].roi_qp = roi_qp;
1898 qstep_roi = intel_h264_qp_qstep(roi_qp);
1900 roi_area += mbs_in_roi;
1901 sum_roi += mbs_in_roi / qstep_roi;
/* Solve for the non-ROI qstep that keeps the total budget constant. */
1904 total_area = mbs_in_picture;
1905 nonroi_area = total_area - roi_area;
1907 qstep_base = intel_h264_qp_qstep(base_qp);
1908 temp = (total_area / qstep_base - sum_roi);
1913 qstep_nonroi = nonroi_area / temp;
1914 nonroi_qp = intel_h264_qstep_qp(qstep_nonroi);
1917 BRC_CLIP(nonroi_qp, min_qp, 51);
/* Fill the QP map: non-ROI QP everywhere, then paint each ROI rect. */
1920 memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture);
1924 for (i = 0; i < num_roi; i++) {
1925 for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) {
1926 qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb;
1927 memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs);
/* Entry point for ROI handling: validates that ROI is applicable
 * (supported by the context, single slice only, CBR or CQP rate
 * control), (re)allocates the per-MB QP map when the picture size
 * changes, and fills it — via intel_h264_enc_roi_cbr() for CBR, or
 * directly from pic_init_qp + per-ROI deltas for CQP.  ROI regions are
 * applied in reverse order (num_roi..1) so earlier (higher-priority)
 * regions overwrite later ones.  On Gen7 a soft batchbuffer is forced
 * because the ROI walker needs CPU-built commands.
 * NOTE(review): elided lines hide several early returns, the `i`
 * initialisation/decrement paired with the `j` countdown loop, and the
 * else-branch structure for unsupported RC modes. */
1935 intel_h264_enc_roi_config(VADriverContextP ctx,
1936 struct encode_state *encode_state,
1937 struct intel_encoder_context *encoder_context)
1941 struct i965_driver_data *i965 = i965_driver_data(ctx);
1942 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1943 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1944 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1945 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1946 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1948 int row_start, row_end, col_start, col_end;
1951 vme_context->roi_enabled = 0;
1952 /* Restriction: Disable ROI when multi-slice is enabled */
1953 if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
1956 vme_context->roi_enabled = !!encoder_context->brc.num_roi;
1958 if (!vme_context->roi_enabled)
/* Reallocate the QP map only when the picture geometry changed. */
1961 if ((vme_context->saved_width_mbs != width_in_mbs) ||
1962 (vme_context->saved_height_mbs != height_in_mbs)) {
1963 free(vme_context->qp_per_mb);
1964 vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);
1966 vme_context->saved_width_mbs = width_in_mbs;
1967 vme_context->saved_height_mbs = height_in_mbs;
1968 assert(vme_context->qp_per_mb);
1970 if (encoder_context->rate_control_mode == VA_RC_CBR) {
1972 * TODO: More complex Qp adjust needs to be added.
1973 * Currently it is initialized to slice_qp.
1975 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1977 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1979 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1980 intel_h264_enc_roi_cbr(ctx, qp, encode_state, encoder_context);
/* CQP: base QP from picture/slice params, then paint each ROI rect. */
1982 } else if (encoder_context->rate_control_mode == VA_RC_CQP){
1983 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1984 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1986 int min_qp = MAX(1, encoder_context->brc.min_qp);
1988 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1989 memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
/* Countdown so region 0 (highest priority) is painted last and wins. */
1992 for (j = num_roi; j ; j--) {
1993 int qp_delta, qp_clip;
1995 col_start = encoder_context->brc.roi[i].left;
1996 col_end = encoder_context->brc.roi[i].right;
1997 row_start = encoder_context->brc.roi[i].top;
1998 row_end = encoder_context->brc.roi[i].bottom;
2000 col_start = col_start / 16;
2001 col_end = (col_end + 15) / 16;
2002 row_start = row_start / 16;
2003 row_end = (row_end + 15) / 16;
2005 qp_delta = encoder_context->brc.roi[i].value;
2006 qp_clip = qp + qp_delta;
2008 BRC_CLIP(qp_clip, min_qp, 51);
2010 for (i = row_start; i < row_end; i++) {
2011 qp_ptr = vme_context->qp_per_mb + (i * width_in_mbs) + col_start;
2012 memset(qp_ptr, qp_clip, (col_end - col_start));
2017 * TODO: Disable it for non CBR-CQP.
2019 vme_context->roi_enabled = 0;
/* Gen7 cannot do ROI with the hardware walker; force a soft batch. */
2022 if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info))
2023 encoder_context->soft_batch_force = 1;
/* HEVC counterpart of avc_temporal_find_surface(): pick the reference
 * from ref_list temporally closest to curr_pic by pic_order_cnt distance,
 * skipping invalid entries.
 * NOTE(review): the tail (direction handling, min/found updates, return)
 * is elided from this listing. */
2030 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
2031 VAPictureHEVC *ref_list,
2035 int i, found = -1, min = 0x7FFFFFFF;
2037 for (i = 0; i < num_pictures; i++) {
2040 if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
2041 (ref_list[i].picture_id == VA_INVALID_SURFACE))
2044 tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
2049 if (tmp > 0 && tmp < min) {
/*
 * Resolve the reference surface for one HEVC reference list (L0 when
 * list_index == 0, otherwise L1), record it in the VME context, and emit
 * its surface state through the supplied vme_source_surface_state
 * callback.  Several lines of the original body fall outside this chunk.
 */
2058 intel_hevc_vme_reference_state(VADriverContextP ctx,
2059 struct encode_state *encode_state,
2060 struct intel_encoder_context *encoder_context,
2063 void (* vme_source_surface_state)(
2064 VADriverContextP ctx,
2066 struct object_surface *obj_surface,
2067 struct intel_encoder_context *encoder_context))
2069 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2070 struct object_surface *obj_surface = NULL;
2071 struct i965_driver_data *i965 = i965_driver_data(ctx);
2072 VASurfaceID ref_surface_id;
2073 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2074 VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2075 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2076 int max_num_references;
2077 VAPictureHEVC *curr_pic;
2078 VAPictureHEVC *ref_list;
2080 unsigned int is_hevc10 = 0;
2081 GenHevcSurface *hevc_encoder_surface = NULL;
/* Either plane deeper than 8 bits marks the stream as HEVC 10-bit. */
2083 if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
2084 || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
2087 if (list_index == 0) {
2088 max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
2089 ref_list = slice_param->ref_pic_list0;
2091 max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
2092 ref_list = slice_param->ref_pic_list1;
/* Single active reference: entry 0 of the respective slice list is used. */
2095 if (max_num_references == 1) {
2096 if (list_index == 0) {
2097 ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
2098 vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
2100 ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
2101 vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
2104 if (ref_surface_id != VA_INVALID_SURFACE)
2105 obj_surface = SURFACE(ref_surface_id);
/* Fall back to the picture-parameter reference frame for this list. */
2109 obj_surface = encode_state->reference_objects[list_index];
2110 vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
2115 curr_pic = &pic_param->decoded_curr_pic;
2117 /* select the reference frame in temporal space */
2118 ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
2119 ref_surface_id = ref_list[ref_idx].picture_id;
2121 if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
2122 obj_surface = SURFACE(ref_surface_id);
2124 vme_context->used_reference_objects[list_index] = obj_surface;
2125 vme_context->used_references[list_index] = &ref_list[ref_idx];
2130 assert(ref_idx >= 0);
2131 vme_context->used_reference_objects[list_index] = obj_surface;
/*
 * VME reads the NV12 proxy kept in the surface's private data
 * (NOTE(review): presumably gated on is_hevc10 — the gate is not
 * visible in this excerpt; confirm against the full source).
 */
2134 hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2135 assert(hevc_encoder_surface);
2136 obj_surface = hevc_encoder_surface->nv12_surface_obj;
2138 vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
2139 vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
/* No usable reference: clear all bookkeeping for this list. */
2144 vme_context->used_reference_objects[list_index] = NULL;
2145 vme_context->used_references[list_index] = NULL;
2146 vme_context->ref_index_in_mb[list_index] = 0;
/*
 * Fill the VME cost LUT (vme_state_message) with mode and motion-vector
 * costs for the current HEVC slice, scaled by a lambda derived from the
 * effective QP.  The function continues past the end of this chunk.
 *
 * Fix: line 2228 wrote the INTRA_16X16 cost into the float temporary
 * m_costf while the next line passed the stale integer m_cost (last set
 * inside the MV-cost loop) to intel_format_lutvalue().  All sibling
 * integer-multiplier lines (2193/2195/2197/2230/2232) assign m_cost
 * directly, so this one now does the same.
 */
2150 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
2151 struct encode_state *encode_state,
2152 struct intel_encoder_context *encoder_context)
2154 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2155 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2156 VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2157 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2158 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2159 int qp, m_cost, j, mv_count;
2160 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
2161 float lambda, m_costf;
2163 /* here no SI SP slice for HEVC, do not need slice fixup */
2164 int slice_type = slice_param->slice_type;
/* CQP baseline: picture init QP plus the per-slice delta. */
2167 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
/* Under CBR the BRC-computed QP overrides the CQP value. */
2169 if(encoder_context->rate_control_mode == VA_RC_CBR)
2171 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
/*
 * B slices are costed as P when no B frames can actually occur
 * (ip_period == 1) or when this frame sits on a P position in the GOP.
 */
2172 if(slice_type == HEVC_SLICE_B) {
2173 if(pSequenceParameter->ip_period == 1)
2175 slice_type = HEVC_SLICE_P;
2176 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2178 }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
2179 slice_type = HEVC_SLICE_P;
2180 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2186 if (vme_state_message == NULL)
2189 assert(qp <= QP_MAX);
2190 lambda = intel_lambda_qp(qp);
/* Intra-only slice: fixed cost ratios between 16x16/8x8/4x4 intra modes. */
2191 if (slice_type == HEVC_SLICE_I) {
2192 vme_state_message[MODE_INTRA_16X16] = 0;
2193 m_cost = lambda * 4;
2194 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2195 m_cost = lambda * 16;
2196 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2197 m_cost = lambda * 3;
2198 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2201 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
/* MV cost grows roughly logarithmically with the MV range. */
2202 for (j = 1; j < 3; j++) {
2203 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2204 m_cost = (int)m_costf;
2205 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2208 for (j = 4; j <= 64; j *= 2) {
2209 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2210 m_cost = (int)m_costf;
2211 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
/* Flat fallback cost table (selecting condition not visible in this chunk). */
2216 vme_state_message[MODE_INTRA_16X16] = 0x4a;
2217 vme_state_message[MODE_INTRA_8X8] = 0x4a;
2218 vme_state_message[MODE_INTRA_4X4] = 0x4a;
2219 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2220 vme_state_message[MODE_INTER_16X16] = 0x4a;
2221 vme_state_message[MODE_INTER_16X8] = 0x4a;
2222 vme_state_message[MODE_INTER_8X8] = 0x4a;
2223 vme_state_message[MODE_INTER_8X4] = 0x4a;
2224 vme_state_message[MODE_INTER_4X4] = 0x4a;
2225 vme_state_message[MODE_INTER_BWD] = 0x2a;
/* Was `m_costf = lambda * 10;`, which left the next line reading a stale m_cost. */
2228 m_cost = lambda * 10;
2229 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2230 m_cost = lambda * 14;
2231 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2232 m_cost = lambda * 24;
2233 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2234 m_costf = lambda * 3.5;
2236 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* P slice: distinct inter-mode costs; backward prediction is unused. */
2237 if (slice_type == HEVC_SLICE_P) {
2238 m_costf = lambda * 2.5;
2240 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2241 m_costf = lambda * 4;
2243 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2244 m_costf = lambda * 1.5;
2246 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2247 m_costf = lambda * 3;
2249 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2250 m_costf = lambda * 5;
2252 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2253 /* BWD is not used in P-frame */
2254 vme_state_message[MODE_INTER_BWD] = 0;
/* B slice: inter costs are higher and backward prediction is allowed. */
2256 m_costf = lambda * 2.5;
2258 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2259 m_costf = lambda * 5.5;
2261 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2262 m_costf = lambda * 3.5;
2264 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2265 m_costf = lambda * 5.0;
2267 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2268 m_costf = lambda * 6.5;
2270 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2271 m_costf = lambda * 1.5;
2273 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);