2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
45 #include "intel_media.h"
48 #define log2f(x) (logf(x)/(float)M_LN2)
51 int intel_avc_enc_slice_type_fixup(int slice_type)
53 if (slice_type == SLICE_TYPE_SP ||
54 slice_type == SLICE_TYPE_P)
55 slice_type = SLICE_TYPE_P;
56 else if (slice_type == SLICE_TYPE_SI ||
57 slice_type == SLICE_TYPE_I)
58 slice_type = SLICE_TYPE_I;
60 if (slice_type != SLICE_TYPE_B)
61 WARN_ONCE("Invalid slice type for H.264 encoding!\n");
63 slice_type = SLICE_TYPE_B;
/*
 * Initialize the per-slice-type bit rate control contexts from the H.264
 * sequence parameters: per-MB and per-frame size targets derived from the
 * requested bitrate and frame rate, plus the QP grow/shrink knobs used by
 * the hardware BRC.
 * NOTE(review): this view of the file is missing interior lines (e.g. the
 * storage-class/return type, braces, and the declaration of 'i') — comments
 * only, code untouched.
 */
intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
                                        struct gen6_mfc_context *mfc_context)
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    /* time_scale counts ticks per field, hence the 0.5 factor for frames */
    float fps = pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
    int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
    /* intra MBs are budgeted at 5x the inter MB size */
    int intra_mb_size = inter_mb_size * 5.0;

    mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size;
    mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
    mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size;
    mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
    mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_mb_size = inter_mb_size;
    mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;

    /* common QP-adaptation parameters shared by the I/P/B contexts */
    for(i = 0 ; i < 3; i++) {
        mfc_context->bit_rate_control_context[i].QpPrimeY = 26; /* default initial QP */
        mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
        mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
        mfc_context->bit_rate_control_context[i].GrowInit = 6;
        mfc_context->bit_rate_control_context[i].GrowResistance = 4;
        mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
        mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;

        /* correction table fed to the hardware rate controller */
        mfc_context->bit_rate_control_context[i].Correct[0] = 8;
        mfc_context->bit_rate_control_context[i].Correct[1] = 4;
        mfc_context->bit_rate_control_context[i].Correct[2] = 2;
        mfc_context->bit_rate_control_context[i].Correct[3] = 2;
        mfc_context->bit_rate_control_context[i].Correct[4] = 4;
        mfc_context->bit_rate_control_context[i].Correct[5] = 8;

    /* targets expressed in 16-bit words, rounded up */
    mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16;
    mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16;
    mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord = (inter_mb_size + 16)/ 16;

    /* allow 50% headroom over the target before clamping */
    mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5;
    mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5;
    mfc_context->bit_rate_control_context[SLICE_TYPE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord * 1.5;
/*
 * Initialize the software BRC state: per-GOP frame-size targets weighted by
 * slice type, HRD buffer fullness bookkeeping, and an initial QP estimated
 * by linear interpolation between an "all QP=1" and an "all QP=51" frame
 * size.
 * NOTE(review): interior lines are missing in this view (braces, the 'bpf'
 * declaration, an early return after the HRD-param check) — comments only.
 */
static void intel_mfc_brc_init(struct encode_state *encode_state,
                               struct intel_encoder_context* encoder_context)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncMiscParameterBuffer* pMiscParamHRD = NULL;
    VAEncMiscParameterHRD* pParameterHRD = NULL;
    double bitrate = pSequenceParameter->bits_per_second;
    /* time_scale counts field ticks, hence the factor of 2 */
    double framerate = (double)pSequenceParameter->time_scale /(2 * (double)pSequenceParameter->num_units_in_tick);
    int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
    int intra_period = pSequenceParameter->intra_period;
    int ip_period = pSequenceParameter->ip_period;
    /* rough frame sizes (in bits) of a 4:2:0 frame coded entirely at QP=1 / QP=51 */
    double qp1_size = 0.1 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
    double qp51_size = 0.001 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;

    /* HRD parameters are mandatory for BRC; bail out when absent */
    if (!encode_state->misc_param[VAEncMiscParameterTypeHRD] || !encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer)

    pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
    pParameterHRD = (VAEncMiscParameterHRD*)pMiscParamHRD->data;

    /* derive the number of P and B frames per GOP from intra/ip periods */
    if (pSequenceParameter->ip_period) {
        pnum = (intra_period + ip_period - 1)/ip_period - 1;
        bnum = intra_period - inum - pnum;

    mfc_context->brc.mode = encoder_context->rate_control_mode;

    /* split the GOP bit budget across slice types using the BRC weights */
    mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
                                                             (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
    mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
    mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];

    mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
    mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
    mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;

    bpf = mfc_context->brc.bits_per_frame = bitrate/framerate;

    mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size;
    /* NOTE(review): the cast applies to the comparison result, not the
     * operand; behavior is unchanged but the intent was probably
     * (double)initial_buffer_fullness < buffer_size — confirm upstream. */
    mfc_context->hrd.current_buffer_fullness =
        (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
        pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
    mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
    mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
    mfc_context->hrd.violation_noted = 0;

    /* linear interpolation between QP 51 and QP 1 based on bits/frame */
    if ((bpf > qp51_size) && (bpf < qp1_size)) {
        mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
    else if (bpf >= qp1_size)
        mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 1;
    else if (bpf <= qp51_size)
        mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51;

    /* start I and B slices from the same QP as P */
    mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
    mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;

    BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
    BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
    BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
/*
 * Update the model HRD buffer fullness after coding a frame of 'frame_bits'
 * bits and report underflow/overflow.  On violation the fullness is rolled
 * back to its previous value so the caller can retry with a different QP.
 * NOTE(review): the third parameter line (frame_bits), braces, the else
 * branch and the BRC_OVERFLOW return are missing from this view — comments
 * only, code untouched.
 */
int intel_mfc_update_hrd(struct encode_state *encode_state,
                         struct gen6_mfc_context *mfc_context,
    double prev_bf = mfc_context->hrd.current_buffer_fullness;

    /* drain: the coded frame removes bits from the buffer */
    mfc_context->hrd.current_buffer_fullness -= frame_bits;

    if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
        mfc_context->hrd.current_buffer_fullness = prev_bf; /* roll back */
        return BRC_UNDERFLOW;

    /* fill: channel delivers bits_per_frame per frame interval */
    mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
    if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
        if (mfc_context->brc.mode == VA_RC_VBR)
            mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size; /* VBR may idle the channel */
            mfc_context->hrd.current_buffer_fullness = prev_bf; /* CBR: roll back (overflow) */

    return BRC_NO_HRD_VIOLATION;
/*
 * Post-encode BRC step: given the actual size of the just-coded frame
 * ('frame_bits', parameter line missing in this view), predict the QP for
 * the next frame of the same slice type, apply an HRD-fullness-based
 * correction, keep the I/P/B QPs within their conventional offsets of each
 * other, and return the HRD status (possibly requesting a re-encode or bit
 * stuffing).
 * NOTE(review): several interior lines (braces, declarations of x/y and
 * qpn adjustments inside the rounding-accumulator branches, the final
 * return) are missing from this view — comments only, code untouched.
 */
int intel_mfc_brc_postpack(struct encode_state *encode_state,
                           struct gen6_mfc_context *mfc_context,
    gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
    int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
    int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
    int qpb = mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY;
    int qp; // quantizer of previously encoded slice of current type
    int qpn; // predicted quantizer for next frame of current type in integer format
    double qpf; // predicted quantizer for next frame of current type in float format
    double delta_qp; // QP correction
    int target_frame_size, frame_size_next;
    * x - how far we are from HRD buffer borders
    * y - how far we are from target HRD buffer fullness
    double frame_size_alpha;

    qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;

    target_frame_size = mfc_context->brc.target_frame_size[slicetype];
    /* with a tiny HRD buffer, ignore GOP smoothing entirely */
    if (mfc_context->hrd.buffer_capacity < 5)
        frame_size_alpha = 0;
        frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
    if (frame_size_alpha > 30) frame_size_alpha = 30;
    /* move the next-frame target toward the budget, damped by alpha */
    frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
        (double)(frame_size_alpha + 1.);

    /* frame_size_next: avoiding negative number and too small value */
    if ((double)frame_size_next < (double)(target_frame_size * 0.25))
        frame_size_next = (int)((double)target_frame_size * 0.25);

    /* QP scales roughly inversely with achieved frame size */
    qpf = (double)qp * target_frame_size / frame_size_next;
    qpn = (int)(qpf + 0.5);

    /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
    mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
    if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
        mfc_context->brc.qpf_rounding_accumulator = 0.;
    } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
        mfc_context->brc.qpf_rounding_accumulator = 0.;

    /* making sure that QP is not changing too fast */
    if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
    else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, 1, 51);

    /* checking wthether HRD compliance is still met */
    sts = intel_mfc_update_hrd(encode_state, mfc_context, frame_bits);

    /* calculating QP delta as some function*/
    x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
    x /= mfc_context->hrd.target_buffer_fullness;
    y = mfc_context->hrd.current_buffer_fullness;
    x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
    y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
    if (y < 0.01) y = 0.01;
    else if (x < -1) x = -1;

    /* smooth sinusoidal correction, strongest near the buffer borders */
    delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
    qpn = (int)(qpn + delta_qp + 0.5);

    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, 1, 51);

    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* correcting QPs of slices of other types */
        if (slicetype == SLICE_TYPE_P) {
            if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
                mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
            if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
                mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
        } else if (slicetype == SLICE_TYPE_I) {
            if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
                mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
            if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
                mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
        } else { // SLICE_TYPE_B
            if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
                mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
            if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
                mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
    } else if (sts == BRC_UNDERFLOW) { // underflow
        if (qpn <= qp) qpn = qp + 1; /* force a coarser QP for the retry */
            sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
    } else if (sts == BRC_OVERFLOW) {
        if (qpn >= qp) qpn = qp - 1; /* force a finer QP for the retry */
        if (qpn < 1) { // < 0 (?) overflow with minQP
            sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done

    mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
/*
 * Initialize the VUI HRD timing parameters used when packing SEI buffering
 * period / picture timing messages.  Only done for CBR, the only rate
 * control mode these SEI messages are emitted for.
 * NOTE(review): braces and the closing of the if-block are missing from
 * this view — comments only, code untouched.
 */
static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
                                       struct intel_encoder_context *encoder_context)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    int target_bit_rate = pSequenceParameter->bits_per_second;

    // currently we only support CBR mode.
    if (rate_control_mode == VA_RC_CBR) {
        /* bit_rate/cpb_size are coded in 1024-bit units (>> 10) */
        mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
        mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
        /* initial removal delay in 90 kHz ticks for a half-full CPB */
        mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
        mfc_context->vui_hrd.i_cpb_removal_delay = 2;
        mfc_context->vui_hrd.i_frame_number = 0;

        /* fixed 24-bit fields, matching the packed SPS VUI */
        mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
        mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
        mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
/* Advance the HRD frame counter after each encoded frame; the counter feeds
 * the cpb_removal_delay in the SEI picture timing message. */
intel_mfc_hrd_context_update(struct encode_state *encode_state,
                             struct gen6_mfc_context *mfc_context)
    mfc_context->vui_hrd.i_frame_number++;
/*
 * Heuristic interlace check: sum the macroblocks covered by all slices and
 * compare against the frame MB count; a match means progressive (frame)
 * coding.
 * NOTE(review): declarations of 'i'/'mbCount' and the return statements are
 * missing from this view — comments only, code untouched.
 */
int intel_mfc_interlace_check(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSliceParameterBufferH264 *pSliceParameter;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;

    /* accumulate MBs across every slice of the picture */
    for (i = 0; i < encode_state->num_slice_params_ext; i++) {
        pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer;
        mbCount += pSliceParameter->num_macroblocks;

    if ( mbCount == ( width_in_mbs * height_in_mbs ) )
 * Check whether the parameters related to CBR have been updated and decide
 * whether the CBR configuration needs to be reinitialized.
 * Currently the following parameters are checked:
 * gop configuration (intra_period, ip_period, intra_idr_period)
383 static bool intel_mfc_brc_updated_check(struct encode_state *encode_state,
384 struct intel_encoder_context *encoder_context)
386 unsigned int rate_control_mode = encoder_context->rate_control_mode;
387 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
388 double cur_fps, cur_bitrate;
389 VAEncSequenceParameterBufferH264 *pSequenceParameter;
392 if (rate_control_mode != VA_RC_CBR) {
396 pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
398 cur_bitrate = pSequenceParameter->bits_per_second;
399 cur_fps = (double)pSequenceParameter->time_scale /
400 (2 * (double)pSequenceParameter->num_units_in_tick);
402 if ((cur_bitrate == mfc_context->brc.saved_bps) &&
403 (cur_fps == mfc_context->brc.saved_fps) &&
404 (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period) &&
405 (pSequenceParameter->intra_idr_period == mfc_context->brc.saved_idr_period) &&
406 (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period)) {
407 /* the parameters related with CBR are not updaetd */
411 mfc_context->brc.saved_ip_period = pSequenceParameter->ip_period;
412 mfc_context->brc.saved_intra_period = pSequenceParameter->intra_period;
413 mfc_context->brc.saved_idr_period = pSequenceParameter->intra_idr_period;
414 mfc_context->brc.saved_fps = cur_fps;
415 mfc_context->brc.saved_bps = cur_bitrate;
/*
 * Per-frame BRC entry point: in CBR mode, (re)initialize the bit rate
 * control and HRD contexts on first use or whenever the CBR parameters
 * changed since the previous frame.
 * NOTE(review): the 'brc_updated' declaration, part of the condition at
 * the first if, and closing braces are missing from this view — comments
 * only, code untouched.
 */
void intel_mfc_brc_prepare(struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    if (rate_control_mode == VA_RC_CBR) {
        /* MPEG-2 does not go through this H.264/VP8 BRC path */
        assert(encoder_context->codec != CODEC_MPEG2);

        brc_updated = intel_mfc_brc_updated_check(encode_state, encoder_context);

        /*Programing bit rate control */
        if ((mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0) ||
            intel_mfc_bit_rate_control_context_init(encode_state, mfc_context);
            intel_mfc_brc_init(encode_state, encoder_context);

        /*Programing HRD control */
        if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated )
            intel_mfc_hrd_context_init(encode_state, encoder_context);
/*
 * Emit the packed SPS, PPS and SEI headers into the slice batch via
 * mfc_context->insert_object.  If the application supplied no SEI but we
 * are in CBR mode, synthesize an SEI timing message from the HRD context.
 * NOTE(review): several insert_object argument lines (batch, data pointer,
 * flags) and closing braces are missing from this view — comments only,
 * code untouched.
 */
void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
                                              struct encode_state *encode_state,
                                              struct intel_encoder_context *encoder_context,
                                              struct intel_batchbuffer *slice_batch)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    unsigned int skip_emul_byte_cnt;

    /* SPS: insert the app-provided packed header, if any */
    if (encode_state->packed_header_data[idx]) {
        VAEncPackedHeaderParameterBuffer *param = NULL;
        unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
        unsigned int length_in_bits;

        assert(encode_state->packed_header_param[idx]);
        param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
        length_in_bits = param->bit_length;

        /* leading bytes (start code + NAL header) exempt from emulation prevention */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
        mfc_context->insert_object(ctx,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   !param->has_emulation_bytes,

    /* PPS: same handling as SPS */
    idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);

    if (encode_state->packed_header_data[idx]) {
        VAEncPackedHeaderParameterBuffer *param = NULL;
        unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
        unsigned int length_in_bits;

        assert(encode_state->packed_header_param[idx]);
        param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        mfc_context->insert_object(ctx,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   !param->has_emulation_bytes,

    /* SEI: app-provided header takes precedence; CBR falls back to a
     * generated buffering/timing SEI */
    idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);

    if (encode_state->packed_header_data[idx]) {
        VAEncPackedHeaderParameterBuffer *param = NULL;
        unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
        unsigned int length_in_bits;

        assert(encode_state->packed_header_param[idx]);
        param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
        mfc_context->insert_object(ctx,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   !param->has_emulation_bytes,
    } else if (rate_control_mode == VA_RC_CBR) {
        struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
        unsigned char *sei_data = NULL;

        /* build an SEI timing message from the running HRD state */
        int length_in_bits = build_avc_sei_buffer_timing(
            mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
            mfc_context->vui_hrd.i_initial_cpb_removal_delay,
            mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
            mfc_context->vui_hrd.i_dpb_output_delay_length,
        mfc_context->insert_object(ctx,
                                   (unsigned int *)sei_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
/*
 * Set up all MFC input/output buffer objects for one H.264 encode:
 * reconstructed surface and its direct-MV (DMV) buffers, deblocking
 * output selection, reference surfaces with their DMV buffers, the
 * uncompressed source, and the coded (PAK/BSE) output buffer.
 * NOTE(review): numerous interior lines (braces, 'bo' declaration,
 * dri_bo_alloc name/flags arguments, map/unmap of the coded buffer, the
 * final return) are missing from this view — comments only, code untouched.
 */
VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    GenAvcSurface *gen6_avc_surface;
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    int i, j, enable_avc_ildb = 0;
    VAEncSliceParameterBufferH264 *slice_param;
    struct i965_coded_buffer_segment *coded_buffer_segment;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;

    if (IS_GEN6(i965->intel.device_info)) {
        /* On the SNB it should be fixed to 128 for the DMV buffer */

    /* in-loop deblocking is enabled if any slice leaves it on */
    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {

    /*Setup all the input&output object*/

    /* Setup current frame and current direct mv buffer*/
    obj_surface = encode_state->reconstructed_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    if ( obj_surface->private_data == NULL) {
        /* NOTE(review): calloc's (count, size) arguments are swapped from
         * convention — harmless but should be calloc(1, sizeof(...)) */
        gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
        assert(gen6_avc_surface);
        /* 68 bytes of direct-MV data per MB, one buffer per field */
        gen6_avc_surface->dmv_top =
            dri_bo_alloc(i965->intel.bufmgr,
                         68 * width_in_mbs * height_in_mbs,
        gen6_avc_surface->dmv_bottom =
            dri_bo_alloc(i965->intel.bufmgr,
                         68 * width_in_mbs * height_in_mbs,
        assert(gen6_avc_surface->dmv_top);
        assert(gen6_avc_surface->dmv_bottom);
        obj_surface->private_data = (void *)gen6_avc_surface;
        obj_surface->free_private_data = (void *)gen_free_avc_surface;

    /* the last two DMV slots belong to the current frame */
    gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
    mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
    mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
    dri_bo_reference(gen6_avc_surface->dmv_top);
    dri_bo_reference(gen6_avc_surface->dmv_bottom);

    /* route reconstruction output before or after the deblocking filter */
    if (enable_avc_ildb) {
        mfc_context->post_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->post_deblocking_output.bo);
        mfc_context->pre_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->pre_deblocking_output.bo);

    mfc_context->surface_state.width = obj_surface->orig_width;
    mfc_context->surface_state.height = obj_surface->orig_height;
    mfc_context->surface_state.w_pitch = obj_surface->width;
    mfc_context->surface_state.h_pitch = obj_surface->height;

    /* Setup reference frames and direct mv buffers*/
    for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
        obj_surface = encode_state->reference_objects[i];

        if (obj_surface && obj_surface->bo) {
            mfc_context->reference_surfaces[i].bo = obj_surface->bo;
            dri_bo_reference(obj_surface->bo);

            /* Check DMV buffer */
            if ( obj_surface->private_data == NULL) {
                /* NOTE(review): same swapped calloc arguments as above */
                gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
                assert(gen6_avc_surface);
                gen6_avc_surface->dmv_top =
                    dri_bo_alloc(i965->intel.bufmgr,
                                 68 * width_in_mbs * height_in_mbs,
                gen6_avc_surface->dmv_bottom =
                    dri_bo_alloc(i965->intel.bufmgr,
                                 68 * width_in_mbs * height_in_mbs,
                assert(gen6_avc_surface->dmv_top);
                assert(gen6_avc_surface->dmv_bottom);
                obj_surface->private_data = gen6_avc_surface;
                obj_surface->free_private_data = gen_free_avc_surface;

            gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
            /* Setup DMV buffer */
            mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
            mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
            dri_bo_reference(gen6_avc_surface->dmv_top);
            dri_bo_reference(gen6_avc_surface->dmv_bottom);

    /* source picture */
    mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);

    /* coded buffer: payload starts after the driver's header segment */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
    mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
    mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);

    /* reset the coded-buffer header for this frame */
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
 * Each LUT entry is a pair of 4-bit fields with a (shift, base) structure,
 * so an arbitrary cost must be converted into the nearest representable
 * LUT value:
 *     2^k * x = 2^n * (1 + deltaX)
 *     k + log2(x) = n + log2(1 + deltaX)
 *     log2(x) = n - k + log2(1 + deltaX)
 * As x is in the range [1, 15]:
 *     4 > n - k + log2(1 + deltaX) >= 0
 *     => n + log2(1 + deltaX) >= k > n - 4 + log2(1 + deltaX)
 * From this we can derive the corresponding k and obtain the nearest LUT
 * representation.
/*
 * Convert 'value' into the hardware LUT format — a byte whose high nibble
 * is a shift and low nibble a base (cost ~= base << shift) — choosing the
 * shift/base pair with minimal absolute error, and clamp against 'max'
 * (itself in LUT format).
 * NOTE(review): the declaration/initialization of 'ret'/'error', small-value
 * shortcut, clamping and return are missing from this view — comments only,
 * code untouched.
 */
int intel_format_lutvalue(int value, int max)
    int logvalue, temp1, temp2;

    logvalue = (int)(log2f((float)value));
        int error, temp_value, base, j, temp_err;

        /* candidate shifts keep the 4-bit base in range */
        j = logvalue - 4 + 1;
        for(; j <= logvalue; j++) {
            /* round value up to a multiple of 1 << j */
            base = (value + (1 << (j - 1)) - 1) >> j;
            temp_value = base << j;
            temp_err = abs(value - temp_value);
            if (temp_err < error) {
                ret = (j << 4) | base;

    /* decode both LUT values to compare magnitudes for the clamp */
    temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
    temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
753 #define VP8_QP_MAX 128
/*
 * Lagrangian lambda for rate-distortion cost at a given H.264 QP:
 * lambda = round(2^(qp/6 - 2)).
 * NOTE(review): 'value' is read before any visible assignment — the line
 * initializing it from 'qp' (value = (float)qp;) appears to be missing
 * from this view, as does the return.  Comments only, code untouched.
 */
static float intel_lambda_qp(int qp)
    float value, lambdaf;
    value = value / 6 - 2;
    lambdaf = roundf(powf(2, value));
/*
 * Fill the VME state message's mode and motion-vector cost table for the
 * given QP and slice type, using lambda-scaled costs converted to the
 * hardware's (shift, base) LUT format.  I slices get intra-only costs;
 * P/B slices get per-partition inter costs and MV-range costs; a special
 * fixed table is used in one (not visible here) condition.
 * NOTE(review): the slice_type parameter line, several m_cost assignments
 * and braces are missing from this view.  Also the m_costf/m_cost
 * assignment mismatches (e.g. 'm_costf = lambda * 10;' followed by using
 * m_cost) may be truncation artifacts — confirm against the full file.
 * Comments only, code untouched.
 */
void intel_h264_calc_mbmvcost_qp(int qp,
                                 uint8_t *vme_state_message)
    int m_cost, j, mv_count;
    float lambda, m_costf;

    assert(qp <= QP_MAX);
    lambda = intel_lambda_qp(qp);

    /* costs common to all slice types */
    vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
    vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);

    if (slice_type == SLICE_TYPE_I) {
        /* 16x16 intra is the reference mode: zero cost */
        vme_state_message[MODE_INTRA_16X16] = 0;
        vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
        m_cost = lambda * 16;
        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
        vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
        /* MV costs grow logarithmically with MV range */
        for (j = 1; j < 3; j++) {
            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
            m_cost = (int)m_costf;
            vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
        for (j = 4; j <= 64; j *= 2) {
            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
            m_cost = (int)m_costf;
            vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);

        /* fixed fallback cost table (condition not visible in this view) */
        vme_state_message[MODE_INTRA_16X16] = 0x4a;
        vme_state_message[MODE_INTRA_8X8] = 0x4a;
        vme_state_message[MODE_INTRA_4X4] = 0x4a;
        vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
        vme_state_message[MODE_INTER_16X16] = 0x4a;
        vme_state_message[MODE_INTER_16X8] = 0x4a;
        vme_state_message[MODE_INTER_8X8] = 0x4a;
        vme_state_message[MODE_INTER_8X4] = 0x4a;
        vme_state_message[MODE_INTER_4X4] = 0x4a;
        vme_state_message[MODE_INTER_BWD] = 0x2a;

        /* P/B slices: intra costs scaled by lambda */
        m_costf = lambda * 10;
        vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
        m_cost = lambda * 14;
        vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
        m_cost = lambda * 24;
        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
        m_costf = lambda * 3.5;
        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
        if (slice_type == SLICE_TYPE_P) {
            m_costf = lambda * 2.5;
            vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
            m_costf = lambda * 4;
            vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
            m_costf = lambda * 1.5;
            vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
            m_costf = lambda * 3;
            vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
            m_costf = lambda * 5;
            vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
            /* BWD is not used in P-frame */
            vme_state_message[MODE_INTER_BWD] = 0;

            /* B slice: slightly higher partition costs, BWD enabled */
            m_costf = lambda * 2.5;
            vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
            m_costf = lambda * 5.5;
            vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
            m_costf = lambda * 3.5;
            vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
            m_costf = lambda * 5.0;
            vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
            m_costf = lambda * 6.5;
            vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
            m_costf = lambda * 1.5;
            vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
/*
 * Refresh the H.264 VME MB-mode/MV cost table for the current frame: pick
 * the QP from the picture/slice parameters in CQP mode, otherwise from the
 * BRC context, then delegate to intel_h264_calc_mbmvcost_qp().
 * NOTE(review): the 'qp' declaration, the else keyword and an early return
 * body are missing from this view — comments only, code untouched.
 */
void intel_vme_update_mbmv_cost(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

    if (encoder_context->rate_control_mode == VA_RC_CQP)
        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;

    if (vme_state_message == NULL)

    intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
894 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
895 struct encode_state *encode_state,
896 struct intel_encoder_context *encoder_context)
898 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
899 struct gen6_vme_context *vme_context = encoder_context->vme_context;
900 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
901 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
902 int qp, m_cost, j, mv_count;
903 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
904 float lambda, m_costf;
906 int is_key_frame = !pic_param->pic_flags.bits.frame_type;
907 int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
909 if (vme_state_message == NULL)
912 if (encoder_context->rate_control_mode == VA_RC_CQP)
913 qp = q_matrix->quantization_index[0];
915 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
917 lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
920 vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
923 vme_state_message[MODE_INTRA_16X16] = 0;
924 m_cost = lambda * 16;
925 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
927 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
930 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
931 for (j = 1; j < 3; j++) {
932 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
933 m_cost = (int)m_costf;
934 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
937 for (j = 4; j <= 64; j *= 2) {
938 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
939 m_cost = (int)m_costf;
940 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
945 vme_state_message[MODE_INTRA_16X16] = 0x4a;
946 vme_state_message[MODE_INTRA_4X4] = 0x4a;
947 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
948 vme_state_message[MODE_INTER_16X16] = 0x4a;
949 vme_state_message[MODE_INTER_16X8] = 0x4a;
950 vme_state_message[MODE_INTER_8X8] = 0x4a;
951 vme_state_message[MODE_INTER_4X4] = 0x4a;
952 vme_state_message[MODE_INTER_BWD] = 0;
955 m_costf = lambda * 10;
956 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
957 m_cost = lambda * 24;
958 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
960 m_costf = lambda * 3.5;
962 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
964 m_costf = lambda * 2.5;
966 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
967 m_costf = lambda * 4;
969 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
970 m_costf = lambda * 1.5;
972 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
973 m_costf = lambda * 5;
975 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
976 /* BWD is not used in P-frame */
977 vme_state_message[MODE_INTER_BWD] = 0;
/* Hardware scoreboard dependency bits: A = left, B = top, C = top-right
 * neighbour macroblock. */
#define MB_SCOREBOARD_A         (1 << 0)
#define MB_SCOREBOARD_B         (1 << 1)
#define MB_SCOREBOARD_C         (1 << 2)
/*
 * Program the GPE hardware scoreboard (stalling type) so each MB waits
 * on its A (left), B (top) and C (top-right) neighbours before running.
 */
static void
gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
{
    vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
    vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
    vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
                                                           MB_SCOREBOARD_B |
                                                           MB_SCOREBOARD_C);

    /* In VME prediction the current mb depends on the neighbour
     * A/B/C macroblock. So the left/up/up-right dependency should
     * be considered.
     */
    /* delta pairs below are (x, y) offsets of the three dependencies:
     * (-1, 0) = left, (0, -1) = top, (1, -1) = top-right */
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;

    /* remaining scoreboard slots unused */
    vme_context->gpe_context.vfe_desc7.dword = 0;
}
/*
 * Return 0 when the macroblock at (x_index, y_index) lies inside the
 * picture and inside the slice range [first_mb, first_mb + num_mb];
 * -1 otherwise. Note the upper slice bound is intentionally inclusive,
 * matching the diagonal walker's stop condition.
 */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    int inside_picture = (x_index >= 0 && x_index < mb_width &&
                          y_index >= 0 && y_index < mb_height);
    int mb_index;

    if (!inside_picture)
        return -1;

    mb_index = y_index * mb_width + x_index;
    return (mb_index >= first_mb && mb_index <= first_mb + num_mb) ? 0 : -1;
}
/*
 * Emit one MEDIA_OBJECT (9 dwords, including a per-MB QP) per macroblock
 * into the VME batchbuffer, walking each slice in diagonal order
 * (step -2 in x, +1 in y) so the hardware scoreboard can honour the
 * A/B/C neighbour dependencies programmed in gen7_vme_scoreboard_init().
 */
static void
gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     int mb_width, int mb_height,
                                     int kernel,
                                     int transform_8x8_mode_flag,
                                     struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_row;
    int s;
    unsigned int *command_ptr;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    int qp;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

    if (encoder_context->rate_control_mode == VA_RC_CQP)
        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    else
        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;

#define USE_SCOREBOARD (1 << 21)

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
        int first_mb = pSliceParameter->macroblock_address;
        int num_mb = pSliceParameter->num_macroblocks;
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;

        x_outer = first_mb % mb_width;
        y_outer = first_mb / mb_width;
        mb_row = y_outer;   /* first row of this slice: no top neighbour there */

        /* phase 1: start one diagonal from each MB of the slice's first
         * row, up to column mb_width - 2 */
        for (; x_outer < (mb_width - 2) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
            x_inner = x_outer;
            y_inner = y_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                    if (x_inner != (mb_width - 1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;

                /* inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                /* QP occupies one byte */
                *command_ptr++ = qp;

                /* walk down the diagonal */
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* phase 2: remaining diagonals anchored at column mb_width - 2,
         * wrapping to the next row when the anchor passes the right edge */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = first_mb / mb_width;
        for (; !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width - 1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;

                /* inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                /* qp occupies one byte */
                *command_ptr++ = qp;

                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 2;
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
1162 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1164 unsigned int is_long_term =
1165 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1166 unsigned int is_top_field =
1167 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1168 unsigned int is_bottom_field =
1169 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1171 return ((is_long_term << 6) |
1172 ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1173 (frame_store_id << 1) |
1174 ((is_top_field ^ 1) & is_bottom_field));
/*
 * Emit the two MFX_AVC_REF_IDX_STATE commands (list 0 and list 1).
 * All entries default to 0x80 (invalid); for P/B slices the single
 * reference actually chosen by the VME stage (used_reference_objects)
 * is located in the DPB and packed into the entry selected by the
 * kernel's ref index.
 */
void
intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int slice_type;
    struct object_surface *obj_surface;
    unsigned int fref_entry, bref_entry;
    int frame_index, i;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;

    fref_entry = 0x80808080;
    bref_entry = 0x80808080;
    slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

    if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
        /* kernel replicated the L0 ref index into each byte; low byte suffices */
        int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);

        if (ref_idx_l0 > 3) {
            WARN_ONCE("ref_idx_l0 is out of range\n");
            ref_idx_l0 = 0;
        }

        /* find the chosen reference surface in the DPB to get its frame store id */
        obj_surface = vme_context->used_reference_objects[0];
        frame_index = -1;
        for (i = 0; i < 16; i++) {
            if (obj_surface &&
                obj_surface == encode_state->reference_objects[i]) {
                frame_index = i;
                break;
            }
        }
        if (frame_index == -1) {
            WARN_ONCE("RefPicList0 is not found in DPB!\n");
        } else {
            int ref_idx_l0_shift = ref_idx_l0 * 8;
            fref_entry &= ~(0xFF << ref_idx_l0_shift);
            fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
        }
    }

    if (slice_type == SLICE_TYPE_B) {
        int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);

        if (ref_idx_l1 > 3) {
            WARN_ONCE("ref_idx_l1 is out of range\n");
            ref_idx_l1 = 0;
        }

        obj_surface = vme_context->used_reference_objects[1];
        frame_index = -1;
        for (i = 0; i < 16; i++) {
            if (obj_surface &&
                obj_surface == encode_state->reference_objects[i]) {
                frame_index = i;
                break;
            }
        }
        if (frame_index == -1) {
            WARN_ONCE("RefPicList1 is not found in DPB!\n");
        } else {
            int ref_idx_l1_shift = ref_idx_l1 * 8;
            bref_entry &= ~(0xFF << ref_idx_l1_shift);
            bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
        }
    }

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
    OUT_BCS_BATCH(batch, 0);                        //Select L0
    OUT_BCS_BATCH(batch, fref_entry);               //Only 1 reference
    for (i = 0; i < 7; i++) {
        OUT_BCS_BATCH(batch, 0x80808080);
    }
    ADVANCE_BCS_BATCH(batch);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
    OUT_BCS_BATCH(batch, 1);                        //Select L1
    OUT_BCS_BATCH(batch, bref_entry);               //Only 1 reference
    for (i = 0; i < 7; i++) {
        OUT_BCS_BATCH(batch, 0x80808080);
    }
    ADVANCE_BCS_BATCH(batch);
}
/*
 * Fill the MPEG-2 VME state message: MV search range (per level),
 * picture size in MBs, and — for non-intra pictures — the MV and mode
 * cost tables derived from the slice quantiser.
 */
void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
    VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
    uint32_t mv_x, mv_y;
    VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
    VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
    slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;

    /* MV search range per MPEG-2 level — NOTE(review): values restored from
     * upstream; confirm against the level limits used elsewhere. */
    if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
        mv_x = 512;
        mv_y = 64;
    } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
        mv_x = 1024;
        mv_y = 128;
    } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
        mv_x = 2048;
        mv_y = 128;
    } else {
        WARN_ONCE("Incorrect Mpeg2 level setting!\n");
        mv_x = 512;
        mv_y = 64;
    }

    pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
    if (pic_param->picture_type != VAEncPictureTypeIntra) {
        int qp, m_cost, j, mv_count;
        float lambda, m_costf;
        slice_param = (VAEncSliceParameterBufferMPEG2 *)
                      encode_state->slice_params_ext[0]->buffer;
        qp = slice_param->quantiser_scale_code;
        lambda = intel_lambda_qp(qp);
        /* No Intra prediction. So it is zero */
        vme_state_message[MODE_INTRA_8X8] = 0;
        vme_state_message[MODE_INTRA_4X4] = 0;
        vme_state_message[MODE_INTER_MV0] = 0;
        /* MV cost grows with log2 of the MV magnitude */
        for (j = 1; j < 3; j++) {
            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
            m_cost = (int)m_costf;
            vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
        }
        mv_count = 3;
        for (j = 4; j <= 64; j *= 2) {
            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
            m_cost = (int)m_costf;
            vme_state_message[MODE_INTER_MV0 + mv_count] =
                intel_format_lutvalue(m_cost, 0x6f);
            mv_count++;
        }

        m_cost = lambda;
        /* It can only perform the 16x16 search. So mode cost can be ignored for
         * the other mode. for example: 16x8/8x8
         */
        vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
        vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);

        vme_state_message[MODE_INTER_16X8] = 0;
        vme_state_message[MODE_INTER_8X8] = 0;
        vme_state_message[MODE_INTER_8X4] = 0;
        vme_state_message[MODE_INTER_4X4] = 0;
        vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
    }

    vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);

    vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
                                                width_in_mbs;
}
/*
 * MPEG-2 variant of the diagonal batchbuffer walker: the whole picture
 * is treated as one slice starting at MB 0, and each MEDIA_OBJECT
 * carries 8 dwords (no per-MB QP payload, unlike the H.264 walker).
 */
static void
gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                                           struct encode_state *encode_state,
                                           int mb_width, int mb_height,
                                           int kernel,
                                           struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    unsigned int *command_ptr;

#define MPEG2_SCOREBOARD (1 << 21)

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    {
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;
        int first_mb = 0;
        int num_mb = mb_width * mb_height;

        x_outer = 0;
        y_outer = 0;

        /* phase 1: one diagonal (step -2 in x, +1 in y) per MB of row 0,
         * up to column mb_width - 2 */
        for (; x_outer < (mb_width - 2) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
            x_inner = x_outer;
            y_inner = y_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width - 1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = MPEG2_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;

                /* inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));

                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* phase 2: remaining diagonals anchored at column mb_width - 2,
         * wrapping to the next row when the anchor passes the right edge */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = 0;
        for (; !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width - 1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = MPEG2_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;

                /* inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));

                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 2;
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
1465 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1466 VAPictureH264 *ref_list,
1470 int i, found = -1, min = 0x7FFFFFFF;
1472 for (i = 0; i < num_pictures; i++) {
1475 if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1476 (ref_list[i].picture_id == VA_INVALID_SURFACE))
1479 tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1484 if (tmp > 0 && tmp < min) {
/*
 * Select the single reference picture the VME kernels will use for the
 * given list (0 = forward, 1 = backward) and publish it through
 * used_references / used_reference_objects / ref_index_in_mb.
 * With more than one active reference, the temporally closest picture
 * wins; the surface-state callback is invoked for the chosen surface.
 */
void
intel_avc_vme_reference_state(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context,
                              int list_index,
                              int surface_index,
                              void (* vme_source_surface_state)(
                                  VADriverContextP ctx,
                                  int index,
                                  struct object_surface *obj_surface,
                                  struct intel_encoder_context *encoder_context))
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct object_surface *obj_surface = NULL;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VASurfaceID ref_surface_id;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    int max_num_references;
    VAPictureH264 *curr_pic;
    VAPictureH264 *ref_list;
    int ref_idx = 0;

    if (list_index == 0) {
        max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
        ref_list = slice_param->RefPicList0;
    } else {
        max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
        ref_list = slice_param->RefPicList1;
    }

    if (max_num_references == 1) {
        /* single active reference: take entry 0 of the relevant list */
        if (list_index == 0) {
            ref_surface_id = slice_param->RefPicList0[0].picture_id;
            vme_context->used_references[0] = &slice_param->RefPicList0[0];
        } else {
            ref_surface_id = slice_param->RefPicList1[0].picture_id;
            vme_context->used_references[1] = &slice_param->RefPicList1[0];
        }

        if (ref_surface_id != VA_INVALID_SURFACE)
            obj_surface = SURFACE(ref_surface_id);

        /* fall back to the tracked DPB entry when the explicit id is unusable */
        if (!obj_surface ||
            !obj_surface->bo) {
            obj_surface = encode_state->reference_objects[list_index];
            vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
        }

        ref_idx = 0;
    } else {
        curr_pic = &pic_param->CurrPic;

        /* select the reference frame in temporal space */
        ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
        ref_surface_id = ref_list[ref_idx].picture_id;

        if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
            obj_surface = SURFACE(ref_surface_id);

        vme_context->used_reference_objects[list_index] = obj_surface;
        vme_context->used_references[list_index] = &ref_list[ref_idx];
    }

    if (obj_surface &&
        obj_surface->bo) {
        assert(ref_idx >= 0);
        vme_context->used_reference_objects[list_index] = obj_surface;
        vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
        /* replicate the chosen ref index into all four bytes for the kernel */
        vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
                                                    ref_idx << 16 |
                                                    ref_idx << 8 |
                                                    ref_idx);
    } else {
        vme_context->used_reference_objects[list_index] = NULL;
        vme_context->used_references[list_index] = NULL;
        vme_context->ref_index_in_mb[list_index] = 0;
    }
}
/*
 * Insert the packed header data attached to one slice into the slice
 * batch: first every non-slice-header raw blob (last-header flag = 0),
 * then the slice header itself (last-header flag = 1). When the app
 * supplied no packed slice header, the driver builds one with
 * build_avc_slice_header().
 */
void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
                                        struct encode_state *encode_state,
                                        struct intel_encoder_context *encoder_context,
                                        int slice_index,
                                        struct intel_batchbuffer *slice_batch)
{
    int count, i, start_index;
    unsigned int length_in_bits;
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int *header_data = NULL;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int slice_header_index;

    /* index 0 means "no packed slice header supplied for this slice" */
    if (encode_state->slice_header_index[slice_index] == 0)
        slice_header_index = -1;
    else
        slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    count = encode_state->slice_rawdata_count[slice_index];
    start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)
                (encode_state->packed_header_params_ext[start_index + i]->buffer);

        /* skip the slice header packed data type as it is lastly inserted */
        if (param->type == VAEncPackedHeaderSlice)
            continue;

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* as the slice header is still required, the last header flag is set to
         * zero.
         */
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   0,
                                   0,
                                   !param->has_emulation_bytes,
                                   slice_batch);
    }

    if (slice_header_index == -1) {
        unsigned char *slice_header = NULL;
        int slice_header_length_in_bits = 0;
        VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;

        /* No slice header data is passed. And the driver needs to generate it */
        /* For the Normal H264 */
        slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
                                                             pPicParameter,
                                                             pSliceParameter,
                                                             &slice_header);
        mfc_context->insert_object(ctx, encoder_context,
                                   (unsigned int *)slice_header,
                                   ALIGN(slice_header_length_in_bits, 32) >> 5,
                                   slice_header_length_in_bits & 0x1f,
                                   5,  /* first 5 bytes are start code + nal unit type */
                                   1, 0, 1, slice_batch);

        free(slice_header);
    } else {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)
                (encode_state->packed_header_params_ext[slice_header_index]->buffer);
        length_in_bits = param->bit_length;

        /* as the slice header is the last header data for one slice,
         * the last header flag is set to one.
         */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   1,
                                   0,
                                   !param->has_emulation_bytes,
                                   slice_batch);
    }
}
/*
 * Lazily build the per-QP MB/MV cost table (one 32-byte row per QP)
 * for the current slice type and cache the buffer object on the VME
 * context (i/p/b_qp_cost_table). Subsequent calls for the same slice
 * type return immediately.
 */
void
intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    int qp;
    dri_bo *bo;
    uint8_t *cost_table;

    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

    /* already built for this slice type? */
    if (slice_type == SLICE_TYPE_I) {
        if (vme_context->i_qp_cost_table)
            return;
    } else if (slice_type == SLICE_TYPE_P) {
        if (vme_context->p_qp_cost_table)
            return;
    } else {
        if (vme_context->b_qp_cost_table)
            return;
    }

    /* It is enough to allocate 32 bytes for each qp. */
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cost_table ",
                      QP_MAX * 32,
                      64);

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    cost_table = (uint8_t *)(bo->virtual);
    for (qp = 0; qp < QP_MAX; qp++) {
        intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
        cost_table += 32;
    }

    dri_bo_unmap(bo);

    if (slice_type == SLICE_TYPE_I) {
        vme_context->i_qp_cost_table = bo;
    } else if (slice_type == SLICE_TYPE_P) {
        vme_context->p_qp_cost_table = bo;
    } else {
        vme_context->b_qp_cost_table = bo;
    }

    vme_context->cost_table_size = QP_MAX * 32;
    return;
}
1730 intel_h264_setup_cost_surface(VADriverContextP ctx,
1731 struct encode_state *encode_state,
1732 struct intel_encoder_context *encoder_context,
1733 unsigned long binding_table_offset,
1734 unsigned long surface_state_offset)
1736 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1737 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1741 struct i965_buffer_surface cost_table;
1743 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1746 if (slice_type == SLICE_TYPE_I) {
1747 bo = vme_context->i_qp_cost_table;
1748 } else if (slice_type == SLICE_TYPE_P) {
1749 bo = vme_context->p_qp_cost_table;
1751 bo = vme_context->b_qp_cost_table;
1755 cost_table.num_blocks = QP_MAX;
1756 cost_table.pitch = 16;
1757 cost_table.size_block = 32;
1759 vme_context->vme_buffer_suface_setup(ctx,
1760 &vme_context->gpe_context,
1762 binding_table_offset,
1763 surface_state_offset);
/*
 * Parse the ROI misc parameter (single region only) and initialize the
 * per-macroblock QP map to the base slice QP. ROI is only honoured for
 * single-slice encodes under CBR or CQP; anything else disables it.
 */
void
intel_h264_enc_roi_config(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context)
{
    VAEncMiscParameterBuffer* pMiscParamROI;
    VAEncMiscParameterBufferROI *pParamROI;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;

    vme_context->roi_enabled = 0;
    /* Restriction: Disable ROI when multi-slice is enabled */
    if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
        return;

    if (encode_state->misc_param[VAEncMiscParameterTypeROI] == NULL) {
        return;
    }

    pMiscParamROI = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeROI]->buffer;
    pParamROI = (VAEncMiscParameterBufferROI *)pMiscParamROI->data;

    /* check whether number of ROI is correct */
    /* currently one region is supported */
    if (pParamROI->num_roi != 1) {
        return;
    }

    vme_context->roi_enabled = 1;

    /* (re)allocate the QP map when the picture geometry changes */
    if ((vme_context->saved_width_mbs != width_in_mbs) ||
        (vme_context->saved_height_mbs != height_in_mbs)) {
        free(vme_context->qp_per_mb);
        vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);

        vme_context->saved_width_mbs = width_in_mbs;
        vme_context->saved_height_mbs = height_in_mbs;
        assert(vme_context->qp_per_mb);
    }
    if (encoder_context->rate_control_mode == VA_RC_CBR) {
        /*
         * TODO: More complex Qp adjust needs to be added.
         * Currently it is initialized to slice_qp.
         */
        VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
        int qp;
        int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
        memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
    } else if (encoder_context->rate_control_mode == VA_RC_CQP) {
        VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
        int qp;

        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
    } else {
        /*
         * TODO: Disable it for non CBR-CQP.
         */
        vme_context->roi_enabled = 0;
    }
}
1837 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
1838 VAPictureHEVC *ref_list,
1842 int i, found = -1, min = 0x7FFFFFFF;
1844 for (i = 0; i < num_pictures; i++) {
1847 if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
1848 (ref_list[i].picture_id == VA_INVALID_SURFACE))
1851 tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
1856 if (tmp > 0 && tmp < min) {
/*
 * HEVC counterpart of intel_avc_vme_reference_state(): select the single
 * reference picture the VME kernels will use for the given list and
 * publish it through used_references / used_reference_objects /
 * ref_index_in_mb. With more than one active reference, the temporally
 * closest picture (by POC) wins.
 */
void
intel_hevc_vme_reference_state(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context,
                               int list_index,
                               int surface_index,
                               void (* vme_source_surface_state)(
                                   VADriverContextP ctx,
                                   int index,
                                   struct object_surface *obj_surface,
                                   struct intel_encoder_context *encoder_context))
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct object_surface *obj_surface = NULL;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VASurfaceID ref_surface_id;
    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
    int max_num_references;
    VAPictureHEVC *curr_pic;
    VAPictureHEVC *ref_list;
    int ref_idx = 0;

    if (list_index == 0) {
        max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
        ref_list = slice_param->ref_pic_list0;
    } else {
        max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
        ref_list = slice_param->ref_pic_list1;
    }

    if (max_num_references == 1) {
        /* single active reference: take entry 0 of the relevant list */
        if (list_index == 0) {
            ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
            vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
        } else {
            ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
            vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
        }

        if (ref_surface_id != VA_INVALID_SURFACE)
            obj_surface = SURFACE(ref_surface_id);

        /* fall back to the tracked DPB entry when the explicit id is unusable */
        if (!obj_surface ||
            !obj_surface->bo) {
            obj_surface = encode_state->reference_objects[list_index];
            vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
        }

        ref_idx = 0;
    } else {
        curr_pic = &pic_param->decoded_curr_pic;

        /* select the reference frame in temporal space */
        ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
        ref_surface_id = ref_list[ref_idx].picture_id;

        if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
            obj_surface = SURFACE(ref_surface_id);

        vme_context->used_reference_objects[list_index] = obj_surface;
        vme_context->used_references[list_index] = &ref_list[ref_idx];
    }

    if (obj_surface &&
        obj_surface->bo) {
        assert(ref_idx >= 0);
        vme_context->used_reference_objects[list_index] = obj_surface;
        vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
        /* replicate the chosen ref index into all four bytes for the kernel */
        vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
                                                    ref_idx << 16 |
                                                    ref_idx << 8 |
                                                    ref_idx);
    } else {
        vme_context->used_reference_objects[list_index] = NULL;
        vme_context->used_references[list_index] = NULL;
        vme_context->ref_index_in_mb[list_index] = 0;
    }
}
1944 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
1945 struct encode_state *encode_state,
1946 struct intel_encoder_context *encoder_context)
1948 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1949 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1950 VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1951 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1952 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1953 int qp, m_cost, j, mv_count;
1954 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
1955 float lambda, m_costf;
1957 /* here no SI SP slice for HEVC, do not need slice fixup */
1958 int slice_type = slice_param->slice_type;
1961 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1963 if(encoder_context->rate_control_mode == VA_RC_CBR)
1965 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1966 if(slice_type == HEVC_SLICE_B) {
1967 if(pSequenceParameter->ip_period == 1)
1969 slice_type = HEVC_SLICE_P;
1970 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
1972 }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
1973 slice_type = HEVC_SLICE_P;
1974 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
1980 if (vme_state_message == NULL)
1983 assert(qp <= QP_MAX);
1984 lambda = intel_lambda_qp(qp);
1985 if (slice_type == HEVC_SLICE_I) {
1986 vme_state_message[MODE_INTRA_16X16] = 0;
1987 m_cost = lambda * 4;
1988 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
1989 m_cost = lambda * 16;
1990 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
1991 m_cost = lambda * 3;
1992 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
1995 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
1996 for (j = 1; j < 3; j++) {
1997 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1998 m_cost = (int)m_costf;
1999 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2002 for (j = 4; j <= 64; j *= 2) {
2003 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2004 m_cost = (int)m_costf;
2005 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
2010 vme_state_message[MODE_INTRA_16X16] = 0x4a;
2011 vme_state_message[MODE_INTRA_8X8] = 0x4a;
2012 vme_state_message[MODE_INTRA_4X4] = 0x4a;
2013 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2014 vme_state_message[MODE_INTER_16X16] = 0x4a;
2015 vme_state_message[MODE_INTER_16X8] = 0x4a;
2016 vme_state_message[MODE_INTER_8X8] = 0x4a;
2017 vme_state_message[MODE_INTER_8X4] = 0x4a;
2018 vme_state_message[MODE_INTER_4X4] = 0x4a;
2019 vme_state_message[MODE_INTER_BWD] = 0x2a;
2022 m_costf = lambda * 10;
2023 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2024 m_cost = lambda * 14;
2025 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2026 m_cost = lambda * 24;
2027 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2028 m_costf = lambda * 3.5;
2030 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2031 if (slice_type == HEVC_SLICE_P) {
2032 m_costf = lambda * 2.5;
2034 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2035 m_costf = lambda * 4;
2037 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2038 m_costf = lambda * 1.5;
2040 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2041 m_costf = lambda * 3;
2043 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2044 m_costf = lambda * 5;
2046 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2047 /* BWD is not used in P-frame */
2048 vme_state_message[MODE_INTER_BWD] = 0;
2050 m_costf = lambda * 2.5;
2052 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2053 m_costf = lambda * 5.5;
2055 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2056 m_costf = lambda * 3.5;
2058 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2059 m_costf = lambda * 5.0;
2061 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2062 m_costf = lambda * 6.5;
2064 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2065 m_costf = lambda * 1.5;
2067 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);