2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
45 #include "intel_media.h"
/* Fallback log2f implementation: log2(x) = ln(x) / ln(2).
 * NOTE(review): defining a macro named log2f shadows the C99 libm function;
 * upstream wraps this in a feature guard (e.g. #ifndef HAVE_LOG2F) — the
 * guard appears to have been elided from this listing, confirm before use. */
48 #define log2f(x) (logf(x)/(float)M_LN2)
51 int intel_avc_enc_slice_type_fixup(int slice_type)
53 if (slice_type == SLICE_TYPE_SP ||
54 slice_type == SLICE_TYPE_P)
55 slice_type = SLICE_TYPE_P;
56 else if (slice_type == SLICE_TYPE_SI ||
57 slice_type == SLICE_TYPE_I)
58 slice_type = SLICE_TYPE_I;
60 if (slice_type != SLICE_TYPE_B)
61 WARN_ONCE("Invalid slice type for H.264 encoding!\n");
63 slice_type = SLICE_TYPE_B;
70 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
71 struct intel_encoder_context *encoder_context)
73 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
76 for (i = 0 ; i < 3; i++) {
77 mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
78 mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
79 mfc_context->bit_rate_control_context[i].GrowInit = 6;
80 mfc_context->bit_rate_control_context[i].GrowResistance = 4;
81 mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
82 mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
84 mfc_context->bit_rate_control_context[i].Correct[0] = 8;
85 mfc_context->bit_rate_control_context[i].Correct[1] = 4;
86 mfc_context->bit_rate_control_context[i].Correct[2] = 2;
87 mfc_context->bit_rate_control_context[i].Correct[3] = 2;
88 mfc_context->bit_rate_control_context[i].Correct[4] = 4;
89 mfc_context->bit_rate_control_context[i].Correct[5] = 8;
/*
 * intel_mfc_brc_init: (re)compute all bit-rate-control state, per temporal
 * layer: HRD buffer size/fullness, per-GOP I/P/B frame counts, target frame
 * size per slice type, bits-per-frame and the starting QPs.
 *
 * NOTE(review): the embedded original line numbers in this listing are not
 * contiguous — declarations (e.g. `i`, `tmp_min_qp`), braces and some
 * else-branches have been elided; confirm against the complete file before
 * editing any control flow here.
 */
93 static void intel_mfc_brc_init(struct encode_state *encode_state,
94 struct intel_encoder_context* encoder_context)
96 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
97 double bitrate, framerate;
/* Size in bits of one uncompressed 4:2:0 frame (1.5 bytes/pixel * 8). */
98 double frame_per_bits = 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
/* Rough expected compressed frame sizes at the QP extremes (1 and 51),
 * used below to pick a sane starting QP from bits-per-frame. */
99 double qp1_size = 0.1 * frame_per_bits;
100 double qp51_size = 0.001 * frame_per_bits;
101 int min_qp = MAX(1, encoder_context->brc.min_qp);
102 double bpf, factor, hrd_factor;
103 int inum = encoder_context->brc.num_iframes_in_gop,
104 pnum = encoder_context->brc.num_pframes_in_gop,
105 bnum = encoder_context->brc.num_bframes_in_gop; /* Gop structure: number of I, P, B frames in the Gop. */
106 int intra_period = encoder_context->brc.gop_size;
110 if (encoder_context->layer.num_layers > 1)
111 qp1_size = 0.15 * frame_per_bits;
113 mfc_context->brc.mode = encoder_context->rate_control_mode;
115 mfc_context->hrd.violation_noted = 0;
/* Per-layer pass: default QP 26 for all slice types, then refine. */
117 for (i = 0; i < encoder_context->layer.num_layers; i++) {
118 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = 26;
119 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 26;
120 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = 26;
/* Layer 0 gets its absolute rate; higher layers get the delta over the
 * previous layer (bits_per_second[] is presumably cumulative — confirm). */
123 bitrate = encoder_context->brc.bits_per_second[0];
124 framerate = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
126 bitrate = (encoder_context->brc.bits_per_second[i] - encoder_context->brc.bits_per_second[i - 1]);
127 framerate = ((double)encoder_context->brc.framerate[i].num / (double)encoder_context->brc.framerate[i].den) -
128 ((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
/* VBR: scale the nominal rate down to the requested target percentage. */
131 if (mfc_context->brc.mode == VA_RC_VBR && encoder_context->brc.target_percentage[i])
132 bitrate = bitrate * encoder_context->brc.target_percentage[i] / 100;
134 if (i == encoder_context->layer.num_layers - 1)
/* Ratio of this layer's framerate to the previous layer's, used to scale
 * the per-layer GOP frame counts below. */
137 factor = ((double)encoder_context->brc.framerate[i].num / (double)encoder_context->brc.framerate[i].den) /
138 ((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
/* Fraction of the total (top-layer) bitrate carried by this layer; scales
 * the shared HRD buffer parameters to a per-layer share. */
141 hrd_factor = (double)bitrate / encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
143 mfc_context->hrd.buffer_size[i] = (unsigned int)(encoder_context->brc.hrd_buffer_size * hrd_factor);
144 mfc_context->hrd.current_buffer_fullness[i] =
145 (double)(encoder_context->brc.hrd_initial_buffer_fullness < encoder_context->brc.hrd_buffer_size) ?
146 encoder_context->brc.hrd_initial_buffer_fullness : encoder_context->brc.hrd_buffer_size / 2.;
147 mfc_context->hrd.current_buffer_fullness[i] *= hrd_factor;
148 mfc_context->hrd.target_buffer_fullness[i] = (double)encoder_context->brc.hrd_buffer_size * hrd_factor / 2.;
149 mfc_context->hrd.buffer_capacity[i] = (double)encoder_context->brc.hrd_buffer_size * hrd_factor / qp1_size;
/* Temporal scalability: derive this layer's I/P/B counts from the base GOP
 * scaled by the framerate factor (branch structure partially elided). */
151 if (encoder_context->layer.num_layers > 1) {
153 intra_period = (int)(encoder_context->brc.gop_size * factor);
155 pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor);
156 bnum = intra_period - inum - pnum;
158 intra_period = (int)(encoder_context->brc.gop_size * factor) - intra_period;
160 pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor) - pnum;
161 bnum = intra_period - inum - pnum;
165 mfc_context->brc.gop_nums[i][SLICE_TYPE_I] = inum;
166 mfc_context->brc.gop_nums[i][SLICE_TYPE_P] = pnum;
167 mfc_context->brc.gop_nums[i][SLICE_TYPE_B] = bnum;
/* Split the GOP bit budget across frames using the P/B weights. */
169 mfc_context->brc.target_frame_size[i][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period) / framerate) /
170 (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
171 mfc_context->brc.target_frame_size[i][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
172 mfc_context->brc.target_frame_size[i][SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
174 bpf = mfc_context->brc.bits_per_frame[i] = bitrate / framerate;
/* If the app supplied an explicit initial QP, use it (clipped) ... */
176 if (encoder_context->brc.initial_qp) {
177 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = encoder_context->brc.initial_qp;
178 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = encoder_context->brc.initial_qp;
179 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = encoder_context->brc.initial_qp;
181 BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I], min_qp, 51);
182 BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P], min_qp, 51);
183 BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B], min_qp, 51);
/* ... otherwise interpolate a starting QP linearly between the QP1 and
 * QP51 expected frame sizes from the actual bits-per-frame budget. */
185 if ((bpf > qp51_size) && (bpf < qp1_size)) {
186 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51 - 50 * (bpf - qp51_size) / (qp1_size - qp51_size);
187 } else if (bpf >= qp1_size)
188 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 1;
189 else if (bpf <= qp51_size)
190 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51;
192 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P];
193 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I];
/* Keep the derived starting QPs inside conservative per-type ceilings
 * (I<=36, P<=40, B<=45) without violating the caller's min_qp. */
195 tmp_min_qp = (min_qp < 36) ? min_qp : 36;
196 BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I], tmp_min_qp, 36);
197 tmp_min_qp = (min_qp < 40) ? min_qp : 40;
198 BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P], tmp_min_qp, 40);
199 tmp_min_qp = (min_qp < 45) ? min_qp : 45;
200 BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B], tmp_min_qp, 45);
/*
 * intel_mfc_update_hrd: account the just-encoded frame against the layer's
 * hypothetical-reference-decoder buffer model.  Drains `frame_bits`, then
 * refills by the nominal bits-per-frame.  Returns BRC_UNDERFLOW when the
 * frame would empty the buffer (fullness is rolled back so the frame can be
 * re-encoded), otherwise BRC_NO_HRD_VIOLATION.
 *
 * NOTE(review): lines are elided in this listing — the `int frame_bits`
 * parameter line, the braces and an overflow-return branch (the `> buffer
 * size` path presumably returns BRC_OVERFLOW for non-VBR modes) are missing;
 * confirm against the complete file.
 */
205 int intel_mfc_update_hrd(struct encode_state *encode_state,
206 struct intel_encoder_context *encoder_context,
209 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
210 int layer_id = encoder_context->layer.curr_frame_layer_id;
/* Remember the fullness so a violation can be rolled back. */
211 double prev_bf = mfc_context->hrd.current_buffer_fullness[layer_id];
213 mfc_context->hrd.current_buffer_fullness[layer_id] -= frame_bits;
215 if (mfc_context->hrd.buffer_size[layer_id] > 0 && mfc_context->hrd.current_buffer_fullness[layer_id] <= 0.) {
216 mfc_context->hrd.current_buffer_fullness[layer_id] = prev_bf;
217 return BRC_UNDERFLOW;
/* Refill at the nominal per-frame rate. */
220 mfc_context->hrd.current_buffer_fullness[layer_id] += mfc_context->brc.bits_per_frame[layer_id];
221 if (mfc_context->hrd.buffer_size[layer_id] > 0 && mfc_context->hrd.current_buffer_fullness[layer_id] > mfc_context->hrd.buffer_size[layer_id]) {
/* VBR deliberately ignores overflow: just saturate at the buffer size. */
222 if (mfc_context->brc.mode == VA_RC_VBR)
223 mfc_context->hrd.current_buffer_fullness[layer_id] = mfc_context->hrd.buffer_size[layer_id];
225 mfc_context->hrd.current_buffer_fullness[layer_id] = prev_bf;
229 return BRC_NO_HRD_VIOLATION;
/*
 * intel_mfc_brc_postpack_cbr: CBR rate-control step run after a frame is
 * packed.  Checks HRD compliance for the actual frame size, predicts the QP
 * for the next frame of the same slice type from the target/actual size
 * ratio, damps the change, applies a buffer-fullness-driven correction, and
 * propagates the adjustment to the other slice-type QPs.  Returns a
 * gen6_brc_status telling the caller whether to re-encode or bit-stuff.
 *
 * NOTE(review): this listing has elided lines (non-contiguous embedded line
 * numbers): declarations of qpi/qpp/qpb/x/y, several braces/else branches
 * and the qpn +/- 1 rounding-compensation statements are missing; do not
 * infer control flow from what is shown here alone.
 */
232 static int intel_mfc_brc_postpack_cbr(struct encode_state *encode_state,
233 struct intel_encoder_context *encoder_context,
236 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
237 gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
238 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
239 int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
240 int curr_frame_layer_id, next_frame_layer_id;
242 int qp; // quantizer of previously encoded slice of current type
243 int qpn; // predicted quantizer for next frame of current type in integer format
244 double qpf; // predicted quantizer for next frame of current type in float format
245 double delta_qp; // QP correction
246 int min_qp = MAX(1, encoder_context->brc.min_qp);
247 int target_frame_size, frame_size_next;
249 * x - how far we are from HRD buffer borders
250 * y - how far we are from target HRD buffer fullness
253 double frame_size_alpha;
/* Single layer (or no layer schedule): everything happens on layer 0. */
255 if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0) {
256 curr_frame_layer_id = 0;
257 next_frame_layer_id = 0;
259 curr_frame_layer_id = encoder_context->layer.curr_frame_layer_id;
260 next_frame_layer_id = encoder_context->layer.frame_layer_ids[encoder_context->num_frames_in_sequence % encoder_context->layer.size_frame_layer_ids];
263 /* checking wthether HRD compliance first */
264 sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
/* On a violation the same frame is re-encoded, so the "next" frame is the
 * current one and stays on the current layer. */
266 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
269 next_frame_layer_id = curr_frame_layer_id;
/* Work against the stats of the layer that will encode next. */
272 mfc_context->brc.bits_prev_frame[curr_frame_layer_id] = frame_bits;
273 frame_bits = mfc_context->brc.bits_prev_frame[next_frame_layer_id];
275 mfc_context->brc.prev_slice_type[curr_frame_layer_id] = slicetype;
276 slicetype = mfc_context->brc.prev_slice_type[next_frame_layer_id];
278 /* 0 means the next frame is the first frame of next layer */
282 qpi = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I];
283 qpp = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P];
284 qpb = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B];
286 qp = mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype];
288 target_frame_size = mfc_context->brc.target_frame_size[next_frame_layer_id][slicetype];
/* Damping factor: small HRD buffers react immediately (alpha = 0); larger
 * ones average over up to 30 frames of the same type. */
289 if (mfc_context->hrd.buffer_capacity[next_frame_layer_id] < 5)
290 frame_size_alpha = 0;
292 frame_size_alpha = (double)mfc_context->brc.gop_nums[next_frame_layer_id][slicetype];
293 if (frame_size_alpha > 30) frame_size_alpha = 30;
294 frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
295 (double)(frame_size_alpha + 1.);
297 /* frame_size_next: avoiding negative number and too small value */
298 if ((double)frame_size_next < (double)(target_frame_size * 0.25))
299 frame_size_next = (int)((double)target_frame_size * 0.25);
/* Assume size scales ~linearly with QP near the operating point. */
301 qpf = (double)qp * target_frame_size / frame_size_next;
302 qpn = (int)(qpf + 0.5);
305 /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
306 mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] += qpf - qpn;
307 if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] > 1.0) {
309 mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
310 } else if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] < -1.0) {
312 mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
315 /* making sure that QP is not changing too fast */
316 if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
317 else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
318 /* making sure that with QP predictions we did do not leave QPs range */
319 BRC_CLIP(qpn, 1, 51);
321 /* calculating QP delta as some function*/
/* Below target fullness: push QP up (x > 0); the elided else branch handles
 * the above-target case relative to the remaining buffer headroom. */
322 x = mfc_context->hrd.target_buffer_fullness[next_frame_layer_id] - mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
324 x /= mfc_context->hrd.target_buffer_fullness[next_frame_layer_id];
325 y = mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
327 x /= (mfc_context->hrd.buffer_size[next_frame_layer_id] - mfc_context->hrd.target_buffer_fullness[next_frame_layer_id]);
328 y = mfc_context->hrd.buffer_size[next_frame_layer_id] - mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
330 if (y < 0.01) y = 0.01;
332 else if (x < -1) x = -1;
/* Smooth correction: bounded by BRC_QP_MAX_CHANGE, attenuated when far
 * from the buffer edge (exp term), sign/scale from distance-to-target. */
334 delta_qp = BRC_QP_MAX_CHANGE * exp(-1 / y) * sin(BRC_PI_0_5 * x);
335 qpn = (int)(qpn + delta_qp + 0.5);
337 /* making sure that with QP predictions we did do not leave QPs range */
338 BRC_CLIP(qpn, min_qp, 51);
340 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
341 /* correcting QPs of slices of other types */
/* Keep I/P/B QPs within the expected BRC_*_QP_DIFF offsets of each other,
 * moving the other types halfway/quarter-way toward the expected spread. */
342 if (slicetype == SLICE_TYPE_P) {
343 if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
344 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
345 if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
346 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
347 } else if (slicetype == SLICE_TYPE_I) {
348 if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
349 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
350 if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
351 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
352 } else { // SLICE_TYPE_B
353 if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
354 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
355 if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
356 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
358 BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I], min_qp, 51);
359 BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P], min_qp, 51);
360 BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B], min_qp, 51);
361 } else if (sts == BRC_UNDERFLOW) { // underflow
/* Re-encode at a strictly higher QP; if already at 51 we give up and
 * report UNDERFLOW_WITH_MAX_QP (clamp line elided in this listing). */
362 if (qpn <= qp) qpn = qp + 1;
365 sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
367 } else if (sts == BRC_OVERFLOW) {
368 if (qpn >= qp) qpn = qp - 1;
369 if (qpn < min_qp) { // overflow with minQP
371 sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
375 mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype] = qpn;
/*
 * intel_mfc_brc_postpack_vbr: reactive single-layer VBR rate-control step
 * run after a frame is packed (design rationale in the inline comments
 * below).  Adjusts the layer-0 I/P/B QPs by a small delta chosen from the
 * HRD fullness and the actual-vs-target frame size, then reports whether
 * the frame must be re-encoded.
 *
 * NOTE(review): elided lines here include the `int frame_bits` parameter,
 * the `gen6_brc_status sts` declaration, `qp_delta` branch assignments
 * (e.g. the initial `qp_delta = +1`/`-1`/`0` in several branches) and the
 * switch case labels — consult the complete file before changing logic.
 */
380 static int intel_mfc_brc_postpack_vbr(struct encode_state *encode_state,
381 struct intel_encoder_context *encoder_context,
384 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
386 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
387 int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
/* Shorthand for the layer-0 QP triple; indexed by SLICE_TYPE_*. */
388 int *qp = mfc_context->brc.qp_prime_y[0];
389 int min_qp = MAX(1, encoder_context->brc.min_qp);
390 int qp_delta, large_frame_adjustment;
392 // This implements a simple reactive VBR rate control mode for single-layer H.264. The primary
393 // aim here is to avoid the problematic behaviour that the CBR rate controller displays on
394 // scene changes, where the QP can get pushed up by a large amount in a short period and
395 // compromise the quality of following frames to a very visible degree.
396 // The main idea, then, is to try to keep the HRD buffering above the target level most of the
397 // time, so that when a large frame is generated (on a scene change or when the stream
398 // complexity increases) we have plenty of slack to be able to encode the more difficult region
399 // without compromising quality immediately on the following frames. It is optimistic about
400 // the complexity of future frames, so even after generating one or more large frames on a
401 // significant change it will try to keep the QP at its current level until the HRD buffer
402 // bounds force a change to maintain the intended rate.
404 sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
406 // This adjustment is applied to increase the QP by more than we normally would if a very
407 // large frame is encountered and we are in danger of running out of slack.
408 large_frame_adjustment = rint(2.0 * log(frame_bits / mfc_context->brc.target_frame_size[0][slice_type]));
410 if (sts == BRC_UNDERFLOW) {
411 // The frame is far too big and we don't have the bits available to send it, so it will
412 // have to be re-encoded at a higher QP.
414 if (frame_bits > mfc_context->brc.target_frame_size[0][slice_type])
415 qp_delta += large_frame_adjustment;
416 } else if (sts == BRC_OVERFLOW) {
417 // The frame is very small and we are now overflowing the HRD buffer. Currently this case
418 // does not occur because we ignore overflow in VBR mode.
419 assert(0 && "Overflow in VBR mode");
420 } else if (frame_bits <= mfc_context->brc.target_frame_size[0][slice_type]) {
421 // The frame is smaller than the average size expected for this frame type.
422 if (mfc_context->hrd.current_buffer_fullness[0] >
423 (mfc_context->hrd.target_buffer_fullness[0] + mfc_context->hrd.buffer_size[0]) / 2.0) {
424 // We currently have lots of bits available, so decrease the QP slightly for the next
428 // The HRD buffer fullness is increasing, so do nothing. (We may be under the target
429 // level here, but are moving in the right direction.)
433 // The frame is larger than the average size expected for this frame type.
434 if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0]) {
435 // We are currently over the target level, so do nothing.
437 } else if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0] / 2.0) {
438 // We are under the target level, but not critically. Increase the QP by one step if
439 // continuing like this would underflow soon (currently within one second).
440 if (mfc_context->hrd.current_buffer_fullness[0] /
441 (double)(frame_bits - mfc_context->brc.target_frame_size[0][slice_type] + 1) <
442 ((double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den))
447 // We are a long way under the target level. Always increase the QP, possibly by a
448 // larger amount dependent on how big the frame we just made actually was.
449 qp_delta = +1 + large_frame_adjustment;
/* Apply the delta to the QP of the slice type just encoded and re-derive
 * the other two types from the fixed I/P/B offsets (case labels elided). */
453 switch (slice_type) {
455 qp[SLICE_TYPE_I] += qp_delta;
456 qp[SLICE_TYPE_P] = qp[SLICE_TYPE_I] + BRC_I_P_QP_DIFF;
457 qp[SLICE_TYPE_B] = qp[SLICE_TYPE_I] + BRC_I_B_QP_DIFF;
460 qp[SLICE_TYPE_P] += qp_delta;
461 qp[SLICE_TYPE_I] = qp[SLICE_TYPE_P] - BRC_I_P_QP_DIFF;
462 qp[SLICE_TYPE_B] = qp[SLICE_TYPE_P] + BRC_P_B_QP_DIFF;
465 qp[SLICE_TYPE_B] += qp_delta;
466 qp[SLICE_TYPE_I] = qp[SLICE_TYPE_B] - BRC_I_B_QP_DIFF;
467 qp[SLICE_TYPE_P] = qp[SLICE_TYPE_B] - BRC_P_B_QP_DIFF;
470 BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], min_qp, 51);
471 BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], min_qp, 51);
472 BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B], min_qp, 51);
/* If the QP is already pinned at its limit, re-encoding cannot help:
 * upgrade the status so the caller can bit-stuff / accept the frame. */
474 if (sts == BRC_UNDERFLOW && qp[slice_type] == 51)
475 sts = BRC_UNDERFLOW_WITH_MAX_QP;
476 if (sts == BRC_OVERFLOW && qp[slice_type] == min_qp)
477 sts = BRC_OVERFLOW_WITH_MIN_QP;
/*
 * intel_mfc_brc_postpack: dispatch the post-pack BRC update to the CBR or
 * VBR implementation according to the configured rate-control mode, and
 * assert on anything else.
 *
 * NOTE(review): the `int frame_bits` parameter line and the `case VA_RC_CBR:`
 * / `case VA_RC_VBR:` labels are elided in this listing.
 */
482 int intel_mfc_brc_postpack(struct encode_state *encode_state,
483 struct intel_encoder_context *encoder_context,
486 switch (encoder_context->rate_control_mode) {
488 return intel_mfc_brc_postpack_cbr(encode_state, encoder_context, frame_bits);
490 return intel_mfc_brc_postpack_vbr(encode_state, encoder_context, frame_bits);
492 assert(0 && "Invalid RC mode");
/*
 * intel_mfc_hrd_context_init: seed the VUI/HRD fields used when generating
 * buffering-period and picture-timing SEI messages.  The CBR-only branch
 * derives the coded bit-rate value and the initial CPB removal delay (in
 * 90 kHz ticks) from the top layer's target bitrate; the 24-bit field
 * lengths are set unconditionally.
 */
496 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
497 struct intel_encoder_context *encoder_context)
499 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
500 unsigned int rate_control_mode = encoder_context->rate_control_mode;
/* Total (top temporal layer) target bitrate in bits per second. */
501 int target_bit_rate = encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
503 // current we only support CBR mode.
504 if (rate_control_mode == VA_RC_CBR) {
/* bit_rate_value is expressed in 1024-bit units (>> 10). */
505 mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
506 mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
507 mfc_context->vui_hrd.i_cpb_removal_delay = 2;
508 mfc_context->vui_hrd.i_frame_number = 0;
510 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
511 mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
512 mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
518 intel_mfc_hrd_context_update(struct encode_state *encode_state,
519 struct gen6_mfc_context *mfc_context)
521 mfc_context->vui_hrd.i_frame_number++;
/*
 * intel_mfc_interlace_check: sum the macroblocks covered by all submitted
 * slices and compare against the full progressive-frame MB count derived
 * from the surface dimensions.  Presumably returns 0 when the slices cover
 * exactly one full frame and non-zero otherwise (interlaced/field coding) —
 * the return statements and the `mbCount` declaration/initialization are
 * elided in this listing; confirm against the complete file.
 */
524 int intel_mfc_interlace_check(VADriverContextP ctx,
525 struct encode_state *encode_state,
526 struct intel_encoder_context *encoder_context)
528 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
529 VAEncSliceParameterBufferH264 *pSliceParameter;
/* Round pixel dimensions up to whole 16x16 macroblocks. */
532 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
533 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
535 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
536 pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer;
537 mbCount += pSliceParameter->num_macroblocks;
540 if (mbCount == (width_in_mbs * height_in_mbs))
/*
 * intel_mfc_brc_prepare: per-frame BRC entry point.  For H.264/MVC in any
 * non-CQP rate-control mode, (re)initialize the bit-rate-control and HRD
 * contexts whenever the application has requested a BRC reset
 * (brc.need_reset).  Non-H.264 codecs return early (early-return line
 * elided in this listing).
 */
546 void intel_mfc_brc_prepare(struct encode_state *encode_state,
547 struct intel_encoder_context *encoder_context)
549 unsigned int rate_control_mode = encoder_context->rate_control_mode;
551 if (encoder_context->codec != CODEC_H264 &&
552 encoder_context->codec != CODEC_H264_MVC)
555 if (rate_control_mode != VA_RC_CQP) {
556 /*Programing bit rate control */
557 if (encoder_context->brc.need_reset) {
558 intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
559 intel_mfc_brc_init(encode_state, encoder_context);
562 /*Programing HRD control */
563 if (encoder_context->brc.need_reset)
564 intel_mfc_hrd_context_init(encode_state, encoder_context);
/*
 * intel_mfc_avc_pipeline_header_programing: emit the packed SPS, PPS and
 * SEI headers supplied by the application into the slice batch via the
 * context's insert_object hook.  If no packed SEI was supplied and the mode
 * is CBR, a picture-timing SEI is synthesized from the vui_hrd state
 * instead (build_avc_sei_buffer_timing).
 *
 * Header payloads are inserted as 32-bit dwords: ALIGN(bits,32)>>5 dwords
 * plus the residual bit count (bits & 0x1f); emulation-prevention insertion
 * is requested iff the packed buffer does not already contain it.
 *
 * NOTE(review): several insert_object() arguments (header_data pointer,
 * ELEMENT_CLASS/flag values, slice_batch) and closing braces are elided in
 * this listing — the calls are incomplete as shown.
 */
568 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
569 struct encode_state *encode_state,
570 struct intel_encoder_context *encoder_context,
571 struct intel_batchbuffer *slice_batch)
573 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
574 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
575 unsigned int rate_control_mode = encoder_context->rate_control_mode;
576 unsigned int skip_emul_byte_cnt;
/* --- packed SPS --- */
578 if (encode_state->packed_header_data[idx]) {
579 VAEncPackedHeaderParameterBuffer *param = NULL;
580 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
581 unsigned int length_in_bits;
583 assert(encode_state->packed_header_param[idx]);
584 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
585 length_in_bits = param->bit_length;
/* Leading bytes (start code + NAL header) that must not get emulation
 * prevention applied. */
587 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
588 mfc_context->insert_object(ctx,
591 ALIGN(length_in_bits, 32) >> 5,
592 length_in_bits & 0x1f,
596 !param->has_emulation_bytes,
/* --- packed PPS --- */
600 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
602 if (encode_state->packed_header_data[idx]) {
603 VAEncPackedHeaderParameterBuffer *param = NULL;
604 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
605 unsigned int length_in_bits;
607 assert(encode_state->packed_header_param[idx]);
608 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
609 length_in_bits = param->bit_length;
611 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
613 mfc_context->insert_object(ctx,
616 ALIGN(length_in_bits, 32) >> 5,
617 length_in_bits & 0x1f,
621 !param->has_emulation_bytes,
/* --- packed SEI, or a generated timing SEI for CBR --- */
625 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
627 if (encode_state->packed_header_data[idx]) {
628 VAEncPackedHeaderParameterBuffer *param = NULL;
629 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
630 unsigned int length_in_bits;
632 assert(encode_state->packed_header_param[idx]);
633 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
634 length_in_bits = param->bit_length;
636 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
637 mfc_context->insert_object(ctx,
640 ALIGN(length_in_bits, 32) >> 5,
641 length_in_bits & 0x1f,
645 !param->has_emulation_bytes,
647 } else if (rate_control_mode == VA_RC_CBR) {
649 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
651 unsigned char *sei_data = NULL;
/* cpb_removal_delay advances by i_cpb_removal_delay per encoded frame
 * (i_frame_number is bumped by intel_mfc_hrd_context_update). */
653 int length_in_bits = build_avc_sei_buffer_timing(
654 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
655 mfc_context->vui_hrd.i_initial_cpb_removal_delay,
657 mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
658 mfc_context->vui_hrd.i_dpb_output_delay_length,
661 mfc_context->insert_object(ctx,
663 (unsigned int *)sei_data,
664 ALIGN(length_in_bits, 32) >> 5,
665 length_in_bits & 0x1f,
/*
 * intel_mfc_avc_prepare: bind every buffer object the MFC pipeline needs
 * for one frame — the reconstructed surface (pre/post-deblocking output),
 * per-surface direct-MV buffers (allocated lazily and stored as surface
 * private data), the reference surfaces, the raw input YUV surface and the
 * coded (PAK/BSE) output buffer — taking a dri_bo reference on each.
 *
 * Returns vaStatus (VA_STATUS_SUCCESS unless an elided error path sets it).
 *
 * NOTE(review): this listing has elided lines — `dri_bo *bo;`, loop/if
 * closing braces, dri_bo_alloc name/alignment arguments, and the coded-
 * buffer map/unmap calls around the segment update are missing; confirm
 * against the complete file before modifying.
 */
675 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
676 struct encode_state *encode_state,
677 struct intel_encoder_context *encoder_context)
679 struct i965_driver_data *i965 = i965_driver_data(ctx);
680 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
681 struct object_surface *obj_surface;
682 struct object_buffer *obj_buffer;
683 GenAvcSurface *gen6_avc_surface;
685 VAStatus vaStatus = VA_STATUS_SUCCESS;
686 int i, j, enable_avc_ildb = 0;
687 VAEncSliceParameterBufferH264 *slice_param;
688 struct i965_coded_buffer_segment *coded_buffer_segment;
689 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
690 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
691 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
693 if (IS_GEN6(i965->intel.device_info)) {
694 /* On the SNB it should be fixed to 128 for the DMV buffer */
/* In-loop deblocking is enabled if any slice does not disable it
 * (disable_deblocking_filter_idc == 1 means "off"). */
698 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
699 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
700 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
702 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
703 assert((slice_param->slice_type == SLICE_TYPE_I) ||
704 (slice_param->slice_type == SLICE_TYPE_SI) ||
705 (slice_param->slice_type == SLICE_TYPE_P) ||
706 (slice_param->slice_type == SLICE_TYPE_SP) ||
707 (slice_param->slice_type == SLICE_TYPE_B));
709 if (slice_param->disable_deblocking_filter_idc != 1) {
718 /*Setup all the input&output object*/
720 /* Setup current frame and current direct mv buffer*/
721 obj_surface = encode_state->reconstructed_object;
722 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Lazily attach top/bottom direct-MV buffers (68 bytes per MB) to the
 * reconstructed surface as private data, freed by gen_free_avc_surface. */
724 if (obj_surface->private_data == NULL) {
/* NOTE(review): calloc(size, count) — arguments are in the reverse of the
 * conventional (count, size) order; harmless for a single element. */
725 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
726 assert(gen6_avc_surface);
727 gen6_avc_surface->dmv_top =
728 dri_bo_alloc(i965->intel.bufmgr,
730 68 * width_in_mbs * height_in_mbs,
732 gen6_avc_surface->dmv_bottom =
733 dri_bo_alloc(i965->intel.bufmgr,
735 68 * width_in_mbs * height_in_mbs,
737 assert(gen6_avc_surface->dmv_top);
738 assert(gen6_avc_surface->dmv_bottom);
739 obj_surface->private_data = (void *)gen6_avc_surface;
740 obj_surface->free_private_data = (void *)gen_free_avc_surface;
/* The last two DMV slots always hold the current frame's buffers. */
742 gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
743 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
744 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
745 dri_bo_reference(gen6_avc_surface->dmv_top);
746 dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* Reconstructed pixels land post-deblocking when ILDB is on, otherwise
 * pre-deblocking output is written directly. */
748 if (enable_avc_ildb) {
749 mfc_context->post_deblocking_output.bo = obj_surface->bo;
750 dri_bo_reference(mfc_context->post_deblocking_output.bo);
752 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
753 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
756 mfc_context->surface_state.width = obj_surface->orig_width;
757 mfc_context->surface_state.height = obj_surface->orig_height;
758 mfc_context->surface_state.w_pitch = obj_surface->width;
759 mfc_context->surface_state.h_pitch = obj_surface->height;
761 /* Setup reference frames and direct mv buffers*/
762 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
763 obj_surface = encode_state->reference_objects[i];
765 if (obj_surface && obj_surface->bo) {
766 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
767 dri_bo_reference(obj_surface->bo);
769 /* Check DMV buffer */
770 if (obj_surface->private_data == NULL) {
772 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
773 assert(gen6_avc_surface);
774 gen6_avc_surface->dmv_top =
775 dri_bo_alloc(i965->intel.bufmgr,
777 68 * width_in_mbs * height_in_mbs,
779 gen6_avc_surface->dmv_bottom =
780 dri_bo_alloc(i965->intel.bufmgr,
782 68 * width_in_mbs * height_in_mbs,
784 assert(gen6_avc_surface->dmv_top);
785 assert(gen6_avc_surface->dmv_bottom);
786 obj_surface->private_data = gen6_avc_surface;
787 obj_surface->free_private_data = gen_free_avc_surface;
790 gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
791 /* Setup DMV buffer */
792 mfc_context->direct_mv_buffers[i * 2].bo = gen6_avc_surface->dmv_top;
793 mfc_context->direct_mv_buffers[i * 2 + 1].bo = gen6_avc_surface->dmv_bottom;
794 dri_bo_reference(gen6_avc_surface->dmv_top);
795 dri_bo_reference(gen6_avc_surface->dmv_bottom);
801 mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
802 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* Coded buffer: PAK/BSE output starts after the driver's header segment;
 * the usable end is the buffer size minus a 4 KiB tail, page aligned. */
804 obj_buffer = encode_state->coded_buf_object;
805 bo = obj_buffer->buffer_store->bo;
806 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
807 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
808 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
809 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
/* Reset the status segment (bo->virtual implies the bo is mapped here;
 * the map/unmap calls are elided in this listing). */
812 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
813 coded_buffer_segment->mapped = 0;
814 coded_buffer_segment->codec = encoder_context->codec;
820 * The LUT uses the pair of 4-bit units: (shift, base) structure.
822 * So it is necessary to convert one cost into the nearest LUT format.
824 * 2^K *x = 2^n * (1 + deltaX)
825 * k + log2(x) = n + log2(1 + deltaX)
826 * log2(x) = n - k + log2(1 + deltaX)
827 * As X is in the range of [1, 15]
828 * 4 > n - k + log2(1 + deltaX) >= 0
829 * => n + log2(1 + deltaX) >= k > n - 4 + log2(1 + deltaX)
830 * Then we can derive the corresponding K and get the nearest LUT format.
/*
 * Convert a cost value into the 8-bit hardware LUT encoding used by the VME
 * cost tables: high nibble = shift, low nibble = base, so the decoded cost
 * is (base << shift).  The routine scans candidate shifts near log2(value)
 * and keeps the (shift, base) pair with the smallest rounding error, then
 * compares the decoded candidate against the decoded 'max' limit.
 * NOTE(review): several statements (early-outs, the clamp to 'max' and the
 * final return) are elided in this excerpt — confirm against the full file.
 */
832 int intel_format_lutvalue(int value, int max)
835 int logvalue, temp1, temp2;
/* Integer floor of log2(value) selects the neighbourhood of shifts to try. */
840 logvalue = (int)(log2f((float)value));
844 int error, temp_value, base, j, temp_err;
/* Base is a 4-bit field, so only shifts in [logvalue-3, logvalue] can
 * represent the value; start from the smallest of them. */
846 j = logvalue - 4 + 1;
848 for (; j <= logvalue; j++) {
/* Round-to-nearest base for this shift count. */
852 base = (value + (1 << (j - 1)) - 1) >> j;
857 temp_value = base << j;
/* Keep the candidate whose decoded value is closest to the request. */
858 temp_err = abs(value - temp_value);
859 if (temp_err < error) {
/* Pack as (shift << 4) | base. */
861 ret = (j << 4) | base;
/* Decode candidate (temp1) and limit (temp2) for the final clamp. */
867 temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
868 temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
877 #define VP8_QP_MAX 128
/*
 * Map a quantiser (QP) to the Lagrange multiplier used to weight the VME
 * mode/MV costs: lambda = round(2^(qp/6 - 2)).
 * NOTE(review): the statement loading 'qp' into 'value' is elided in this
 * excerpt; 'value' is presumably initialised from qp first — confirm.
 */
880 static float intel_lambda_qp(int qp)
882 float value, lambdaf;
884 value = value / 6 - 2;
887 lambdaf = roundf(powf(2, value));
/*
 * Fill the per-QP VME state message for H.264 with LUT-encoded mode costs
 * (intra 16x16/8x8/4x4, non-predicted, inter partitions, backward) and an
 * MV-distance cost ramp, all scaled by lambda = intel_lambda_qp(qp).
 * NOTE(review): several `m_cost = m_costf;` conversion lines are elided in
 * this excerpt between each m_costf computation and its use.
 */
892 void intel_h264_calc_mbmvcost_qp(int qp,
894 uint8_t *vme_state_message)
896 int m_cost, j, mv_count;
897 float lambda, m_costf;
899 assert(qp <= QP_MAX);
900 lambda = intel_lambda_qp(qp);
903 vme_state_message[MODE_CHROMA_INTRA] = 0;
904 vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
/* I-slices: only intra mode costs matter; 16x16 is the cheapest (0). */
906 if (slice_type == SLICE_TYPE_I) {
907 vme_state_message[MODE_INTRA_16X16] = 0;
909 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
910 m_cost = lambda * 16;
911 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
913 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* MV cost ramp: entries for small distances first... */
916 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
917 for (j = 1; j < 3; j++) {
918 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
919 m_cost = (int)m_costf;
920 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
/* ...then power-of-two distances 4..64 in the remaining slots. */
923 for (j = 4; j <= 64; j *= 2) {
924 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
925 m_cost = (int)m_costf;
926 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
/* Fixed LUT costs for this branch (condition elided in this excerpt —
 * presumably a low-QP case; confirm against the full file). */
931 vme_state_message[MODE_INTRA_16X16] = 0x4a;
932 vme_state_message[MODE_INTRA_8X8] = 0x4a;
933 vme_state_message[MODE_INTRA_4X4] = 0x4a;
934 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
935 vme_state_message[MODE_INTER_16X16] = 0x4a;
936 vme_state_message[MODE_INTER_16X8] = 0x4a;
937 vme_state_message[MODE_INTER_8X8] = 0x4a;
938 vme_state_message[MODE_INTER_8X4] = 0x4a;
939 vme_state_message[MODE_INTER_4X4] = 0x4a;
940 vme_state_message[MODE_INTER_BWD] = 0x2a;
/* Lambda-scaled intra costs for P/B slices. */
943 m_costf = lambda * 10;
944 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
945 m_cost = lambda * 14;
946 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
947 m_cost = lambda * 24;
948 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
949 m_costf = lambda * 3.5;
951 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* P-slice inter partition costs. */
952 if (slice_type == SLICE_TYPE_P) {
953 m_costf = lambda * 2.5;
955 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
956 m_costf = lambda * 4;
958 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
959 m_costf = lambda * 1.5;
961 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
962 m_costf = lambda * 3;
964 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
965 m_costf = lambda * 5;
967 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
968 /* BWD is not used in P-frame */
969 vme_state_message[MODE_INTER_BWD] = 0;
/* B-slice inter partition costs (slightly higher; BWD enabled). */
971 m_costf = lambda * 2.5;
973 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
974 m_costf = lambda * 5.5;
976 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
977 m_costf = lambda * 3.5;
979 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
980 m_costf = lambda * 5.0;
982 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
983 m_costf = lambda * 6.5;
985 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
986 m_costf = lambda * 1.5;
988 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
/*
 * Refresh the H.264 VME mode/MV cost message for the current frame.
 * Picks the effective QP — CQP mode: pic_init_qp + slice_qp_delta;
 * otherwise: the BRC's per-layer, per-slice-type QP — and regenerates the
 * cost table via intel_h264_calc_mbmvcost_qp().
 */
994 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
995 struct encode_state *encode_state,
996 struct intel_encoder_context *encoder_context)
998 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
999 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1000 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1001 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1003 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
/* Normalise SP/SI to P/I so the BRC tables are indexed consistently. */
1005 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1007 if (encoder_context->rate_control_mode == VA_RC_CQP)
1008 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1010 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
/* Nothing to fill if the VME state message was never allocated. */
1012 if (vme_state_message == NULL)
1015 intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
/*
 * VP8 variant of the VME cost update.  Key frames are treated as I-slices,
 * inter frames as P-slices.  The VP8 quantisation index (0..VP8_QP_MAX-1)
 * is rescaled to the H.264 QP range before computing lambda so the same
 * cost formulas can be reused.
 * NOTE(review): several `m_cost = m_costf;` conversion lines are elided in
 * this excerpt between each m_costf computation and its use.
 */
1018 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
1019 struct encode_state *encode_state,
1020 struct intel_encoder_context *encoder_context)
1022 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1023 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1024 VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
1025 VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
1026 int qp, m_cost, j, mv_count;
1027 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
1028 float lambda, m_costf;
/* VP8 frame_type 0 == key frame. */
1030 int is_key_frame = !pic_param->pic_flags.bits.frame_type;
1031 int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
1033 if (vme_state_message == NULL)
1036 if (encoder_context->rate_control_mode == VA_RC_CQP)
1037 qp = q_matrix->quantization_index[0];
1039 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
/* Rescale the VP8 q-index into the H.264 QP domain for lambda. */
1041 lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
1044 vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
/* Intra mode costs (VP8 has no 8x8 intra mode here). */
1047 vme_state_message[MODE_INTRA_16X16] = 0;
1048 m_cost = lambda * 16;
1049 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
1050 m_cost = lambda * 3;
1051 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* MV cost ramp: small distances first, then powers of two 4..64. */
1054 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
1055 for (j = 1; j < 3; j++) {
1056 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1057 m_cost = (int)m_costf;
1058 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1061 for (j = 4; j <= 64; j *= 2) {
1062 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1063 m_cost = (int)m_costf;
1064 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
/* Fixed LUT costs for this branch (condition elided in this excerpt). */
1069 vme_state_message[MODE_INTRA_16X16] = 0x4a;
1070 vme_state_message[MODE_INTRA_4X4] = 0x4a;
1071 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
1072 vme_state_message[MODE_INTER_16X16] = 0x4a;
1073 vme_state_message[MODE_INTER_16X8] = 0x4a;
1074 vme_state_message[MODE_INTER_8X8] = 0x4a;
1075 vme_state_message[MODE_INTER_4X4] = 0x4a;
1076 vme_state_message[MODE_INTER_BWD] = 0;
/* Lambda-scaled costs for inter frames. */
1079 m_costf = lambda * 10;
1080 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1081 m_cost = lambda * 24;
1082 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
1084 m_costf = lambda * 3.5;
1086 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
1088 m_costf = lambda * 2.5;
1090 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1091 m_costf = lambda * 4;
1093 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
1094 m_costf = lambda * 1.5;
1096 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
1097 m_costf = lambda * 5;
1099 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
1100 /* BWD is not used in P-frame */
1101 vme_state_message[MODE_INTER_BWD] = 0;
1105 #define MB_SCOREBOARD_A (1 << 0)
1106 #define MB_SCOREBOARD_B (1 << 1)
1107 #define MB_SCOREBOARD_C (1 << 2)
/*
 * Enable the hardware stalling scoreboard for the VME walker and program
 * the dependency deltas: each MB waits on its A (left), B (top) and
 * C (top-right) neighbours before executing.
 */
1109 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
1111 vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
1112 vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
1113 vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
1117 /* In VME prediction the current mb depends on the neighbour
1118 * A/B/C macroblock. So the left/up/up-right dependency should
/* (dx, dy) = (-1, 0): left neighbour (A). */
1121 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
1122 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
/* (dx, dy) = (0, -1): top neighbour (B). */
1123 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
1124 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
/* (dx, dy) = (1, -1): top-right neighbour (C). */
1125 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
1126 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
1128 vme_context->gpe_context.vfe_desc7.dword = 0;
1132 /* check whether the mb of (x_index, y_index) is out of bound */
/*
 * Decide whether macroblock (x_index, y_index) is usable by the walker:
 * returns 0 when the MB lies inside the picture AND inside the slice span
 * [first_mb, first_mb + num_mb]; returns -1 (out of bounds) otherwise.
 * Callers iterate with `!loop_in_bounds(...)`, i.e. while in bounds.
 */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    /* Outside the picture grid in either dimension? */
    int inside_picture = (x_index >= 0 && x_index < mb_width &&
                          y_index >= 0 && y_index < mb_height);

    if (!inside_picture)
        return -1;

    /* Linear MB index must fall within the slice's MB span. */
    int mb_index = y_index * mb_width + x_index;

    return (mb_index >= first_mb && mb_index <= first_mb + num_mb) ? 0 : -1;
}
/*
 * Emit one CMD_MEDIA_OBJECT per macroblock into the VME batch buffer,
 * walking each H.264 slice in a diagonal (wavefront) order compatible with
 * the A/B/C scoreboard dependencies set up in gen7_vme_scoreboard_init().
 * Two passes per slice: a wavefront over columns left of (mb_width - 2),
 * then a second wavefront covering the remaining right-edge columns.
 * NOTE(review): the inner-loop initialisation/advance statements are elided
 * in this excerpt — the exact walking pattern should be confirmed against
 * the full file.
 */
1148 gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
1149 struct encode_state *encode_state,
1150 int mb_width, int mb_height,
1152 int transform_8x8_mode_flag,
1153 struct intel_encoder_context *encoder_context)
1155 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1158 unsigned int *command_ptr;
1159 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1160 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1161 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1162 int qp, qp_mb, qp_index;
1163 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Effective QP: CQP from parameters, otherwise from the BRC state. */
1165 if (encoder_context->rate_control_mode == VA_RC_CQP)
1166 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1168 qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1170 #define USE_SCOREBOARD (1 << 21)
/* Map the batch BO writable and fill it CPU-side. */
1172 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1173 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1175 for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1176 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
1177 int first_mb = pSliceParameter->macroblock_address;
1178 int num_mb = pSliceParameter->num_macroblocks;
1179 unsigned int mb_intra_ub, score_dep;
1180 int x_outer, y_outer, x_inner, y_inner;
1181 int xtemp_outer = 0;
1183 x_outer = first_mb % mb_width;
1184 y_outer = first_mb / mb_width;
/* Pass 1: wavefront over the area left of the last two columns. */
1187 for (; x_outer < (mb_width - 2) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
1190 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
/* Build the intra-availability byte and scoreboard dependency mask
 * from which neighbours exist inside the slice. */
1194 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1195 score_dep |= MB_SCOREBOARD_A;
1197 if (y_inner != mb_row) {
1198 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1199 score_dep |= MB_SCOREBOARD_B;
1201 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1202 if (x_inner != (mb_width - 1)) {
1203 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1204 score_dep |= MB_SCOREBOARD_C;
/* 9-dword MEDIA_OBJECT: kernel id, scoreboard enable, (x, y), deps,
 * packed MB descriptor, flags, and per-MB QP. */
1208 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1209 *command_ptr++ = kernel;
1210 *command_ptr++ = USE_SCOREBOARD;
1213 /* the (X, Y) term of scoreboard */
1214 *command_ptr++ = ((y_inner << 16) | x_inner);
1215 *command_ptr++ = score_dep;
1217 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1218 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1219 /* QP occupies one byte */
/* With ROI, each MB carries its own QP from the per-MB map. */
1220 if (vme_context->roi_enabled) {
1221 qp_index = y_inner * mb_width + x_inner;
1222 qp_mb = *(vme_context->qp_per_mb + qp_index);
1225 *command_ptr++ = qp_mb;
/* Pass 2: wavefront over the remaining right-edge columns. */
1232 xtemp_outer = mb_width - 2;
1233 if (xtemp_outer < 0)
1235 x_outer = xtemp_outer;
1236 y_outer = first_mb / mb_width;
1237 for (; !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
1240 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1244 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1245 score_dep |= MB_SCOREBOARD_A;
1247 if (y_inner != mb_row) {
1248 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1249 score_dep |= MB_SCOREBOARD_B;
1251 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1253 if (x_inner != (mb_width - 1)) {
1254 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1255 score_dep |= MB_SCOREBOARD_C;
1259 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1260 *command_ptr++ = kernel;
1261 *command_ptr++ = USE_SCOREBOARD;
1264 /* the (X, Y) term of scoreboard */
1265 *command_ptr++ = ((y_inner << 16) | x_inner);
1266 *command_ptr++ = score_dep;
1268 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1269 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1270 /* qp occupies one byte */
1271 if (vme_context->roi_enabled) {
1272 qp_index = y_inner * mb_width + x_inner;
1273 qp_mb = *(vme_context->qp_per_mb + qp_index);
1276 *command_ptr++ = qp_mb;
1282 if (x_outer >= mb_width) {
1284 x_outer = xtemp_outer;
/* Terminate the batch and hand the BO back to the GPU. */
1290 *command_ptr++ = MI_BATCH_BUFFER_END;
1292 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
/*
 * Pack one reference picture into the per-entry layout expected by the
 * MFX_AVC_REF_IDX_STATE command: bit 6 = long-term flag, bit 5 = "frame
 * (neither pure top nor pure bottom field)" flag, bits 5:1 = frame store
 * index, bit 0 = set only for a bottom-field-only reference.
 */
1296 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1298 unsigned int is_long_term =
1299 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1300 unsigned int is_top_field =
1301 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1302 unsigned int is_bottom_field =
1303 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
/* top ^ bottom ^ 1 == 1 when it is a frame (both-or-neither field flags). */
1305 return ((is_long_term << 6) |
1306 ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1307 (frame_store_id << 1) |
1308 ((is_top_field ^ 1) & is_bottom_field));
/*
 * Emit the two MFX_AVC_REF_IDX_STATE commands (list L0 and L1) for PAK.
 * Entries default to 0x80 ("invalid"); for P/B slices the single reference
 * actually chosen by the VME stage (used_reference_objects/used_references)
 * is located in the DPB and packed into its ref_idx slot.
 */
1312 intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
1313 struct encode_state *encode_state,
1314 struct intel_encoder_context *encoder_context)
1316 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1317 struct intel_batchbuffer *batch = encoder_context->base.batch;
1319 struct object_surface *obj_surface;
1320 unsigned int fref_entry, bref_entry;
1322 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
/* 0x80 per byte marks every ref_idx slot as unused by default. */
1324 fref_entry = 0x80808080;
1325 bref_entry = 0x80808080;
1326 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1328 if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
/* ref_idx chosen by VME for list 0 (low byte of ref_index_in_mb). */
1329 int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
/* Hardware packs at most 4 entries per dword. */
1331 if (ref_idx_l0 > 3) {
1332 WARN_ONCE("ref_idx_l0 is out of range\n");
/* Find the frame-store index of the used L0 reference in the DPB. */
1336 obj_surface = vme_context->used_reference_objects[0];
1338 for (i = 0; i < 16; i++) {
1340 obj_surface == encode_state->reference_objects[i]) {
1345 if (frame_index == -1) {
1346 WARN_ONCE("RefPicList0 is not found in DPB!\n");
/* Replace the placeholder byte at slot ref_idx_l0 with the packed entry. */
1348 int ref_idx_l0_shift = ref_idx_l0 * 8;
1349 fref_entry &= ~(0xFF << ref_idx_l0_shift);
1350 fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
/* Same procedure for list 1, B slices only. */
1354 if (slice_type == SLICE_TYPE_B) {
1355 int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
1357 if (ref_idx_l1 > 3) {
1358 WARN_ONCE("ref_idx_l1 is out of range\n");
1362 obj_surface = vme_context->used_reference_objects[1];
1364 for (i = 0; i < 16; i++) {
1366 obj_surface == encode_state->reference_objects[i]) {
1371 if (frame_index == -1) {
1372 WARN_ONCE("RefPicList1 is not found in DPB!\n");
1374 int ref_idx_l1_shift = ref_idx_l1 * 8;
1375 bref_entry &= ~(0xFF << ref_idx_l1_shift);
1376 bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
/* 10-dword command for list 0: header, list select, entry, 7 filler dwords. */
1380 BEGIN_BCS_BATCH(batch, 10);
1381 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1382 OUT_BCS_BATCH(batch, 0); //Select L0
1383 OUT_BCS_BATCH(batch, fref_entry); //Only 1 reference
1384 for (i = 0; i < 7; i++) {
1385 OUT_BCS_BATCH(batch, 0x80808080);
1387 ADVANCE_BCS_BATCH(batch);
/* And the matching command for list 1. */
1389 BEGIN_BCS_BATCH(batch, 10);
1390 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1391 OUT_BCS_BATCH(batch, 1); //Select L1
1392 OUT_BCS_BATCH(batch, bref_entry); //Only 1 reference
1393 for (i = 0; i < 7; i++) {
1394 OUT_BCS_BATCH(batch, 0x80808080);
1396 ADVANCE_BCS_BATCH(batch);
/*
 * Fill the MPEG-2 VME state message: the MV search range derived from the
 * configured MPEG-2 level, the MV-distance cost ramp, and the mode costs
 * for non-intra pictures.  MPEG-2 VME only searches 16x16, so every other
 * partition cost is zeroed.
 * NOTE(review): the mv_x/mv_y assignments inside the level branches and
 * some `m_cost = m_costf;` lines are elided in this excerpt.
 */
1400 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1401 struct encode_state *encode_state,
1402 struct intel_encoder_context *encoder_context)
1404 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1405 uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1406 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1407 int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1408 int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1409 uint32_t mv_x, mv_y;
1410 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1411 VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1412 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
/* MV range depends on the MPEG-2 level (LOW/MAIN/HIGH). */
1414 if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1417 } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1420 } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1424 WARN_ONCE("Incorrect Mpeg2 level setting!\n");
1429 pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
/* Inter pictures need lambda-scaled MV/mode costs. */
1430 if (pic_param->picture_type != VAEncPictureTypeIntra) {
1431 int qp, m_cost, j, mv_count;
1432 float lambda, m_costf;
1433 slice_param = (VAEncSliceParameterBufferMPEG2 *)
1434 encode_state->slice_params_ext[0]->buffer;
1435 qp = slice_param->quantiser_scale_code;
1436 lambda = intel_lambda_qp(qp);
1437 /* No Intra prediction. So it is zero */
1438 vme_state_message[MODE_INTRA_8X8] = 0;
1439 vme_state_message[MODE_INTRA_4X4] = 0;
1440 vme_state_message[MODE_INTER_MV0] = 0;
/* MV cost ramp: small distances, then powers of two 4..64. */
1441 for (j = 1; j < 3; j++) {
1442 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1443 m_cost = (int)m_costf;
1444 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1447 for (j = 4; j <= 64; j *= 2) {
1448 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1449 m_cost = (int)m_costf;
1450 vme_state_message[MODE_INTER_MV0 + mv_count] =
1451 intel_format_lutvalue(m_cost, 0x6f);
1455 /* It can only perform the 16x16 search. So mode cost can be ignored for
1456 * the other mode. for example: 16x8/8x8
1458 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1459 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1461 vme_state_message[MODE_INTER_16X8] = 0;
1462 vme_state_message[MODE_INTER_8X8] = 0;
1463 vme_state_message[MODE_INTER_8X4] = 0;
1464 vme_state_message[MODE_INTER_4X4] = 0;
1465 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
/* Publish MV range and picture geometry to the kernel. */
1468 vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1470 vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
/*
 * MPEG-2 counterpart of gen7_vme_walker_fill_vme_batchbuffer(): emits one
 * 8-dword CMD_MEDIA_OBJECT per macroblock in diagonal (wavefront) order,
 * respecting the A/B/C scoreboard dependencies.  MPEG-2 treats the whole
 * picture as one slice (num_mb = mb_width * mb_height).
 * NOTE(review): the inner-loop initialisation/advance statements and the
 * first_mb setup are elided in this excerpt.
 */
1475 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
1476 struct encode_state *encode_state,
1477 int mb_width, int mb_height,
1479 struct intel_encoder_context *encoder_context)
1481 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1482 unsigned int *command_ptr;
1484 #define MPEG2_SCOREBOARD (1 << 21)
/* Map the batch BO writable and fill it CPU-side. */
1486 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1487 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1490 unsigned int mb_intra_ub, score_dep;
1491 int x_outer, y_outer, x_inner, y_inner;
1492 int xtemp_outer = 0;
1494 int num_mb = mb_width * mb_height;
/* Pass 1: wavefront over the area left of the last two columns. */
1500 for (; x_outer < (mb_width - 2) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
1503 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
/* Build intra-availability byte and scoreboard dependency mask. */
1507 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1508 score_dep |= MB_SCOREBOARD_A;
1511 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1512 score_dep |= MB_SCOREBOARD_B;
1515 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1517 if (x_inner != (mb_width - 1)) {
1518 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1519 score_dep |= MB_SCOREBOARD_C;
/* 8-dword MEDIA_OBJECT (no per-MB QP dword, unlike the H.264 walker). */
1523 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1524 *command_ptr++ = kernel;
1525 *command_ptr++ = MPEG2_SCOREBOARD;
1528 /* the (X, Y) term of scoreboard */
1529 *command_ptr++ = ((y_inner << 16) | x_inner);
1530 *command_ptr++ = score_dep;
1532 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1533 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
/* Pass 2: wavefront over the remaining right-edge columns. */
1540 xtemp_outer = mb_width - 2;
1541 if (xtemp_outer < 0)
1543 x_outer = xtemp_outer;
1545 for (; !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
1548 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1552 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1553 score_dep |= MB_SCOREBOARD_A;
1556 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1557 score_dep |= MB_SCOREBOARD_B;
1560 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1562 if (x_inner != (mb_width - 1)) {
1563 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1564 score_dep |= MB_SCOREBOARD_C;
1568 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1569 *command_ptr++ = kernel;
1570 *command_ptr++ = MPEG2_SCOREBOARD;
1573 /* the (X, Y) term of scoreboard */
1574 *command_ptr++ = ((y_inner << 16) | x_inner);
1575 *command_ptr++ = score_dep;
1577 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1578 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1584 if (x_outer >= mb_width) {
1586 x_outer = xtemp_outer;
/* Terminate the batch and unmap the BO. */
1592 *command_ptr++ = MI_BATCH_BUFFER_END;
1594 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
/*
 * Pick the reference from ref_list that is temporally closest to the
 * current picture, comparing TopFieldOrderCnt.  Invalid entries are
 * skipped.  'found' holds the winning index and 'min' the smallest POC
 * distance seen so far.
 * NOTE(review): the update of 'found'/'min', the backward-direction
 * handling and the return statement are elided in this excerpt.
 */
1599 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1600 VAPictureH264 *ref_list,
1604 int i, found = -1, min = 0x7FFFFFFF;
1606 for (i = 0; i < num_pictures; i++) {
/* Skip invalid / unset reference entries. */
1609 if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1610 (ref_list[i].picture_id == VA_INVALID_SURFACE))
/* POC distance between the current picture and this candidate. */
1613 tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1618 if (tmp > 0 && tmp < min) {
/*
 * Select the single reference picture the VME stage will use for the given
 * list (0 = forward, 1 = backward) and program its surface state via the
 * supplied vme_source_surface_state callback.  With exactly one active
 * reference it is taken directly from RefPicList0/1; with more, the
 * temporally closest one is chosen by avc_temporal_find_surface().
 * The chosen surface/picture and its ref_idx are recorded in vme_context
 * for later use by intel_mfc_avc_ref_idx_state().
 */
1628 intel_avc_vme_reference_state(VADriverContextP ctx,
1629 struct encode_state *encode_state,
1630 struct intel_encoder_context *encoder_context,
1633 void (* vme_source_surface_state)(
1634 VADriverContextP ctx,
1636 struct object_surface *obj_surface,
1637 struct intel_encoder_context *encoder_context))
1639 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1640 struct object_surface *obj_surface = NULL;
1641 struct i965_driver_data *i965 = i965_driver_data(ctx);
1642 VASurfaceID ref_surface_id;
1643 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1644 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1645 int max_num_references;
1646 VAPictureH264 *curr_pic;
1647 VAPictureH264 *ref_list;
/* Active reference count and list for this direction. */
1650 if (list_index == 0) {
1651 max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1652 ref_list = slice_param->RefPicList0;
1654 max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1655 ref_list = slice_param->RefPicList1;
/* Single-reference case: use RefPicList[0] directly. */
1658 if (max_num_references == 1) {
1659 if (list_index == 0) {
1660 ref_surface_id = slice_param->RefPicList0[0].picture_id;
1661 vme_context->used_references[0] = &slice_param->RefPicList0[0];
1663 ref_surface_id = slice_param->RefPicList1[0].picture_id;
1664 vme_context->used_references[1] = &slice_param->RefPicList1[0];
1667 if (ref_surface_id != VA_INVALID_SURFACE)
1668 obj_surface = SURFACE(ref_surface_id);
/* Fallback when the list entry is invalid: use the DPB entry. */
1672 obj_surface = encode_state->reference_objects[list_index];
1673 vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
/* Multi-reference case: pick the temporally nearest reference. */
1678 curr_pic = &pic_param->CurrPic;
1680 /* select the reference frame in temporal space */
1681 ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1682 ref_surface_id = ref_list[ref_idx].picture_id;
1684 if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1685 obj_surface = SURFACE(ref_surface_id);
1687 vme_context->used_reference_objects[list_index] = obj_surface;
1688 vme_context->used_references[list_index] = &ref_list[ref_idx];
/* Valid surface: program it and remember ref_idx for the PAK stage. */
1693 assert(ref_idx >= 0);
1694 vme_context->used_reference_objects[list_index] = obj_surface;
1695 vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1696 vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
/* No usable reference: clear the bookkeeping for this list. */
1701 vme_context->used_reference_objects[list_index] = NULL;
1702 vme_context->used_references[list_index] = NULL;
1703 vme_context->ref_index_in_mb[list_index] = 0;
1707 #define AVC_NAL_DELIMITER 9
/*
 * Scan the packed raw-data headers attached to the first slice and insert
 * only the AUD (access unit delimiter, NAL type 9) into the bitstream via
 * mfc_context->insert_object.  The AUD must precede all other NALs, hence
 * this dedicated early pass.
 */
1709 intel_avc_insert_aud_packed_data(VADriverContextP ctx,
1710 struct encode_state *encode_state,
1711 struct intel_encoder_context *encoder_context,
1712 struct intel_batchbuffer *batch)
1714 VAEncPackedHeaderParameterBuffer *param = NULL;
1715 unsigned int length_in_bits;
1716 unsigned int *header_data = NULL;
1717 unsigned char *nal_type = NULL;
1718 int count, i, start_index;
1719 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
/* Raw packed-data buffers attached to slice 0. */
1721 count = encode_state->slice_rawdata_count[0];
1722 start_index = (encode_state->slice_rawdata_index[0] & SLICE_PACKED_DATA_INDEX_MASK);
1724 for (i = 0; i < count; i++) {
1725 unsigned int skip_emul_byte_cnt;
1727 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1728 nal_type = (unsigned char *)header_data;
1730 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
1732 length_in_bits = param->bit_length;
/* Skip the start code prefix to reach the NAL header byte. */
1734 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
/* Low 5 bits of the NAL header byte are nal_unit_type; 9 == AUD. */
1736 if ((*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER) {
1737 mfc_context->insert_object(ctx,
1740 ALIGN(length_in_bits, 32) >> 5,
1741 length_in_bits & 0x1f,
1745 !param->has_emulation_bytes,
/*
 * Insert the packed header data attached to one slice into the slice
 * batch, in order: all raw packed data except AUD/slice-header NALs first,
 * then the slice header last (either the application-provided packed
 * slice header, or one generated by build_avc_slice_header() when none
 * was supplied).
 */
1753 void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
1754 struct encode_state *encode_state,
1755 struct intel_encoder_context *encoder_context,
1757 struct intel_batchbuffer *slice_batch)
1759 int count, i, start_index;
1760 unsigned int length_in_bits;
1761 VAEncPackedHeaderParameterBuffer *param = NULL;
1762 unsigned int *header_data = NULL;
1763 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1764 int slice_header_index;
1765 unsigned char *nal_type = NULL;
/* -1 means no application-provided packed slice header exists. */
1767 if (encode_state->slice_header_index[slice_index] == 0)
1768 slice_header_index = -1;
1770 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1772 count = encode_state->slice_rawdata_count[slice_index];
1773 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
/* Pass 1: emit all raw packed data except AUD and slice-header NALs. */
1775 for (i = 0; i < count; i++) {
1776 unsigned int skip_emul_byte_cnt;
1778 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1779 nal_type = (unsigned char *)header_data;
1781 param = (VAEncPackedHeaderParameterBuffer *)
1782 (encode_state->packed_header_params_ext[start_index + i]->buffer);
1784 length_in_bits = param->bit_length;
1786 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1788 /* skip the slice header/AUD packed data type as it is lastly inserted */
1789 if (param->type == VAEncPackedHeaderSlice || (*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER)
1792 /* as the slice header is still required, the last header flag is set to
1795 mfc_context->insert_object(ctx,
1798 ALIGN(length_in_bits, 32) >> 5,
1799 length_in_bits & 0x1f,
1803 !param->has_emulation_bytes,
/* Pass 2a: no packed slice header supplied — generate one in the driver. */
1807 if (slice_header_index == -1) {
1808 unsigned char *slice_header = NULL;
1809 int slice_header_length_in_bits = 0;
1810 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1811 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1812 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1814 /* No slice header data is passed. And the driver needs to generate it */
1815 /* For the Normal H264 */
1816 slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
1820 mfc_context->insert_object(ctx, encoder_context,
1821 (unsigned int *)slice_header,
1822 ALIGN(slice_header_length_in_bits, 32) >> 5,
1823 slice_header_length_in_bits & 0x1f,
1824 5, /* first 5 bytes are start code + nal unit type */
1825 1, 0, 1, slice_batch);
/* Pass 2b: emit the application-provided packed slice header. */
1829 unsigned int skip_emul_byte_cnt;
1831 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1833 param = (VAEncPackedHeaderParameterBuffer *)
1834 (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1835 length_in_bits = param->bit_length;
1837 /* as the slice header is the last header data for one slice,
1838 * the last header flag is set to one.
1840 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1842 mfc_context->insert_object(ctx,
1845 ALIGN(length_in_bits, 32) >> 5,
1846 length_in_bits & 0x1f,
1850 !param->has_emulation_bytes,
/*
 * Lazily build the per-QP VME cost table BO for the current slice type.
 * The BO holds QP_MAX entries of 32 bytes, each filled with the cost
 * message for that QP by intel_h264_calc_mbmvcost_qp().  Once built, the
 * BO is cached on the vme_context per slice type (I/P/B) and reused.
 */
1858 intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
1859 struct encode_state *encode_state,
1860 struct intel_encoder_context *encoder_context)
1862 struct i965_driver_data *i965 = i965_driver_data(ctx);
1863 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1864 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1867 uint8_t *cost_table;
1869 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Reuse the cached table for this slice type if it already exists. */
1872 if (slice_type == SLICE_TYPE_I) {
1873 if (vme_context->i_qp_cost_table)
1875 } else if (slice_type == SLICE_TYPE_P) {
1876 if (vme_context->p_qp_cost_table)
1879 if (vme_context->b_qp_cost_table)
1883 /* It is enough to allocate 32 bytes for each qp. */
1884 bo = dri_bo_alloc(i965->intel.bufmgr,
1890 assert(bo->virtual);
1891 cost_table = (uint8_t *)(bo->virtual);
/* One 32-byte cost message per QP value. */
1892 for (qp = 0; qp < QP_MAX; qp++) {
1893 intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
/* Cache the finished BO on the context for this slice type. */
1899 if (slice_type == SLICE_TYPE_I) {
1900 vme_context->i_qp_cost_table = bo;
1901 } else if (slice_type == SLICE_TYPE_P) {
1902 vme_context->p_qp_cost_table = bo;
1904 vme_context->b_qp_cost_table = bo;
1907 vme_context->cost_table_size = QP_MAX * 32;
/*
 * Bind the per-QP cost table BO (built by intel_h264_initialize_mbmv_cost)
 * as a VME buffer surface at the given binding-table/surface-state slots,
 * selecting the I/P/B table matching the current slice type.  The surface
 * is described as QP_MAX blocks of 32 bytes with a 16-byte pitch.
 */
1912 intel_h264_setup_cost_surface(VADriverContextP ctx,
1913 struct encode_state *encode_state,
1914 struct intel_encoder_context *encoder_context,
1915 unsigned long binding_table_offset,
1916 unsigned long surface_state_offset)
1918 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1919 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1923 struct i965_buffer_surface cost_table;
1925 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Pick the cached cost table BO matching the slice type. */
1928 if (slice_type == SLICE_TYPE_I) {
1929 bo = vme_context->i_qp_cost_table;
1930 } else if (slice_type == SLICE_TYPE_P) {
1931 bo = vme_context->p_qp_cost_table;
1933 bo = vme_context->b_qp_cost_table;
/* Describe the BO layout for the buffer surface setup helper. */
1937 cost_table.num_blocks = QP_MAX;
1938 cost_table.pitch = 16;
1939 cost_table.size_block = 32;
1941 vme_context->vme_buffer_suface_setup(ctx,
1942 &vme_context->gpe_context,
1944 binding_table_offset,
1945 surface_state_offset);
 * The idea of the conversion between qp and qstep comes from the scaling
 * process of the transform coefficients for the Luma component in the
 * H.264 spec.
 *
 * In order to avoid a too-small qstep, it is multiplied by 16.
/*
 * Map an H.264 QP to a linearized quantizer step: qstep = 2^(qp/6 - 2).
 * Inverse of intel_h264_qstep_qp().
 */
static float intel_h264_qp_qstep(int qp)
    /* NOTE(review): the local declarations, the "value = qp;" seed and the
     * return statement are not visible in this excerpt. */
    value = value / 6 - 2;
    qstep = powf(2, value);
/*
 * Recover an H.264 QP from a linearized quantizer step; inverse of
 * intel_h264_qp_qstep() (12 = 6*2 compensates the "- 2" of the forward map).
 */
static int intel_h264_qstep_qp(float qstep)
    /* NOTE(review): declarations and return are not visible in this excerpt. */
    qp = 12.0f + 6.0f * log2f(qstep);
 * Currently it is based on the following assumption:
 *   SUM(roi_area * 1 / roi_qstep) + nonroi_area * 1 / nonroi_qstep =
 *       total_area * 1 / baseqp_qstep
 *
 * qstep is the linearized form of the H.264 quantizer.
/* NOTE(review): fragment of the ROIRegionParam struct (tag and remaining
 * members — row_end/col_end, width/height in MBs, roi_qp — are not visible
 * in this excerpt). Coordinates are in macroblock units, per the _in_mb
 * suffix and the /16 conversions at the use sites. */
int row_start_in_mb;    /* first MB row covered by this ROI (inclusive) */
int col_start_in_mb;    /* first MB column covered by this ROI (inclusive) */
/*
 * CBR ROI handling: build the per-MB QP map so that each ROI gets
 * base_qp + its user qp_delta (clamped), while the non-ROI area gets a QP
 * chosen to keep the overall rate target, solving (in linear qstep space)
 *   SUM(roi_area / roi_qstep) + nonroi_area / nonroi_qstep
 *       == total_area / baseqp_qstep
 *
 * NOTE(review): this excerpt is missing interior lines (the base_qp
 * parameter line, several declarations such as i/j/num_roi/qp_ptr/temp,
 * early-return branches and closing braces); comments describe only the
 * visible code.
 */
intel_h264_enc_roi_cbr(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
    int min_qp = MAX(1, encoder_context->brc.min_qp);
    ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS];
    float qstep_nonroi, qstep_base;
    float roi_area, total_area, nonroi_area;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    int mbs_in_picture = width_in_mbs * height_in_mbs;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAStatus vaStatus = VA_STATUS_SUCCESS;

    /* currently roi_value_is_qp_delta is the only supported mode of priority.
     * qp_delta set by user is added to base_qp, which is then clamped by
     * [base_qp-min_delta, base_qp+max_delta].
     */
    ASSERT_RET(encoder_context->brc.roi_value_is_qp_delta, VA_STATUS_ERROR_INVALID_PARAMETER);

    num_roi = encoder_context->brc.num_roi;

    /* when the base_qp is lower than 12, the quality is quite good based
     * on the H264 test experience.
     * In such case it is unnecessary to adjust the quality for ROI region.
     */
    if (base_qp <= 12) {
        nonroi_qp = base_qp;

    for (i = 0; i < num_roi; i++) {
        int row_start, row_end, col_start, col_end;
        int roi_width_mbs, roi_height_mbs;

        /* ROI rectangle as supplied by the user, in pixels */
        col_start = encoder_context->brc.roi[i].left;
        col_end = encoder_context->brc.roi[i].right;
        row_start = encoder_context->brc.roi[i].top;
        row_end = encoder_context->brc.roi[i].bottom;

        /* convert to macroblock units; right/bottom edges round up */
        col_start = col_start / 16;
        col_end = (col_end + 15) / 16;
        row_start = row_start / 16;
        row_end = (row_end + 15) / 16;

        roi_width_mbs = col_end - col_start;
        roi_height_mbs = row_end - row_start;
        mbs_in_roi = roi_width_mbs * roi_height_mbs;

        /* remember the MB-space rectangle for the fill pass below */
        param_regions[i].row_start_in_mb = row_start;
        param_regions[i].row_end_in_mb = row_end;
        param_regions[i].col_start_in_mb = col_start;
        param_regions[i].col_end_in_mb = col_end;
        param_regions[i].width_mbs = roi_width_mbs;
        param_regions[i].height_mbs = roi_height_mbs;

        /* per-ROI QP = base QP + user delta, clamped to [min_qp, 51] */
        roi_qp = base_qp + encoder_context->brc.roi[i].value;
        BRC_CLIP(roi_qp, min_qp, 51);

        param_regions[i].roi_qp = roi_qp;
        qstep_roi = intel_h264_qp_qstep(roi_qp);

        /* accumulate ROI area and its 1/qstep-weighted contribution */
        roi_area += mbs_in_roi;
        sum_roi += mbs_in_roi / qstep_roi;

    total_area = mbs_in_picture;
    nonroi_area = total_area - roi_area;

    /* solve the rate-balance equation above for the non-ROI qstep */
    qstep_base = intel_h264_qp_qstep(base_qp);
    temp = (total_area / qstep_base - sum_roi);

    qstep_nonroi = nonroi_area / temp;
    nonroi_qp = intel_h264_qstep_qp(qstep_nonroi);

    BRC_CLIP(nonroi_qp, min_qp, 51);

    /* default the whole per-MB QP map to the non-ROI QP ... */
    memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture);

    /* ... then stamp each ROI rectangle with its own QP, one MB row at a time */
    for (i = 0; i < num_roi; i++) {
        for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) {
            qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb;
            memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs);
/*
 * Top-level ROI configuration for H.264 encoding: validates that ROI can be
 * used, (re)allocates the per-MB QP map on resolution change, and fills it
 * according to the rate-control mode (CBR delegates to
 * intel_h264_enc_roi_cbr(); CQP applies the deltas directly).
 *
 * NOTE(review): this excerpt is missing interior lines (the return type,
 * declarations of qp/qp_ptr/num_roi/i/j, "goto"/early-exit statements after
 * the guard conditions, the "i = j - 1;" style index line inside the CQP
 * loop, and closing braces); comments describe only the visible code.
 */
intel_h264_enc_roi_config(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    int row_start, row_end, col_start, col_end;

    vme_context->roi_enabled = 0;
    /* Restriction: Disable ROI when multi-slice is enabled */
    if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))

    vme_context->roi_enabled = !!encoder_context->brc.num_roi;

    if (!vme_context->roi_enabled)

    /* (re)allocate the per-MB QP map whenever the frame geometry changes */
    if ((vme_context->saved_width_mbs != width_in_mbs) ||
        (vme_context->saved_height_mbs != height_in_mbs)) {
        free(vme_context->qp_per_mb);
        vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);

    vme_context->saved_width_mbs = width_in_mbs;
    vme_context->saved_height_mbs = height_in_mbs;
    assert(vme_context->qp_per_mb);

    if (encoder_context->rate_control_mode == VA_RC_CBR) {
        /*
         * TODO: More complex Qp adjust needs to be added.
         * Currently it is initialized to slice_qp.
         */
        VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
        int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

        /* base QP comes from the BRC state for the current temporal layer */
        qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
        intel_h264_enc_roi_cbr(ctx, qp, encode_state, encoder_context);

    } else if (encoder_context->rate_control_mode == VA_RC_CQP) {
        VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
        int min_qp = MAX(1, encoder_context->brc.min_qp);

        /* default the whole map to the picture/slice QP */
        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);

        /* iterate ROIs from last to first so lower-index (presumably
         * higher-priority) regions are written last — NOTE(review): the
         * line deriving index i from j is not visible here; confirm against
         * the full source. */
        for (j = num_roi; j ; j--) {
            int qp_delta, qp_clip;

            col_start = encoder_context->brc.roi[i].left;
            col_end = encoder_context->brc.roi[i].right;
            row_start = encoder_context->brc.roi[i].top;
            row_end = encoder_context->brc.roi[i].bottom;

            /* pixels -> macroblocks; right/bottom edges round up */
            col_start = col_start / 16;
            col_end = (col_end + 15) / 16;
            row_start = row_start / 16;
            row_end = (row_end + 15) / 16;

            qp_delta = encoder_context->brc.roi[i].value;
            qp_clip = qp + qp_delta;

            BRC_CLIP(qp_clip, min_qp, 51);

            /* stamp the clamped QP over the ROI rectangle, row by row */
            for (i = row_start; i < row_end; i++) {
                qp_ptr = vme_context->qp_per_mb + (i * width_in_mbs) + col_start;
                memset(qp_ptr, qp_clip, (col_end - col_start));

    /*
     * TODO: Disable it for non CBR-CQP.
     */
    vme_context->roi_enabled = 0;

    /* Gen7 needs the soft batch path when ROI per-MB QP is active */
    if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info))
        encoder_context->soft_batch_force = 1;
/*
 * Scan ref_list for the valid reference picture temporally closest to
 * curr_pic by picture order count (POC), returning its index.
 *
 * NOTE(review): this excerpt is missing interior lines (the return type,
 * the remaining parameters, the direction handling for backward search,
 * the "found"/"min" updates and the return); comments describe only the
 * visible code.
 */
hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
                           VAPictureHEVC *ref_list,
    /* found = -1 means "no usable reference"; min tracks the best distance */
    int i, found = -1, min = 0x7FFFFFFF;

    for (i = 0; i < num_pictures; i++) {
        /* skip invalid or unset reference entries */
        if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
            (ref_list[i].picture_id == VA_INVALID_SURFACE))

        /* POC distance from the current picture to this candidate */
        tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;

        /* keep the candidate with the smallest positive POC distance */
        if (tmp > 0 && tmp < min) {
/*
 * Select the reference surface for one HEVC reference list (L0/L1), record
 * it in the VME context, and emit its source-surface state via the supplied
 * callback. For multi-reference lists the temporally closest picture is
 * chosen with hevc_temporal_find_surface(). 10-bit streams are detected so
 * the NV12 proxy surface can be used for VME.
 *
 * NOTE(review): this excerpt is missing interior lines (the list_index /
 * surface_index parameters, ref_idx declaration, several if/else and brace
 * lines, and the tail of the ref_index_in_mb expression); comments describe
 * only the visible code.
 */
intel_hevc_vme_reference_state(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context,
                               void (* vme_source_surface_state)(
                                   VADriverContextP ctx,
                                   struct object_surface *obj_surface,
                                   struct intel_encoder_context *encoder_context))
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct object_surface *obj_surface = NULL;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VASurfaceID ref_surface_id;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
    int max_num_references;
    VAPictureHEVC *curr_pic;
    VAPictureHEVC *ref_list;
    unsigned int is_hevc10 = 0;
    GenHevcSurface *hevc_encoder_surface = NULL;

    /* >8-bit luma or chroma means a 10-bit (HEVC Main10) stream */
    if ((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
        || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))

    /* pick the active reference list and its default active count */
    if (list_index == 0) {
        max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
        ref_list = slice_param->ref_pic_list0;
        max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
        ref_list = slice_param->ref_pic_list1;

    if (max_num_references == 1) {
        /* single reference: take entry 0 of the corresponding list */
        if (list_index == 0) {
            ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
            vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
            ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
            vme_context->used_references[1] = &slice_param->ref_pic_list1[0];

        if (ref_surface_id != VA_INVALID_SURFACE)
            obj_surface = SURFACE(ref_surface_id);

        /* fallback path: use the encode-state reference object directly */
        obj_surface = encode_state->reference_objects[list_index];
        vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];

        curr_pic = &pic_param->decoded_curr_pic;

        /* select the reference frame in temporal space */
        ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
        ref_surface_id = ref_list[ref_idx].picture_id;

        if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
            obj_surface = SURFACE(ref_surface_id);

        vme_context->used_reference_objects[list_index] = obj_surface;
        vme_context->used_references[list_index] = &ref_list[ref_idx];

        assert(ref_idx >= 0);
        vme_context->used_reference_objects[list_index] = obj_surface;

        /* for VME, use the NV12 proxy surface attached to the reference */
        hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
        assert(hevc_encoder_surface);
        obj_surface = hevc_encoder_surface->nv12_surface_obj;

        vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
        /* pack the chosen reference index into the per-MB ref field */
        vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |

        /* no usable reference: clear all recorded state for this list */
        vme_context->used_reference_objects[list_index] = NULL;
        vme_context->used_references[list_index] = NULL;
        vme_context->ref_index_in_mb[list_index] = 0;
2323 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
2324 struct encode_state *encode_state,
2325 struct intel_encoder_context *encoder_context)
2327 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2328 struct gen6_vme_context *vme_context = encoder_context->vme_context;
2329 VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2330 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2331 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2332 int qp, m_cost, j, mv_count;
2333 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
2334 float lambda, m_costf;
2336 /* here no SI SP slice for HEVC, do not need slice fixup */
2337 int slice_type = slice_param->slice_type;
2340 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2342 if (encoder_context->rate_control_mode == VA_RC_CBR) {
2343 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
2344 if (slice_type == HEVC_SLICE_B) {
2345 if (pSequenceParameter->ip_period == 1) {
2346 slice_type = HEVC_SLICE_P;
2347 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2349 } else if (mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1) {
2350 slice_type = HEVC_SLICE_P;
2351 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2357 if (vme_state_message == NULL)
2360 assert(qp <= QP_MAX);
2361 lambda = intel_lambda_qp(qp);
2362 if (slice_type == HEVC_SLICE_I) {
2363 vme_state_message[MODE_INTRA_16X16] = 0;
2364 m_cost = lambda * 4;
2365 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2366 m_cost = lambda * 16;
2367 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2368 m_cost = lambda * 3;
2369 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2372 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
2373 for (j = 1; j < 3; j++) {
2374 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2375 m_cost = (int)m_costf;
2376 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2379 for (j = 4; j <= 64; j *= 2) {
2380 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2381 m_cost = (int)m_costf;
2382 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
2387 vme_state_message[MODE_INTRA_16X16] = 0x4a;
2388 vme_state_message[MODE_INTRA_8X8] = 0x4a;
2389 vme_state_message[MODE_INTRA_4X4] = 0x4a;
2390 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2391 vme_state_message[MODE_INTER_16X16] = 0x4a;
2392 vme_state_message[MODE_INTER_16X8] = 0x4a;
2393 vme_state_message[MODE_INTER_8X8] = 0x4a;
2394 vme_state_message[MODE_INTER_8X4] = 0x4a;
2395 vme_state_message[MODE_INTER_4X4] = 0x4a;
2396 vme_state_message[MODE_INTER_BWD] = 0x2a;
2399 m_costf = lambda * 10;
2400 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2401 m_cost = lambda * 14;
2402 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2403 m_cost = lambda * 24;
2404 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2405 m_costf = lambda * 3.5;
2407 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2408 if (slice_type == HEVC_SLICE_P) {
2409 m_costf = lambda * 2.5;
2411 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2412 m_costf = lambda * 4;
2414 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2415 m_costf = lambda * 1.5;
2417 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2418 m_costf = lambda * 3;
2420 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2421 m_costf = lambda * 5;
2423 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2424 /* BWD is not used in P-frame */
2425 vme_state_message[MODE_INTER_BWD] = 0;
2427 m_costf = lambda * 2.5;
2429 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2430 m_costf = lambda * 5.5;
2432 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2433 m_costf = lambda * 3.5;
2435 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2436 m_costf = lambda * 5.0;
2438 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2439 m_costf = lambda * 6.5;
2441 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2442 m_costf = lambda * 1.5;
2444 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);