OSDN Git Service

Make the compiler happy
[android-x86/hardware-intel-common-vaapi.git] / src / gen6_mfc_common.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "gen9_mfc.h"
45 #include "intel_media.h"
46
47 #ifndef HAVE_LOG2F
48 #define log2f(x) (logf(x)/(float)M_LN2)
49 #endif
50
51 int intel_avc_enc_slice_type_fixup(int slice_type)
52 {
53     if (slice_type == SLICE_TYPE_SP ||
54         slice_type == SLICE_TYPE_P)
55         slice_type = SLICE_TYPE_P;
56     else if (slice_type == SLICE_TYPE_SI ||
57              slice_type == SLICE_TYPE_I)
58         slice_type = SLICE_TYPE_I;
59     else {
60         if (slice_type != SLICE_TYPE_B)
61             WARN_ONCE("Invalid slice type for H.264 encoding!\n");
62
63         slice_type = SLICE_TYPE_B;
64     }
65
66     return slice_type;
67 }
68
69 static void
70 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
71                                         struct intel_encoder_context *encoder_context)
72 {
73     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
74     int i;
75
76     for (i = 0 ; i < 3; i++) {
77         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
78         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
79         mfc_context->bit_rate_control_context[i].GrowInit = 6;
80         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
81         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
82         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
83
84         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
85         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
86         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
87         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
88         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
89         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
90     }
91 }
92
/*
 * (Re)initialize bit rate control state for every temporal layer:
 * per-layer bitrate/framerate shares, HRD buffer sizing, GOP structure
 * (counts of I/P/B frames), per-slice-type target frame sizes, and an
 * initial QP estimate derived from the average bits per frame.
 */
static void intel_mfc_brc_init(struct encode_state *encode_state,
                               struct intel_encoder_context* encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    double bitrate, framerate;
    /* Size of one uncompressed 4:2:0 frame in bits (12 bits per pixel). */
    double frame_per_bits = 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
    /* Heuristic compressed frame sizes expected at QP 1 (largest) and QP 51 (smallest). */
    double qp1_size = 0.1 * frame_per_bits;
    double qp51_size = 0.001 * frame_per_bits;
    int min_qp = MAX(1, encoder_context->brc.min_qp);
    double bpf, factor, hrd_factor;
    int inum = encoder_context->brc.num_iframes_in_gop,
        pnum = encoder_context->brc.num_pframes_in_gop,
        bnum = encoder_context->brc.num_bframes_in_gop; /* Gop structure: number of I, P, B frames in the Gop. */
    int intra_period = encoder_context->brc.gop_size;
    int i;

    if (encoder_context->layer.num_layers > 1)
        qp1_size = 0.15 * frame_per_bits;

    mfc_context->brc.mode = encoder_context->rate_control_mode;

    mfc_context->hrd.violation_noted = 0;

    for (i = 0; i < encoder_context->layer.num_layers; i++) {
        /* Default starting QPs; refined below from initial_qp or bits-per-frame. */
        mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = 26;
        mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 26;
        mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = 26;

        /* bits_per_second[] and framerate[] are cumulative across layers;
         * layer i's own share is the difference from layer i-1. */
        if (i == 0) {
            bitrate = encoder_context->brc.bits_per_second[0];
            framerate = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
        } else {
            bitrate = (encoder_context->brc.bits_per_second[i] - encoder_context->brc.bits_per_second[i - 1]);
            framerate = ((double)encoder_context->brc.framerate[i].num / (double)encoder_context->brc.framerate[i].den) -
                        ((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
        }

        /* In VBR mode the target rate is a percentage of the max rate. */
        if (mfc_context->brc.mode == VA_RC_VBR && encoder_context->brc.target_percentage[i])
            bitrate = bitrate * encoder_context->brc.target_percentage[i] / 100;

        if (i == encoder_context->layer.num_layers - 1)
            factor = 1.0;
        else {
            /* NOTE(review): when num_layers > 1, i == 0 takes this branch and
             * reads framerate[i - 1] == framerate[-1], which looks like an
             * out-of-bounds read -- verify the intended indexing. */
            factor = ((double)encoder_context->brc.framerate[i].num / (double)encoder_context->brc.framerate[i].den) /
                     ((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
        }

        /* Scale the HRD buffer by this layer's share of the total bitrate. */
        hrd_factor = (double)bitrate / encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];

        mfc_context->hrd.buffer_size[i] = (unsigned int)(encoder_context->brc.hrd_buffer_size * hrd_factor);
        /* Use the supplied initial fullness if it fits the buffer, otherwise
         * start half full.  NOTE(review): the (double) cast applies to the
         * comparison result, not the operand -- harmless (0.0/1.0 keeps the
         * same truth value) but presumably unintended. */
        mfc_context->hrd.current_buffer_fullness[i] =
            (double)(encoder_context->brc.hrd_initial_buffer_fullness < encoder_context->brc.hrd_buffer_size) ?
            encoder_context->brc.hrd_initial_buffer_fullness : encoder_context->brc.hrd_buffer_size / 2.;
        mfc_context->hrd.current_buffer_fullness[i] *= hrd_factor;
        mfc_context->hrd.target_buffer_fullness[i] = (double)encoder_context->brc.hrd_buffer_size * hrd_factor / 2.;
        mfc_context->hrd.buffer_capacity[i] = (double)encoder_context->brc.hrd_buffer_size * hrd_factor / qp1_size;

        /* With temporal layers, derive each layer's own GOP structure by
         * scaling the cumulative counts with the framerate ratio. */
        if (encoder_context->layer.num_layers > 1) {
            if (i == 0) {
                intra_period = (int)(encoder_context->brc.gop_size * factor);
                inum = 1;
                pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor);
                bnum = intra_period - inum - pnum;
            } else {
                intra_period = (int)(encoder_context->brc.gop_size * factor) - intra_period;
                inum = 0;
                pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor) - pnum;
                bnum = intra_period - inum - pnum;
            }
        }

        mfc_context->brc.gop_nums[i][SLICE_TYPE_I] = inum;
        mfc_context->brc.gop_nums[i][SLICE_TYPE_P] = pnum;
        mfc_context->brc.gop_nums[i][SLICE_TYPE_B] = bnum;

        /* Split the GOP's bit budget across frame types using the
         * BRC_PWEIGHT/BRC_BWEIGHT relative weights. */
        mfc_context->brc.target_frame_size[i][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period) / framerate) /
                                                                    (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
        mfc_context->brc.target_frame_size[i][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
        mfc_context->brc.target_frame_size[i][SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];

        bpf = mfc_context->brc.bits_per_frame[i] = bitrate / framerate;

        if (encoder_context->brc.initial_qp) {
            /* Caller-provided starting QP wins. */
            mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = encoder_context->brc.initial_qp;
            mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = encoder_context->brc.initial_qp;
            mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = encoder_context->brc.initial_qp;
        } else {
            /* Linearly interpolate a starting QP between the QP1 and QP51
             * heuristic frame sizes. */
            if ((bpf > qp51_size) && (bpf < qp1_size)) {
                mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51 - 50 * (bpf - qp51_size) / (qp1_size - qp51_size);
            } else if (bpf >= qp1_size)
                mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 1;
            else if (bpf <= qp51_size)
                mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51;

            mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P];
            mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I];
        }

        BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I], min_qp, 51);
        BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P], min_qp, 51);
        BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B], min_qp, 51);
    }
}
196
197 int intel_mfc_update_hrd(struct encode_state *encode_state,
198                          struct intel_encoder_context *encoder_context,
199                          int frame_bits)
200 {
201     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
202     int layer_id = encoder_context->layer.curr_frame_layer_id;
203     double prev_bf = mfc_context->hrd.current_buffer_fullness[layer_id];
204
205     mfc_context->hrd.current_buffer_fullness[layer_id] -= frame_bits;
206
207     if (mfc_context->hrd.buffer_size[layer_id] > 0 && mfc_context->hrd.current_buffer_fullness[layer_id] <= 0.) {
208         mfc_context->hrd.current_buffer_fullness[layer_id] = prev_bf;
209         return BRC_UNDERFLOW;
210     }
211
212     mfc_context->hrd.current_buffer_fullness[layer_id] += mfc_context->brc.bits_per_frame[layer_id];
213     if (mfc_context->hrd.buffer_size[layer_id] > 0 && mfc_context->hrd.current_buffer_fullness[layer_id] > mfc_context->hrd.buffer_size[layer_id]) {
214         if (mfc_context->brc.mode == VA_RC_VBR)
215             mfc_context->hrd.current_buffer_fullness[layer_id] = mfc_context->hrd.buffer_size[layer_id];
216         else {
217             mfc_context->hrd.current_buffer_fullness[layer_id] = prev_bf;
218             return BRC_OVERFLOW;
219         }
220     }
221     return BRC_NO_HRD_VIOLATION;
222 }
223
/*
 * CBR post-encode rate control step.  Given the actual size of the frame
 * just encoded, check HRD compliance and predict the QP for the next
 * frame of the same slice type, then propagate a consistent QP to the
 * other slice types.  Returns a gen6_brc_status: the caller re-encodes
 * the frame on BRC_UNDERFLOW/BRC_OVERFLOW.
 */
static int intel_mfc_brc_postpack_cbr(struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int frame_bits)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
    int curr_frame_layer_id, next_frame_layer_id;
    int qpi, qpp, qpb;
    int qp; // quantizer of previously encoded slice of current type
    int qpn; // predicted quantizer for next frame of current type in integer format
    double qpf; // predicted quantizer for next frame of current type in float format
    double delta_qp; // QP correction
    int min_qp = MAX(1, encoder_context->brc.min_qp);
    int target_frame_size, frame_size_next;
    /* Notes:
     *  x - how far we are from HRD buffer borders
     *  y - how far we are from target HRD buffer fullness
     */
    double x, y;
    double frame_size_alpha;

    /* Without temporal layers everything happens in layer 0; otherwise the
     * prediction below is made for the layer of the NEXT frame. */
    if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0) {
        curr_frame_layer_id = 0;
        next_frame_layer_id = 0;
    } else {
        curr_frame_layer_id = encoder_context->layer.curr_frame_layer_id;
        next_frame_layer_id = encoder_context->layer.frame_layer_ids[encoder_context->num_frames_in_sequence % encoder_context->layer.size_frame_layer_ids];
    }

    /* checking whether HRD compliance first */
    sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);

    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* nothing */
    } else {
        /* The frame will be re-encoded, so predict for the current layer. */
        next_frame_layer_id = curr_frame_layer_id;
    }

    /* Record this frame's stats and switch to the next frame's layer data. */
    mfc_context->brc.bits_prev_frame[curr_frame_layer_id] = frame_bits;
    frame_bits = mfc_context->brc.bits_prev_frame[next_frame_layer_id];

    mfc_context->brc.prev_slice_type[curr_frame_layer_id] = slicetype;
    slicetype = mfc_context->brc.prev_slice_type[next_frame_layer_id];

    /* 0 means the next frame is the first frame of next layer */
    if (frame_bits == 0)
        return sts;

    qpi = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I];
    qpp = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P];
    qpb = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B];

    qp = mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype];

    /* Predict the next frame's size from how far this one missed the
     * target, damped by the number of frames of this type in the GOP. */
    target_frame_size = mfc_context->brc.target_frame_size[next_frame_layer_id][slicetype];
    if (mfc_context->hrd.buffer_capacity[next_frame_layer_id] < 5)
        frame_size_alpha = 0;
    else
        frame_size_alpha = (double)mfc_context->brc.gop_nums[next_frame_layer_id][slicetype];
    if (frame_size_alpha > 30) frame_size_alpha = 30;
    frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
                      (double)(frame_size_alpha + 1.);

    /* frame_size_next: avoiding negative number and too small value */
    if ((double)frame_size_next < (double)(target_frame_size * 0.25))
        frame_size_next = (int)((double)target_frame_size * 0.25);

    /* Scale the QP by the ratio of target to predicted size. */
    qpf = (double)qp * target_frame_size / frame_size_next;
    qpn = (int)(qpf + 0.5);

    if (qpn == qp) {
        /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
        mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] += qpf - qpn;
        if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] > 1.0) {
            qpn++;
            mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
        } else if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] < -1.0) {
            qpn--;
            mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
        }
    }
    /* making sure that QP is not changing too fast */
    if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
    else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
    /* making sure that with QP predictions we do not leave the QP range */
    BRC_CLIP(qpn, 1, 51);

    /* calculating QP delta as some function*/
    x = mfc_context->hrd.target_buffer_fullness[next_frame_layer_id] - mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
    if (x > 0) {
        x /= mfc_context->hrd.target_buffer_fullness[next_frame_layer_id];
        y = mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
    } else {
        x /= (mfc_context->hrd.buffer_size[next_frame_layer_id] - mfc_context->hrd.target_buffer_fullness[next_frame_layer_id]);
        y = mfc_context->hrd.buffer_size[next_frame_layer_id] - mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
    }
    if (y < 0.01) y = 0.01;
    if (x > 1) x = 1;
    else if (x < -1) x = -1;

    /* Smooth correction: sin() gives the direction/magnitude from the
     * distance to the target level, exp() damps it near the buffer edge. */
    delta_qp = BRC_QP_MAX_CHANGE * exp(-1 / y) * sin(BRC_PI_0_5 * x);
    qpn = (int)(qpn + delta_qp + 0.5);

    /* making sure that with QP predictions we do not leave the QP range */
    BRC_CLIP(qpn, min_qp, 51);

    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* correcting QPs of slices of other types */
        if (slicetype == SLICE_TYPE_P) {
            if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
            if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
        } else if (slicetype == SLICE_TYPE_I) {
            if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
            if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
        } else { // SLICE_TYPE_B
            if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
            if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
        }
        BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I], min_qp, 51);
        BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P], min_qp, 51);
        BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B], min_qp, 51);
    } else if (sts == BRC_UNDERFLOW) { // underflow
        if (qpn <= qp) qpn = qp + 1;
        if (qpn > 51) {
            qpn = 51;
            sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
        }
    } else if (sts == BRC_OVERFLOW) {
        if (qpn >= qp) qpn = qp - 1;
        if (qpn < min_qp) { // overflow with minQP
            qpn = min_qp;
            sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
        }
    }

    mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype] = qpn;

    return sts;
}
371
372 static int intel_mfc_brc_postpack_vbr(struct encode_state *encode_state,
373                                       struct intel_encoder_context *encoder_context,
374                                       int frame_bits)
375 {
376     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
377     gen6_brc_status sts;
378     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
379     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
380     int *qp = mfc_context->brc.qp_prime_y[0];
381     int min_qp = MAX(1, encoder_context->brc.min_qp);
382     int qp_delta, large_frame_adjustment;
383
384     // This implements a simple reactive VBR rate control mode for single-layer H.264.  The primary
385     // aim here is to avoid the problematic behaviour that the CBR rate controller displays on
386     // scene changes, where the QP can get pushed up by a large amount in a short period and
387     // compromise the quality of following frames to a very visible degree.
388     // The main idea, then, is to try to keep the HRD buffering above the target level most of the
389     // time, so that when a large frame is generated (on a scene change or when the stream
390     // complexity increases) we have plenty of slack to be able to encode the more difficult region
391     // without compromising quality immediately on the following frames.   It is optimistic about
392     // the complexity of future frames, so even after generating one or more large frames on a
393     // significant change it will try to keep the QP at its current level until the HRD buffer
394     // bounds force a change to maintain the intended rate.
395
396     sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
397
398     // This adjustment is applied to increase the QP by more than we normally would if a very
399     // large frame is encountered and we are in danger of running out of slack.
400     large_frame_adjustment = rint(2.0 * log(frame_bits / mfc_context->brc.target_frame_size[0][slice_type]));
401
402     if (sts == BRC_UNDERFLOW) {
403         // The frame is far too big and we don't have the bits available to send it, so it will
404         // have to be re-encoded at a higher QP.
405         qp_delta = +2;
406         if (frame_bits > mfc_context->brc.target_frame_size[0][slice_type])
407             qp_delta += large_frame_adjustment;
408     } else if (sts == BRC_OVERFLOW) {
409         // The frame is very small and we are now overflowing the HRD buffer.  Currently this case
410         // does not occur because we ignore overflow in VBR mode.
411         assert(0 && "Overflow in VBR mode");
412     } else if (frame_bits <= mfc_context->brc.target_frame_size[0][slice_type]) {
413         // The frame is smaller than the average size expected for this frame type.
414         if (mfc_context->hrd.current_buffer_fullness[0] >
415             (mfc_context->hrd.target_buffer_fullness[0] + mfc_context->hrd.buffer_size[0]) / 2.0) {
416             // We currently have lots of bits available, so decrease the QP slightly for the next
417             // frame.
418             qp_delta = -1;
419         } else {
420             // The HRD buffer fullness is increasing, so do nothing.  (We may be under the target
421             // level here, but are moving in the right direction.)
422             qp_delta = 0;
423         }
424     } else {
425         // The frame is larger than the average size expected for this frame type.
426         if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0]) {
427             // We are currently over the target level, so do nothing.
428             qp_delta = 0;
429         } else if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0] / 2.0) {
430             // We are under the target level, but not critically.  Increase the QP by one step if
431             // continuing like this would underflow soon (currently within one second).
432             if (mfc_context->hrd.current_buffer_fullness[0] /
433                 (double)(frame_bits - mfc_context->brc.target_frame_size[0][slice_type] + 1) <
434                 ((double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den))
435                 qp_delta = +1;
436             else
437                 qp_delta = 0;
438         } else {
439             // We are a long way under the target level.  Always increase the QP, possibly by a
440             // larger amount dependent on how big the frame we just made actually was.
441             qp_delta = +1 + large_frame_adjustment;
442         }
443     }
444
445     switch (slice_type) {
446     case SLICE_TYPE_I:
447         qp[SLICE_TYPE_I] += qp_delta;
448         qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_I] + BRC_I_P_QP_DIFF;
449         qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_I] + BRC_I_B_QP_DIFF;
450         break;
451     case SLICE_TYPE_P:
452         qp[SLICE_TYPE_P] += qp_delta;
453         qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_P] - BRC_I_P_QP_DIFF;
454         qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_P] + BRC_P_B_QP_DIFF;
455         break;
456     case SLICE_TYPE_B:
457         qp[SLICE_TYPE_B] += qp_delta;
458         qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_B] - BRC_I_B_QP_DIFF;
459         qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_B] - BRC_P_B_QP_DIFF;
460         break;
461     }
462     BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], min_qp, 51);
463     BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], min_qp, 51);
464     BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B], min_qp, 51);
465
466     if (sts == BRC_UNDERFLOW && qp[slice_type] == 51)
467         sts = BRC_UNDERFLOW_WITH_MAX_QP;
468     if (sts == BRC_OVERFLOW && qp[slice_type] == min_qp)
469         sts = BRC_OVERFLOW_WITH_MIN_QP;
470
471     return sts;
472 }
473
474 int intel_mfc_brc_postpack(struct encode_state *encode_state,
475                            struct intel_encoder_context *encoder_context,
476                            int frame_bits)
477 {
478     switch (encoder_context->rate_control_mode) {
479     case VA_RC_CBR:
480         return intel_mfc_brc_postpack_cbr(encode_state, encoder_context, frame_bits);
481     case VA_RC_VBR:
482         return intel_mfc_brc_postpack_vbr(encode_state, encoder_context, frame_bits);
483     }
484     assert(0 && "Invalid RC mode");
485     return 1;
486 }
487
/*
 * Initialize the VUI HRD (hypothetical reference decoder) timing state
 * used when building the SEI buffering period / picture timing messages.
 * Only CBR mode is handled; other modes leave the fields untouched.
 */
static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
                                       struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    /* bits_per_second[] holds cumulative rates per layer, so the last
     * entry is the total target bitrate. */
    int target_bit_rate = encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];

    // Currently we only support CBR mode.
    if (rate_control_mode == VA_RC_CBR) {
        /* Bitrate scaled down by 2^10 for the SEI bit_rate_value field. */
        mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
        /* Initial CPB removal delay in 90 kHz clock ticks.
         * NOTE(review): target_bit_rate * 8 can overflow int for bitrates
         * above ~256 Mbit/s -- confirm the supported bitrate range. */
        mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
        mfc_context->vui_hrd.i_cpb_removal_delay = 2;
        mfc_context->vui_hrd.i_frame_number = 0;

        /* Fixed 24-bit field lengths for the SEI timing syntax elements. */
        mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
        mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
        mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
    }

}
508
509 void
510 intel_mfc_hrd_context_update(struct encode_state *encode_state,
511                              struct gen6_mfc_context *mfc_context)
512 {
513     mfc_context->vui_hrd.i_frame_number++;
514 }
515
516 int intel_mfc_interlace_check(VADriverContextP ctx,
517                               struct encode_state *encode_state,
518                               struct intel_encoder_context *encoder_context)
519 {
520     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
521     VAEncSliceParameterBufferH264 *pSliceParameter;
522     int i;
523     int mbCount = 0;
524     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
525     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
526
527     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
528         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer;
529         mbCount += pSliceParameter->num_macroblocks;
530     }
531
532     if (mbCount == (width_in_mbs * height_in_mbs))
533         return 0;
534
535     return 1;
536 }
537
538 void intel_mfc_brc_prepare(struct encode_state *encode_state,
539                            struct intel_encoder_context *encoder_context)
540 {
541     unsigned int rate_control_mode = encoder_context->rate_control_mode;
542
543     if (encoder_context->codec != CODEC_H264 &&
544         encoder_context->codec != CODEC_H264_MVC)
545         return;
546
547     if (rate_control_mode != VA_RC_CQP) {
548         /*Programing bit rate control */
549         if (encoder_context->brc.need_reset) {
550             intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
551             intel_mfc_brc_init(encode_state, encoder_context);
552         }
553
554         /*Programing HRD control */
555         if (encoder_context->brc.need_reset)
556             intel_mfc_hrd_context_init(encode_state, encoder_context);
557     }
558 }
559
560 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
561                                               struct encode_state *encode_state,
562                                               struct intel_encoder_context *encoder_context,
563                                               struct intel_batchbuffer *slice_batch)
564 {
565     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
566     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
567     unsigned int rate_control_mode = encoder_context->rate_control_mode;
568     unsigned int skip_emul_byte_cnt;
569
570     if (encode_state->packed_header_data[idx]) {
571         VAEncPackedHeaderParameterBuffer *param = NULL;
572         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
573         unsigned int length_in_bits;
574
575         assert(encode_state->packed_header_param[idx]);
576         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
577         length_in_bits = param->bit_length;
578
579         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
580         mfc_context->insert_object(ctx,
581                                    encoder_context,
582                                    header_data,
583                                    ALIGN(length_in_bits, 32) >> 5,
584                                    length_in_bits & 0x1f,
585                                    skip_emul_byte_cnt,
586                                    0,
587                                    0,
588                                    !param->has_emulation_bytes,
589                                    slice_batch);
590     }
591
592     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
593
594     if (encode_state->packed_header_data[idx]) {
595         VAEncPackedHeaderParameterBuffer *param = NULL;
596         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
597         unsigned int length_in_bits;
598
599         assert(encode_state->packed_header_param[idx]);
600         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
601         length_in_bits = param->bit_length;
602
603         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
604
605         mfc_context->insert_object(ctx,
606                                    encoder_context,
607                                    header_data,
608                                    ALIGN(length_in_bits, 32) >> 5,
609                                    length_in_bits & 0x1f,
610                                    skip_emul_byte_cnt,
611                                    0,
612                                    0,
613                                    !param->has_emulation_bytes,
614                                    slice_batch);
615     }
616
617     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
618
619     if (encode_state->packed_header_data[idx]) {
620         VAEncPackedHeaderParameterBuffer *param = NULL;
621         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
622         unsigned int length_in_bits;
623
624         assert(encode_state->packed_header_param[idx]);
625         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
626         length_in_bits = param->bit_length;
627
628         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
629         mfc_context->insert_object(ctx,
630                                    encoder_context,
631                                    header_data,
632                                    ALIGN(length_in_bits, 32) >> 5,
633                                    length_in_bits & 0x1f,
634                                    skip_emul_byte_cnt,
635                                    0,
636                                    0,
637                                    !param->has_emulation_bytes,
638                                    slice_batch);
639     } else if (rate_control_mode == VA_RC_CBR) {
640         // this is frist AU
641         struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
642
643         unsigned char *sei_data = NULL;
644
645         int length_in_bits = build_avc_sei_buffer_timing(
646                                  mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
647                                  mfc_context->vui_hrd.i_initial_cpb_removal_delay,
648                                  0,
649                                  mfc_context->vui_hrd.i_cpb_removal_delay_length,                                                       mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
650                                  mfc_context->vui_hrd.i_dpb_output_delay_length,
651                                  0,
652                                  &sei_data);
653         mfc_context->insert_object(ctx,
654                                    encoder_context,
655                                    (unsigned int *)sei_data,
656                                    ALIGN(length_in_bits, 32) >> 5,
657                                    length_in_bits & 0x1f,
658                                    5,
659                                    0,
660                                    0,
661                                    1,
662                                    slice_batch);
663         free(sei_data);
664     }
665 }
666
/*
 * Set up every MFC input/output object needed to encode one H.264 frame:
 * the reconstructed surface with its direct-MV (DMV) buffers, the
 * reference frames with their DMV buffers, the uncompressed source
 * surface, and the coded (output) buffer.
 *
 * Returns VA_STATUS_SUCCESS (vaStatus is never overwritten below).
 */
VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    GenAvcSurface *gen6_avc_surface;
    dri_bo *bo;
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    int i, j, enable_avc_ildb = 0;
    VAEncSliceParameterBufferH264 *slice_param;
    struct i965_coded_buffer_segment *coded_buffer_segment;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;

    if (IS_GEN6(i965->intel.device_info)) {
        /* On the SNB it should be fixed to 128 for the DMV buffer */
        width_in_mbs = 128;
    }

    /* The in-loop deblocking filter must be enabled as soon as any slice
     * does not disable it (disable_deblocking_filter_idc != 1); stop
     * scanning at the first such slice. */
    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    /*Setup all the input&output object*/

    /* Setup current frame and current direct mv buffer*/
    obj_surface = encode_state->reconstructed_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* Lazily attach a GenAvcSurface holding the per-field DMV buffers
     * (68 bytes per macroblock each) to the reconstructed surface. */
    if (obj_surface->private_data == NULL) {
        gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
        assert(gen6_avc_surface);
        gen6_avc_surface->dmv_top =
            dri_bo_alloc(i965->intel.bufmgr,
                         "Buffer",
                         68 * width_in_mbs * height_in_mbs,
                         64);
        gen6_avc_surface->dmv_bottom =
            dri_bo_alloc(i965->intel.bufmgr,
                         "Buffer",
                         68 * width_in_mbs * height_in_mbs,
                         64);
        assert(gen6_avc_surface->dmv_top);
        assert(gen6_avc_surface->dmv_bottom);
        obj_surface->private_data = (void *)gen6_avc_surface;
        obj_surface->free_private_data = (void *)gen_free_avc_surface;
    }
    /* The last two DMV slots are reserved for the current frame. */
    gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
    mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
    mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
    dri_bo_reference(gen6_avc_surface->dmv_top);
    dri_bo_reference(gen6_avc_surface->dmv_bottom);

    /* The MFC writes the reconstructed picture after deblocking when the
     * filter is enabled, before deblocking otherwise. */
    if (enable_avc_ildb) {
        mfc_context->post_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->post_deblocking_output.bo);
    } else {
        mfc_context->pre_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->pre_deblocking_output.bo);
    }

    mfc_context->surface_state.width = obj_surface->orig_width;
    mfc_context->surface_state.height = obj_surface->orig_height;
    mfc_context->surface_state.w_pitch = obj_surface->width;
    mfc_context->surface_state.h_pitch = obj_surface->height;

    /* Setup reference frames and direct mv buffers*/
    for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
        obj_surface = encode_state->reference_objects[i];

        if (obj_surface && obj_surface->bo) {
            mfc_context->reference_surfaces[i].bo = obj_surface->bo;
            dri_bo_reference(obj_surface->bo);

            /* Check DMV buffer */
            if (obj_surface->private_data == NULL) {

                gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
                assert(gen6_avc_surface);
                gen6_avc_surface->dmv_top =
                    dri_bo_alloc(i965->intel.bufmgr,
                                 "Buffer",
                                 68 * width_in_mbs * height_in_mbs,
                                 64);
                gen6_avc_surface->dmv_bottom =
                    dri_bo_alloc(i965->intel.bufmgr,
                                 "Buffer",
                                 68 * width_in_mbs * height_in_mbs,
                                 64);
                assert(gen6_avc_surface->dmv_top);
                assert(gen6_avc_surface->dmv_bottom);
                obj_surface->private_data = gen6_avc_surface;
                obj_surface->free_private_data = gen_free_avc_surface;
            }

            gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
            /* Setup DMV buffer */
            mfc_context->direct_mv_buffers[i * 2].bo = gen6_avc_surface->dmv_top;
            mfc_context->direct_mv_buffers[i * 2 + 1].bo = gen6_avc_surface->dmv_bottom;
            dri_bo_reference(gen6_avc_surface->dmv_top);
            dri_bo_reference(gen6_avc_surface->dmv_bottom);
        } else {
            /* Reference list is packed; stop at the first empty slot. */
            break;
        }
    }

    mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);

    /* The PAK/BSE output lands in the coded buffer right after its
     * driver-private header. */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
    mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
    mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);

    /* Reset the coded-buffer header for this frame. */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    dri_bo_unmap(bo);

    return vaStatus;
}
/*
 * The LUT uses a pair of 4-bit units: a (shift, base) structure,
 * so that base * 2^shift = value.
 * Therefore each cost must be converted into the nearest LUT format.
 * The derivation is:
 *   2^k * X = 2^n * (1 + deltaX)
 *   k + log2(X) = n + log2(1 + deltaX)
 *   log2(X) = n - k + log2(1 + deltaX)
 *   As X is in the range of [1, 15]:
 *     4 > n - k + log2(1 + deltaX) >= 0
 *     =>  n + log2(1 + deltaX) >= k > n - 4 + log2(1 + deltaX)
 *   Then we can derive the corresponding k and get the nearest LUT format.
 */
824 int intel_format_lutvalue(int value, int max)
825 {
826     int ret;
827     int logvalue, temp1, temp2;
828
829     if (value <= 0)
830         return 0;
831
832     logvalue = (int)(log2f((float)value));
833     if (logvalue < 4) {
834         ret = value;
835     } else {
836         int error, temp_value, base, j, temp_err;
837         error = value;
838         j = logvalue - 4 + 1;
839         ret = -1;
840         for (; j <= logvalue; j++) {
841             if (j == 0) {
842                 base = value >> j;
843             } else {
844                 base = (value + (1 << (j - 1)) - 1) >> j;
845             }
846             if (base >= 16)
847                 continue;
848
849             temp_value = base << j;
850             temp_err = abs(value - temp_value);
851             if (temp_err < error) {
852                 error = temp_err;
853                 ret = (j << 4) | base;
854                 if (temp_err == 0)
855                     break;
856             }
857         }
858     }
859     temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
860     temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
861     if (temp1 > temp2)
862         ret = max;
863     return ret;
864
865 }
866
867
/* Size of the H.264 QP scale (valid QPs are 0..51) and of the VP8
 * quantization-index scale (valid indices are 0..127). */
#define     QP_MAX          52
#define     VP8_QP_MAX          128
870
871
872 static float intel_lambda_qp(int qp)
873 {
874     float value, lambdaf;
875     value = qp;
876     value = value / 6 - 2;
877     if (value < 0)
878         value = 0;
879     lambdaf = roundf(powf(2, value));
880     return lambdaf;
881 }
882
883 static
884 void intel_h264_calc_mbmvcost_qp(int qp,
885                                  int slice_type,
886                                  uint8_t *vme_state_message)
887 {
888     int m_cost, j, mv_count;
889     float   lambda, m_costf;
890
891     assert(qp <= QP_MAX);
892     lambda = intel_lambda_qp(qp);
893
894     m_cost = lambda;
895     vme_state_message[MODE_CHROMA_INTRA] = 0;
896     vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
897
898     if (slice_type == SLICE_TYPE_I) {
899         vme_state_message[MODE_INTRA_16X16] = 0;
900         m_cost = lambda * 4;
901         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
902         m_cost = lambda * 16;
903         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
904         m_cost = lambda * 3;
905         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
906     } else {
907         m_cost = 0;
908         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
909         for (j = 1; j < 3; j++) {
910             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
911             m_cost = (int)m_costf;
912             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
913         }
914         mv_count = 3;
915         for (j = 4; j <= 64; j *= 2) {
916             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
917             m_cost = (int)m_costf;
918             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
919             mv_count++;
920         }
921
922         if (qp <= 25) {
923             vme_state_message[MODE_INTRA_16X16] = 0x4a;
924             vme_state_message[MODE_INTRA_8X8] = 0x4a;
925             vme_state_message[MODE_INTRA_4X4] = 0x4a;
926             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
927             vme_state_message[MODE_INTER_16X16] = 0x4a;
928             vme_state_message[MODE_INTER_16X8] = 0x4a;
929             vme_state_message[MODE_INTER_8X8] = 0x4a;
930             vme_state_message[MODE_INTER_8X4] = 0x4a;
931             vme_state_message[MODE_INTER_4X4] = 0x4a;
932             vme_state_message[MODE_INTER_BWD] = 0x2a;
933             return;
934         }
935         m_costf = lambda * 10;
936         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
937         m_cost = lambda * 14;
938         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
939         m_cost = lambda * 24;
940         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
941         m_costf = lambda * 3.5;
942         m_cost = m_costf;
943         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
944         if (slice_type == SLICE_TYPE_P) {
945             m_costf = lambda * 2.5;
946             m_cost = m_costf;
947             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
948             m_costf = lambda * 4;
949             m_cost = m_costf;
950             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
951             m_costf = lambda * 1.5;
952             m_cost = m_costf;
953             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
954             m_costf = lambda * 3;
955             m_cost = m_costf;
956             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
957             m_costf = lambda * 5;
958             m_cost = m_costf;
959             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
960             /* BWD is not used in P-frame */
961             vme_state_message[MODE_INTER_BWD] = 0;
962         } else {
963             m_costf = lambda * 2.5;
964             m_cost = m_costf;
965             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
966             m_costf = lambda * 5.5;
967             m_cost = m_costf;
968             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
969             m_costf = lambda * 3.5;
970             m_cost = m_costf;
971             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
972             m_costf = lambda * 5.0;
973             m_cost = m_costf;
974             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
975             m_costf = lambda * 6.5;
976             m_cost = m_costf;
977             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
978             m_costf = lambda * 1.5;
979             m_cost = m_costf;
980             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
981         }
982     }
983     return;
984 }
985
986 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
987                                 struct encode_state *encode_state,
988                                 struct intel_encoder_context *encoder_context)
989 {
990     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
991     struct gen6_vme_context *vme_context = encoder_context->vme_context;
992     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
993     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
994     int qp;
995     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
996
997     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
998
999     if (encoder_context->rate_control_mode == VA_RC_CQP)
1000         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1001     else
1002         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1003
1004     if (vme_state_message == NULL)
1005         return;
1006
1007     intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
1008 }
1009
1010 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
1011                                     struct encode_state *encode_state,
1012                                     struct intel_encoder_context *encoder_context)
1013 {
1014     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1015     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1016     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
1017     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
1018     int qp, m_cost, j, mv_count;
1019     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
1020     float   lambda, m_costf;
1021
1022     int is_key_frame = !pic_param->pic_flags.bits.frame_type;
1023     int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
1024
1025     if (vme_state_message == NULL)
1026         return;
1027
1028     if (encoder_context->rate_control_mode == VA_RC_CQP)
1029         qp = q_matrix->quantization_index[0];
1030     else
1031         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1032
1033     lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
1034
1035     m_cost = lambda;
1036     vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
1037
1038     if (is_key_frame) {
1039         vme_state_message[MODE_INTRA_16X16] = 0;
1040         m_cost = lambda * 16;
1041         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
1042         m_cost = lambda * 3;
1043         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
1044     } else {
1045         m_cost = 0;
1046         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
1047         for (j = 1; j < 3; j++) {
1048             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1049             m_cost = (int)m_costf;
1050             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1051         }
1052         mv_count = 3;
1053         for (j = 4; j <= 64; j *= 2) {
1054             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1055             m_cost = (int)m_costf;
1056             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
1057             mv_count++;
1058         }
1059
1060         if (qp < 92) {
1061             vme_state_message[MODE_INTRA_16X16] = 0x4a;
1062             vme_state_message[MODE_INTRA_4X4] = 0x4a;
1063             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
1064             vme_state_message[MODE_INTER_16X16] = 0x4a;
1065             vme_state_message[MODE_INTER_16X8] = 0x4a;
1066             vme_state_message[MODE_INTER_8X8] = 0x4a;
1067             vme_state_message[MODE_INTER_4X4] = 0x4a;
1068             vme_state_message[MODE_INTER_BWD] = 0;
1069             return;
1070         }
1071         m_costf = lambda * 10;
1072         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1073         m_cost = lambda * 24;
1074         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
1075
1076         m_costf = lambda * 3.5;
1077         m_cost = m_costf;
1078         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
1079
1080         m_costf = lambda * 2.5;
1081         m_cost = m_costf;
1082         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1083         m_costf = lambda * 4;
1084         m_cost = m_costf;
1085         vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
1086         m_costf = lambda * 1.5;
1087         m_cost = m_costf;
1088         vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
1089         m_costf = lambda * 5;
1090         m_cost = m_costf;
1091         vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
1092         /* BWD is not used in P-frame */
1093         vme_state_message[MODE_INTER_BWD] = 0;
1094     }
1095 }
1096
/* Scoreboard dependency bits for the neighbour macroblocks a VME MB
 * waits on: A = left, B = top, C = top-right (see the delta_x/y values
 * programmed in gen7_vme_scoreboard_init()). */
#define     MB_SCOREBOARD_A     (1 << 0)
#define     MB_SCOREBOARD_B     (1 << 1)
#define     MB_SCOREBOARD_C     (1 << 2)
1100 void
1101 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
1102 {
1103     vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
1104     vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
1105     vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
1106                                                            MB_SCOREBOARD_B |
1107                                                            MB_SCOREBOARD_C);
1108
1109     /* In VME prediction the current mb depends on the neighbour
1110      * A/B/C macroblock. So the left/up/up-right dependency should
1111      * be considered.
1112      */
1113     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
1114     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
1115     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
1116     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
1117     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
1118     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
1119
1120     vme_context->gpe_context.vfe_desc7.dword = 0;
1121     return;
1122 }
1123
/* Return 0 when MB (x_index, y_index) lies inside the frame and inside
 * the slice's MB window [first_mb, first_mb + num_mb]; -1 otherwise.
 * (The inclusive upper bound matches the walker's historical behavior.) */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    int linear;

    if (x_index < 0 || y_index < 0)
        return -1;
    if (x_index >= mb_width || y_index >= mb_height)
        return -1;

    linear = y_index * mb_width + x_index;

    return (linear >= first_mb && linear <= first_mb + num_mb) ? 0 : -1;
}
1138
/*
 * Emit one CMD_MEDIA_OBJECT per macroblock into the VME batchbuffer,
 * walking each slice along down-left diagonals (x -= 2, y += 1) so the
 * scoreboard dependencies on the left/top/top-right neighbours are
 * produced by earlier media objects.
 */
void
gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     int mb_width, int mb_height,
                                     int kernel,
                                     int transform_8x8_mode_flag,
                                     struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_row;
    int s;
    unsigned int *command_ptr;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    int qp, qp_mb, qp_index;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

    /* Frame-level QP: explicit for CQP, otherwise from the BRC state */
    if (encoder_context->rate_control_mode == VA_RC_CQP)
        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    else
        qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];

#define     USE_SCOREBOARD      (1 << 21)

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
        int first_mb = pSliceParameter->macroblock_address;
        int num_mb = pSliceParameter->num_macroblocks;
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;

        x_outer = first_mb % mb_width;
        y_outer = first_mb / mb_width;
        mb_row = y_outer;

        /* Phase 1: diagonals starting on the slice's first row, for
         * start columns up to mb_width - 2 */
        for (; x_outer < (mb_width - 2) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
            x_inner = x_outer;
            y_inner = y_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* Intra-prediction availability flags and scoreboard
                 * deps for the A (left), B (top), D (top-left) and
                 * C (top-right) neighbours */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                    if (x_inner != (mb_width - 1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                /* QP occupies one byte */
                if (vme_context->roi_enabled) {
                    qp_index = y_inner * mb_width + x_inner;
                    qp_mb = *(vme_context->qp_per_mb + qp_index);
                } else
                    qp_mb = qp;
                *command_ptr++ = qp_mb;
                /* Step down-left along the diagonal */
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* Phase 2: remaining diagonals starting at column mb_width - 2,
         * advancing the start point right, then down row by row */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = first_mb / mb_width;
        for (; !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width - 1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                /* qp occupies one byte */
                if (vme_context->roi_enabled) {
                    qp_index = y_inner * mb_width + x_inner;
                    qp_mb = *(vme_context->qp_per_mb + qp_index);
                } else
                    qp_mb = qp;
                *command_ptr++ = qp_mb;

                x_inner -= 2;
                y_inner += 1;
            }
            x_outer++;
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    /* Terminate the batchbuffer */
    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
1286
1287 static uint8_t
1288 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1289 {
1290     unsigned int is_long_term =
1291         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1292     unsigned int is_top_field =
1293         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1294     unsigned int is_bottom_field =
1295         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1296
1297     return ((is_long_term                         << 6) |
1298             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1299             (frame_store_id                       << 1) |
1300             ((is_top_field ^ 1) & is_bottom_field));
1301 }
1302
/*
 * Emit the two MFX_AVC_REF_IDX_STATE commands (list 0 and list 1) that
 * describe the single reference picture the VME stage actually used for
 * the current frame.
 *
 * Each 32-bit entry packs four 8-bit reference states.  Both tables are
 * initialized to the 0x80808080 pattern (0x80 per slot, i.e. slot not
 * used) and only the slot selected by ref_index_in_mb[] is overwritten
 * with a real entry.
 */
void
intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int slice_type;
    struct object_surface *obj_surface;
    unsigned int fref_entry, bref_entry;
    int frame_index, i;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;

    fref_entry = 0x80808080;
    bref_entry = 0x80808080;
    slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

    /* List 0 is consulted for both P and B slices. */
    if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
        /* Low byte of ref_index_in_mb[] holds the replicated L0 index. */
        int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);

        /* Only four 8-bit slots exist in the fref_entry dword. */
        if (ref_idx_l0 > 3) {
            WARN_ONCE("ref_idx_l0 is out of range\n");
            ref_idx_l0 = 0;
        }

        /* Map the VME reference surface back to its DPB position. */
        obj_surface = vme_context->used_reference_objects[0];
        frame_index = -1;
        for (i = 0; i < 16; i++) {
            if (obj_surface &&
                obj_surface == encode_state->reference_objects[i]) {
                frame_index = i;
                break;
            }
        }
        if (frame_index == -1) {
            WARN_ONCE("RefPicList0 is not found in DPB!\n");
        } else {
            /* Clear the chosen slot, then drop in the packed state
             * (add works like OR here since the slot is zeroed). */
            int ref_idx_l0_shift = ref_idx_l0 * 8;
            fref_entry &= ~(0xFF << ref_idx_l0_shift);
            fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
        }
    }

    /* List 1 is only meaningful for B slices. */
    if (slice_type == SLICE_TYPE_B) {
        int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);

        if (ref_idx_l1 > 3) {
            WARN_ONCE("ref_idx_l1 is out of range\n");
            ref_idx_l1 = 0;
        }

        obj_surface = vme_context->used_reference_objects[1];
        frame_index = -1;
        for (i = 0; i < 16; i++) {
            if (obj_surface &&
                obj_surface == encode_state->reference_objects[i]) {
                frame_index = i;
                break;
            }
        }
        if (frame_index == -1) {
            WARN_ONCE("RefPicList1 is not found in DPB!\n");
        } else {
            int ref_idx_l1_shift = ref_idx_l1 * 8;
            bref_entry &= ~(0xFF << ref_idx_l1_shift);
            bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
        }
    }

    /* The command carries 8 dwords of entries; only the first is real,
     * the remaining seven stay at the "not used" pattern. */
    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
    OUT_BCS_BATCH(batch, 0);                  //Select L0
    OUT_BCS_BATCH(batch, fref_entry);         //Only 1 reference
    for (i = 0; i < 7; i++) {
        OUT_BCS_BATCH(batch, 0x80808080);
    }
    ADVANCE_BCS_BATCH(batch);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
    OUT_BCS_BATCH(batch, 1);                  //Select L1
    OUT_BCS_BATCH(batch, bref_entry);         //Only 1 reference
    for (i = 0; i < 7; i++) {
        OUT_BCS_BATCH(batch, 0x80808080);
    }
    ADVANCE_BCS_BATCH(batch);
}
1390
1391
1392 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1393                                  struct encode_state *encode_state,
1394                                  struct intel_encoder_context *encoder_context)
1395 {
1396     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1397     uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1398     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1399     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1400     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1401     uint32_t mv_x, mv_y;
1402     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1403     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1404     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1405
1406     if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1407         mv_x = 512;
1408         mv_y = 64;
1409     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1410         mv_x = 1024;
1411         mv_y = 128;
1412     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1413         mv_x = 2048;
1414         mv_y = 128;
1415     } else {
1416         WARN_ONCE("Incorrect Mpeg2 level setting!\n");
1417         mv_x = 512;
1418         mv_y = 64;
1419     }
1420
1421     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1422     if (pic_param->picture_type != VAEncPictureTypeIntra) {
1423         int qp, m_cost, j, mv_count;
1424         float   lambda, m_costf;
1425         slice_param = (VAEncSliceParameterBufferMPEG2 *)
1426                       encode_state->slice_params_ext[0]->buffer;
1427         qp = slice_param->quantiser_scale_code;
1428         lambda = intel_lambda_qp(qp);
1429         /* No Intra prediction. So it is zero */
1430         vme_state_message[MODE_INTRA_8X8] = 0;
1431         vme_state_message[MODE_INTRA_4X4] = 0;
1432         vme_state_message[MODE_INTER_MV0] = 0;
1433         for (j = 1; j < 3; j++) {
1434             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1435             m_cost = (int)m_costf;
1436             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1437         }
1438         mv_count = 3;
1439         for (j = 4; j <= 64; j *= 2) {
1440             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1441             m_cost = (int)m_costf;
1442             vme_state_message[MODE_INTER_MV0 + mv_count] =
1443                 intel_format_lutvalue(m_cost, 0x6f);
1444             mv_count++;
1445         }
1446         m_cost = lambda;
1447         /* It can only perform the 16x16 search. So mode cost can be ignored for
1448          * the other mode. for example: 16x8/8x8
1449          */
1450         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1451         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1452
1453         vme_state_message[MODE_INTER_16X8] = 0;
1454         vme_state_message[MODE_INTER_8X8] = 0;
1455         vme_state_message[MODE_INTER_8X4] = 0;
1456         vme_state_message[MODE_INTER_4X4] = 0;
1457         vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
1458
1459     }
1460     vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1461
1462     vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
1463                                                 width_in_mbs;
1464 }
1465
/*
 * Fill the VME batch buffer with one MEDIA_OBJECT command per macroblock
 * for MPEG-2, walking the MBs in a diagonal (wavefront) order so that
 * the scoreboard dependencies of each MB (left/A, top/B, top-right/C)
 * are issued before the MB itself.
 *
 * The walk runs in two passes: pass 1 covers the diagonals whose start
 * point is on the top row (left part of the frame), pass 2 covers the
 * remaining diagonals starting near the right edge and then row by row.
 * Each inner loop steps (x - 2, y + 1) along one diagonal.
 */
void
gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                                           struct encode_state *encode_state,
                                           int mb_width, int mb_height,
                                           int kernel,
                                           struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    unsigned int *command_ptr;

#define     MPEG2_SCOREBOARD        (1 << 21)

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    {
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;
        int first_mb = 0;
        int num_mb = mb_width * mb_height;

        x_outer = 0;
        y_outer = 0;


        /* Pass 1: diagonals starting on the top row. */
        for (; x_outer < (mb_width - 2) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
            x_inner = x_outer;
            y_inner = y_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* Intra-prediction availability flags and scoreboard
                 * dependencies follow from the MB position in the frame. */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;

                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width - 1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                /* 8-dword MEDIA_OBJECT: header, kernel, scoreboard
                 * enable, indirect data, (X, Y), dependency mask and
                 * two dwords of inline data. */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = MPEG2_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* Pass 2: remaining diagonals, restarting from column
         * (mb_width - 2) and moving down one row each time the right
         * edge is passed. */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = 0;
        for (; !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height);) {
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;

                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width - 1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = MPEG2_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));

                x_inner -= 2;
                y_inner += 1;
            }
            x_outer++;
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    /* Terminate the batch. */
    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
    return;
}
1589
1590 static int
1591 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1592                           VAPictureH264 *ref_list,
1593                           int num_pictures,
1594                           int dir)
1595 {
1596     int i, found = -1, min = 0x7FFFFFFF;
1597
1598     for (i = 0; i < num_pictures; i++) {
1599         int tmp;
1600
1601         if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1602             (ref_list[i].picture_id == VA_INVALID_SURFACE))
1603             break;
1604
1605         tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1606
1607         if (dir)
1608             tmp = -tmp;
1609
1610         if (tmp > 0 && tmp < min) {
1611             min = tmp;
1612             found = i;
1613         }
1614     }
1615
1616     return found;
1617 }
1618
1619 void
1620 intel_avc_vme_reference_state(VADriverContextP ctx,
1621                               struct encode_state *encode_state,
1622                               struct intel_encoder_context *encoder_context,
1623                               int list_index,
1624                               int surface_index,
1625                               void (* vme_source_surface_state)(
1626                                   VADriverContextP ctx,
1627                                   int index,
1628                                   struct object_surface *obj_surface,
1629                                   struct intel_encoder_context *encoder_context))
1630 {
1631     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1632     struct object_surface *obj_surface = NULL;
1633     struct i965_driver_data *i965 = i965_driver_data(ctx);
1634     VASurfaceID ref_surface_id;
1635     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1636     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1637     int max_num_references;
1638     VAPictureH264 *curr_pic;
1639     VAPictureH264 *ref_list;
1640     int ref_idx;
1641
1642     if (list_index == 0) {
1643         max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1644         ref_list = slice_param->RefPicList0;
1645     } else {
1646         max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1647         ref_list = slice_param->RefPicList1;
1648     }
1649
1650     if (max_num_references == 1) {
1651         if (list_index == 0) {
1652             ref_surface_id = slice_param->RefPicList0[0].picture_id;
1653             vme_context->used_references[0] = &slice_param->RefPicList0[0];
1654         } else {
1655             ref_surface_id = slice_param->RefPicList1[0].picture_id;
1656             vme_context->used_references[1] = &slice_param->RefPicList1[0];
1657         }
1658
1659         if (ref_surface_id != VA_INVALID_SURFACE)
1660             obj_surface = SURFACE(ref_surface_id);
1661
1662         if (!obj_surface ||
1663             !obj_surface->bo) {
1664             obj_surface = encode_state->reference_objects[list_index];
1665             vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1666         }
1667
1668         ref_idx = 0;
1669     } else {
1670         curr_pic = &pic_param->CurrPic;
1671
1672         /* select the reference frame in temporal space */
1673         ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1674         ref_surface_id = ref_list[ref_idx].picture_id;
1675
1676         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1677             obj_surface = SURFACE(ref_surface_id);
1678
1679         vme_context->used_reference_objects[list_index] = obj_surface;
1680         vme_context->used_references[list_index] = &ref_list[ref_idx];
1681     }
1682
1683     if (obj_surface &&
1684         obj_surface->bo) {
1685         assert(ref_idx >= 0);
1686         vme_context->used_reference_objects[list_index] = obj_surface;
1687         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1688         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
1689                                                     ref_idx << 16 |
1690                                                     ref_idx <<  8 |
1691                                                     ref_idx);
1692     } else {
1693         vme_context->used_reference_objects[list_index] = NULL;
1694         vme_context->used_references[list_index] = NULL;
1695         vme_context->ref_index_in_mb[list_index] = 0;
1696     }
1697 }
1698
/*
 * Insert the packed header data belonging to one slice into the slice
 * batch via mfc_context->insert_object().
 *
 * All raw packed buffers attached to the slice are emitted first
 * (skipping any VAEncPackedHeaderSlice entries); the slice header is
 * always emitted last with the "last header" flag set.  If the
 * application supplied no packed slice header, one is generated with
 * build_avc_slice_header().
 */
void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
                                        struct encode_state *encode_state,
                                        struct intel_encoder_context *encoder_context,
                                        int slice_index,
                                        struct intel_batchbuffer *slice_batch)
{
    int count, i, start_index;
    unsigned int length_in_bits;
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int *header_data = NULL;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int slice_header_index;

    /* Index 0 means "no packed slice header supplied by the app". */
    if (encode_state->slice_header_index[slice_index] == 0)
        slice_header_index = -1;
    else
        slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    count = encode_state->slice_rawdata_count[slice_index];
    start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    /* Emit every raw packed buffer attached to this slice. */
    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)
                (encode_state->packed_header_params_ext[start_index + i]->buffer);

        /* skip the slice header packed data type as it is lastly inserted */
        if (param->type == VAEncPackedHeaderSlice)
            continue;

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* as the slice header is still required, the last header flag is set to
         * zero.
         */
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   0,
                                   0,
                                   !param->has_emulation_bytes,
                                   slice_batch);
    }

    if (slice_header_index == -1) {
        unsigned char *slice_header = NULL;
        int slice_header_length_in_bits = 0;
        VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;

        /* No slice header data is passed. And the driver needs to generate it */
        /* For the Normal H264 */
        slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
                                                             pPicParameter,
                                                             pSliceParameter,
                                                             &slice_header);
        mfc_context->insert_object(ctx, encoder_context,
                                   (unsigned int *)slice_header,
                                   ALIGN(slice_header_length_in_bits, 32) >> 5,
                                   slice_header_length_in_bits & 0x1f,
                                   5,  /* first 5 bytes are start code + nal unit type */
                                   1, 0, 1, slice_batch);

        /* build_avc_slice_header() allocated the buffer; release it. */
        free(slice_header);
    } else {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)
                (encode_state->packed_header_params_ext[slice_header_index]->buffer);
        length_in_bits = param->bit_length;

        /* as the slice header is the last header data for one slice,
         * the last header flag is set to one.
         */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   1,
                                   0,
                                   !param->has_emulation_bytes,
                                   slice_batch);
    }

    return;
}
1800
1801 void
1802 intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
1803                                 struct encode_state *encode_state,
1804                                 struct intel_encoder_context *encoder_context)
1805 {
1806     struct i965_driver_data *i965 = i965_driver_data(ctx);
1807     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1808     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1809     int qp;
1810     dri_bo *bo;
1811     uint8_t *cost_table;
1812
1813     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1814
1815
1816     if (slice_type == SLICE_TYPE_I) {
1817         if (vme_context->i_qp_cost_table)
1818             return;
1819     } else if (slice_type == SLICE_TYPE_P) {
1820         if (vme_context->p_qp_cost_table)
1821             return;
1822     } else {
1823         if (vme_context->b_qp_cost_table)
1824             return;
1825     }
1826
1827     /* It is enough to allocate 32 bytes for each qp. */
1828     bo = dri_bo_alloc(i965->intel.bufmgr,
1829                       "cost_table ",
1830                       QP_MAX * 32,
1831                       64);
1832
1833     dri_bo_map(bo, 1);
1834     assert(bo->virtual);
1835     cost_table = (uint8_t *)(bo->virtual);
1836     for (qp = 0; qp < QP_MAX; qp++) {
1837         intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
1838         cost_table += 32;
1839     }
1840
1841     dri_bo_unmap(bo);
1842
1843     if (slice_type == SLICE_TYPE_I) {
1844         vme_context->i_qp_cost_table = bo;
1845     } else if (slice_type == SLICE_TYPE_P) {
1846         vme_context->p_qp_cost_table = bo;
1847     } else {
1848         vme_context->b_qp_cost_table = bo;
1849     }
1850
1851     vme_context->cost_table_size = QP_MAX * 32;
1852     return;
1853 }
1854
1855 extern void
1856 intel_h264_setup_cost_surface(VADriverContextP ctx,
1857                               struct encode_state *encode_state,
1858                               struct intel_encoder_context *encoder_context,
1859                               unsigned long binding_table_offset,
1860                               unsigned long surface_state_offset)
1861 {
1862     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1863     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1864     dri_bo *bo;
1865
1866
1867     struct i965_buffer_surface cost_table;
1868
1869     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1870
1871
1872     if (slice_type == SLICE_TYPE_I) {
1873         bo = vme_context->i_qp_cost_table;
1874     } else if (slice_type == SLICE_TYPE_P) {
1875         bo = vme_context->p_qp_cost_table;
1876     } else {
1877         bo = vme_context->b_qp_cost_table;
1878     }
1879
1880     cost_table.bo = bo;
1881     cost_table.num_blocks = QP_MAX;
1882     cost_table.pitch = 16;
1883     cost_table.size_block = 32;
1884
1885     vme_context->vme_buffer_suface_setup(ctx,
1886                                          &vme_context->gpe_context,
1887                                          &cost_table,
1888                                          binding_table_offset,
1889                                          surface_state_offset);
1890 }
1891
1892 /*
1893  * the idea of conversion between qp and qstep comes from scaling process
1894  * of transform coeff for Luma component in H264 spec.
1895  *   2^(Qpy / 6 - 6)
1896  * In order to avoid too small qstep, it is multiplied by 16.
1897  */
1898 static float intel_h264_qp_qstep(int qp)
1899 {
1900     float value, qstep;
1901     value = qp;
1902     value = value / 6 - 2;
1903     qstep = powf(2, value);
1904     return qstep;
1905 }
1906
1907 static int intel_h264_qstep_qp(float qstep)
1908 {
1909     float qp;
1910
1911     qp = 12.0f + 6.0f * log2f(qstep);
1912
1913     return floorf(qp);
1914 }
1915
/*
 * Currently it is based on the following assumption:
 * SUM(roi_area * 1 / roi_qstep) + nonroi_area * 1 / nonroi_qstep =
 *                 total_area * 1 / baseqp_qstep
 *
 * qstep is the linearized quantizer step of the H264 quantizer
 */
/* One ROI region with its coordinates converted from pixels to
 * macroblock (16x16) units and its final clipped QP resolved. */
typedef struct {
    int row_start_in_mb;    /* first MB row of the region (inclusive) */
    int row_end_in_mb;      /* one past the last MB row (exclusive) */
    int col_start_in_mb;    /* first MB column (inclusive) */
    int col_end_in_mb;      /* one past the last MB column (exclusive) */

    int width_mbs;          /* region width in MBs (col_end - col_start) */
    int height_mbs;         /* region height in MBs (row_end - row_start) */

    int roi_qp;             /* QP applied to every MB inside the region */
} ROIRegionParam;
1934
/*
 * Build the per-MB QP map for CBR encoding with ROI regions.
 *
 * Each ROI gets base_qp plus its user-supplied QP delta (clipped to
 * [min_qp, 51]); the QP of the remaining area is then solved from the
 * rate-balance assumption documented above the ROIRegionParam typedef,
 * so that the overall "rate" (area / qstep) matches what a uniform
 * base_qp picture would produce.
 */
static VAStatus
intel_h264_enc_roi_cbr(VADriverContextP ctx,
                       int base_qp,
                       struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
{
    int nonroi_qp;
    int min_qp = MAX(1, encoder_context->brc.min_qp);
    bool quickfill = 0;

    ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS];
    int num_roi = 0;
    int i, j;

    float temp;
    float qstep_nonroi, qstep_base;
    float roi_area, total_area, nonroi_area;
    float sum_roi;

    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    int mbs_in_picture = width_in_mbs * height_in_mbs;

    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAStatus vaStatus = VA_STATUS_SUCCESS;

    /* currently roi_value_is_qp_delta is the only supported mode of priority.
     *
     * qp_delta set by user is added to base_qp, which is then clamped to
     * [base_qp-min_delta, base_qp+max_delta].
     */
    ASSERT_RET(encoder_context->brc.roi_value_is_qp_delta, VA_STATUS_ERROR_INVALID_PARAMETER);

    num_roi = encoder_context->brc.num_roi;

    /* when the base_qp is lower than 12, the quality is quite good based
     * on the H264 test experience.
     * In such case it is unnecessary to adjust the quality for ROI region.
     */
    if (base_qp <= 12) {
        nonroi_qp = base_qp;
        quickfill = 1;
        goto qp_fill;
    }

    sum_roi = 0.0f;
    roi_area = 0;
    for (i = 0; i < num_roi; i++) {
        int row_start, row_end, col_start, col_end;
        int roi_width_mbs, roi_height_mbs;
        int mbs_in_roi;
        int roi_qp;
        float qstep_roi;

        /* Convert the region's pixel rectangle to MB units, rounding
         * the right/bottom edges up. */
        col_start = encoder_context->brc.roi[i].left;
        col_end = encoder_context->brc.roi[i].right;
        row_start = encoder_context->brc.roi[i].top;
        row_end = encoder_context->brc.roi[i].bottom;

        col_start = col_start / 16;
        col_end = (col_end + 15) / 16;
        row_start = row_start / 16;
        row_end = (row_end + 15) / 16;

        roi_width_mbs = col_end - col_start;
        roi_height_mbs = row_end - row_start;
        mbs_in_roi = roi_width_mbs * roi_height_mbs;

        param_regions[i].row_start_in_mb = row_start;
        param_regions[i].row_end_in_mb = row_end;
        param_regions[i].col_start_in_mb = col_start;
        param_regions[i].col_end_in_mb = col_end;
        param_regions[i].width_mbs = roi_width_mbs;
        param_regions[i].height_mbs = roi_height_mbs;

        /* Region QP = base QP + user delta, clipped to the valid range. */
        roi_qp = base_qp + encoder_context->brc.roi[i].value;
        BRC_CLIP(roi_qp, min_qp, 51);

        param_regions[i].roi_qp = roi_qp;
        qstep_roi = intel_h264_qp_qstep(roi_qp);

        /* Accumulate area and area/qstep for the balance equation. */
        roi_area += mbs_in_roi;
        sum_roi += mbs_in_roi / qstep_roi;
    }

    total_area = mbs_in_picture;
    nonroi_area = total_area - roi_area;

    /* Solve nonroi_qstep from:
     * sum_roi + nonroi_area / nonroi_qstep = total_area / base_qstep */
    qstep_base = intel_h264_qp_qstep(base_qp);
    temp = (total_area / qstep_base - sum_roi);

    if (temp < 0) {
        /* The ROIs already consume the whole budget; use the worst QP. */
        nonroi_qp = 51;
    } else {
        qstep_nonroi = nonroi_area / temp;
        nonroi_qp = intel_h264_qstep_qp(qstep_nonroi);
    }

    BRC_CLIP(nonroi_qp, min_qp, 51);

qp_fill:
    /* Fill the whole map with the non-ROI QP, then overwrite each ROI
     * rectangle with its own QP (skipped on the quickfill path). */
    memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture);
    if (!quickfill) {
        char *qp_ptr;

        for (i = 0; i < num_roi; i++) {
            for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) {
                qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb;
                memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs);
            }
        }
    }
    return vaStatus;
}
2050
2051 extern void
2052 intel_h264_enc_roi_config(VADriverContextP ctx,
2053                           struct encode_state *encode_state,
2054                           struct intel_encoder_context *encoder_context)
2055 {
2056     char *qp_ptr;
2057     int i, j;
2058     struct i965_driver_data *i965 = i965_driver_data(ctx);
2059     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2060     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2061     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
2062     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
2063     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
2064
2065     int row_start, row_end, col_start, col_end;
2066     int num_roi = 0;
2067
2068     vme_context->roi_enabled = 0;
2069     /* Restriction: Disable ROI when multi-slice is enabled */
2070     if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
2071         return;
2072
2073     vme_context->roi_enabled = !!encoder_context->brc.num_roi;
2074
2075     if (!vme_context->roi_enabled)
2076         return;
2077
2078     if ((vme_context->saved_width_mbs !=  width_in_mbs) ||
2079         (vme_context->saved_height_mbs != height_in_mbs)) {
2080         free(vme_context->qp_per_mb);
2081         vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);
2082
2083         vme_context->saved_width_mbs = width_in_mbs;
2084         vme_context->saved_height_mbs = height_in_mbs;
2085         assert(vme_context->qp_per_mb);
2086     }
2087     if (encoder_context->rate_control_mode == VA_RC_CBR) {
2088         /*
2089          * TODO: More complex Qp adjust needs to be added.
2090          * Currently it is initialized to slice_qp.
2091          */
2092         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
2093         int qp;
2094         int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
2095
2096         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
2097         intel_h264_enc_roi_cbr(ctx, qp, encode_state, encoder_context);
2098
2099     } else if (encoder_context->rate_control_mode == VA_RC_CQP) {
2100         VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2101         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
2102         int qp;
2103         int min_qp = MAX(1, encoder_context->brc.min_qp);
2104
2105         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2106         memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
2107
2108
2109         for (j = num_roi; j ; j--) {
2110             int qp_delta, qp_clip;
2111
2112             col_start = encoder_context->brc.roi[i].left;
2113             col_end = encoder_context->brc.roi[i].right;
2114             row_start = encoder_context->brc.roi[i].top;
2115             row_end = encoder_context->brc.roi[i].bottom;
2116
2117             col_start = col_start / 16;
2118             col_end = (col_end + 15) / 16;
2119             row_start = row_start / 16;
2120             row_end = (row_end + 15) / 16;
2121
2122             qp_delta = encoder_context->brc.roi[i].value;
2123             qp_clip = qp + qp_delta;
2124
2125             BRC_CLIP(qp_clip, min_qp, 51);
2126
2127             for (i = row_start; i < row_end; i++) {
2128                 qp_ptr = vme_context->qp_per_mb + (i * width_in_mbs) + col_start;
2129                 memset(qp_ptr, qp_clip, (col_end - col_start));
2130             }
2131         }
2132     } else {
2133         /*
2134          * TODO: Disable it for non CBR-CQP.
2135          */
2136         vme_context->roi_enabled = 0;
2137     }
2138
2139     if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info))
2140         encoder_context->soft_batch_force = 1;
2141
2142     return;
2143 }
2144
2145 /* HEVC */
2146 static int
2147 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
2148                            VAPictureHEVC *ref_list,
2149                            int num_pictures,
2150                            int dir)
2151 {
2152     int i, found = -1, min = 0x7FFFFFFF;
2153
2154     for (i = 0; i < num_pictures; i++) {
2155         int tmp;
2156
2157         if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
2158             (ref_list[i].picture_id == VA_INVALID_SURFACE))
2159             break;
2160
2161         tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
2162
2163         if (dir)
2164             tmp = -tmp;
2165
2166         if (tmp > 0 && tmp < min) {
2167             min = tmp;
2168             found = i;
2169         }
2170     }
2171
2172     return found;
2173 }
2174 void
2175 intel_hevc_vme_reference_state(VADriverContextP ctx,
2176                                struct encode_state *encode_state,
2177                                struct intel_encoder_context *encoder_context,
2178                                int list_index,
2179                                int surface_index,
2180                                void (* vme_source_surface_state)(
2181                                    VADriverContextP ctx,
2182                                    int index,
2183                                    struct object_surface *obj_surface,
2184                                    struct intel_encoder_context *encoder_context))
2185 {
2186     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2187     struct object_surface *obj_surface = NULL;
2188     struct i965_driver_data *i965 = i965_driver_data(ctx);
2189     VASurfaceID ref_surface_id;
2190     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2191     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2192     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2193     int max_num_references;
2194     VAPictureHEVC *curr_pic;
2195     VAPictureHEVC *ref_list;
2196     int ref_idx;
2197     unsigned int is_hevc10 = 0;
2198     GenHevcSurface *hevc_encoder_surface = NULL;
2199
2200     if ((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
2201         || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
2202         is_hevc10 = 1;
2203
2204     if (list_index == 0) {
2205         max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
2206         ref_list = slice_param->ref_pic_list0;
2207     } else {
2208         max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
2209         ref_list = slice_param->ref_pic_list1;
2210     }
2211
2212     if (max_num_references == 1) {
2213         if (list_index == 0) {
2214             ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
2215             vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
2216         } else {
2217             ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
2218             vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
2219         }
2220
2221         if (ref_surface_id != VA_INVALID_SURFACE)
2222             obj_surface = SURFACE(ref_surface_id);
2223
2224         if (!obj_surface ||
2225             !obj_surface->bo) {
2226             obj_surface = encode_state->reference_objects[list_index];
2227             vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
2228         }
2229
2230         ref_idx = 0;
2231     } else {
2232         curr_pic = &pic_param->decoded_curr_pic;
2233
2234         /* select the reference frame in temporal space */
2235         ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
2236         ref_surface_id = ref_list[ref_idx].picture_id;
2237
2238         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
2239             obj_surface = SURFACE(ref_surface_id);
2240
2241         vme_context->used_reference_objects[list_index] = obj_surface;
2242         vme_context->used_references[list_index] = &ref_list[ref_idx];
2243     }
2244
2245     if (obj_surface &&
2246         obj_surface->bo) {
2247         assert(ref_idx >= 0);
2248         vme_context->used_reference_objects[list_index] = obj_surface;
2249
2250         if (is_hevc10) {
2251             hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2252             assert(hevc_encoder_surface);
2253             obj_surface = hevc_encoder_surface->nv12_surface_obj;
2254         }
2255         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
2256         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
2257                                                     ref_idx << 16 |
2258                                                     ref_idx <<  8 |
2259                                                     ref_idx);
2260     } else {
2261         vme_context->used_reference_objects[list_index] = NULL;
2262         vme_context->used_references[list_index] = NULL;
2263         vme_context->ref_index_in_mb[list_index] = 0;
2264     }
2265 }
2266
2267 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
2268                                      struct encode_state *encode_state,
2269                                      struct intel_encoder_context *encoder_context)
2270 {
2271     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2272     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2273     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2274     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2275     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2276     int qp, m_cost, j, mv_count;
2277     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
2278     float   lambda, m_costf;
2279
2280     /* here no SI SP slice for HEVC, do not need slice fixup */
2281     int slice_type = slice_param->slice_type;
2282
2283
2284     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2285
2286     if (encoder_context->rate_control_mode == VA_RC_CBR) {
2287         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
2288         if (slice_type == HEVC_SLICE_B) {
2289             if (pSequenceParameter->ip_period == 1) {
2290                 slice_type = HEVC_SLICE_P;
2291                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2292
2293             } else if (mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1) {
2294                 slice_type = HEVC_SLICE_P;
2295                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2296             }
2297         }
2298
2299     }
2300
2301     if (vme_state_message == NULL)
2302         return;
2303
2304     assert(qp <= QP_MAX);
2305     lambda = intel_lambda_qp(qp);
2306     if (slice_type == HEVC_SLICE_I) {
2307         vme_state_message[MODE_INTRA_16X16] = 0;
2308         m_cost = lambda * 4;
2309         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2310         m_cost = lambda * 16;
2311         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2312         m_cost = lambda * 3;
2313         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2314     } else {
2315         m_cost = 0;
2316         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
2317         for (j = 1; j < 3; j++) {
2318             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2319             m_cost = (int)m_costf;
2320             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2321         }
2322         mv_count = 3;
2323         for (j = 4; j <= 64; j *= 2) {
2324             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2325             m_cost = (int)m_costf;
2326             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
2327             mv_count++;
2328         }
2329
2330         if (qp <= 25) {
2331             vme_state_message[MODE_INTRA_16X16] = 0x4a;
2332             vme_state_message[MODE_INTRA_8X8] = 0x4a;
2333             vme_state_message[MODE_INTRA_4X4] = 0x4a;
2334             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2335             vme_state_message[MODE_INTER_16X16] = 0x4a;
2336             vme_state_message[MODE_INTER_16X8] = 0x4a;
2337             vme_state_message[MODE_INTER_8X8] = 0x4a;
2338             vme_state_message[MODE_INTER_8X4] = 0x4a;
2339             vme_state_message[MODE_INTER_4X4] = 0x4a;
2340             vme_state_message[MODE_INTER_BWD] = 0x2a;
2341             return;
2342         }
2343         m_costf = lambda * 10;
2344         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2345         m_cost = lambda * 14;
2346         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2347         m_cost = lambda * 24;
2348         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2349         m_costf = lambda * 3.5;
2350         m_cost = m_costf;
2351         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2352         if (slice_type == HEVC_SLICE_P) {
2353             m_costf = lambda * 2.5;
2354             m_cost = m_costf;
2355             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2356             m_costf = lambda * 4;
2357             m_cost = m_costf;
2358             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2359             m_costf = lambda * 1.5;
2360             m_cost = m_costf;
2361             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2362             m_costf = lambda * 3;
2363             m_cost = m_costf;
2364             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2365             m_costf = lambda * 5;
2366             m_cost = m_costf;
2367             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2368             /* BWD is not used in P-frame */
2369             vme_state_message[MODE_INTER_BWD] = 0;
2370         } else {
2371             m_costf = lambda * 2.5;
2372             m_cost = m_costf;
2373             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2374             m_costf = lambda * 5.5;
2375             m_cost = m_costf;
2376             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2377             m_costf = lambda * 3.5;
2378             m_cost = m_costf;
2379             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2380             m_costf = lambda * 5.0;
2381             m_cost = m_costf;
2382             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2383             m_costf = lambda * 6.5;
2384             m_cost = m_costf;
2385             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2386             m_costf = lambda * 1.5;
2387             m_cost = m_costf;
2388             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
2389         }
2390     }
2391 }