OSDN Git Service

intel-vaapi-driver 1.8.1.pre1
[android-x86/hardware-intel-common-vaapi.git] / src / gen6_mfc_common.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "gen9_mfc.h"
45 #include "intel_media.h"
46
47 #ifndef HAVE_LOG2F
48 #define log2f(x) (logf(x)/(float)M_LN2)
49 #endif
50
51 int intel_avc_enc_slice_type_fixup(int slice_type)
52 {
53     if (slice_type == SLICE_TYPE_SP ||
54         slice_type == SLICE_TYPE_P)
55         slice_type = SLICE_TYPE_P;
56     else if (slice_type == SLICE_TYPE_SI ||
57              slice_type == SLICE_TYPE_I)
58         slice_type = SLICE_TYPE_I;
59     else {
60         if (slice_type != SLICE_TYPE_B)
61             WARN_ONCE("Invalid slice type for H.264 encoding!\n");
62
63         slice_type = SLICE_TYPE_B;
64     }
65
66     return slice_type;
67 }
68
69 static void
70 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state, 
71                                         struct intel_encoder_context *encoder_context)
72 {
73     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
74     int i;
75
76     for(i = 0 ; i < 3; i++) {
77         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
78         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
79         mfc_context->bit_rate_control_context[i].GrowInit = 6;
80         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
81         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
82         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
83         
84         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
85         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
86         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
87         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
88         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
89         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
90     }
91 }
92
/*
 * (Re)initialize the software bit-rate-control state for every temporal
 * layer: initial per-slice-type QPs, per-slice-type target frame sizes,
 * the per-layer GOP composition, and the HRD buffer model (size,
 * fullness, capacity).  Called from intel_mfc_brc_prepare() whenever a
 * BRC reset is pending.
 */
static void intel_mfc_brc_init(struct encode_state *encode_state,
                               struct intel_encoder_context* encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    double bitrate, framerate;
    /* Raw (uncompressed) bits per frame, assuming 12 bits/pixel (NV12). */
    double frame_per_bits = 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
    /* Heuristic coded-frame sizes at the QP extremes: ~10% of the raw
     * frame at QP 1 and ~0.1% at QP 51; used below to interpolate an
     * initial QP from the available bits per frame. */
    double qp1_size = 0.1 * frame_per_bits;
    double qp51_size = 0.001 * frame_per_bits;
    int min_qp = MAX(1, encoder_context->brc.min_qp);
    double bpf, factor, hrd_factor;
    int inum = encoder_context->brc.num_iframes_in_gop,
        pnum = encoder_context->brc.num_pframes_in_gop,
        bnum = encoder_context->brc.num_bframes_in_gop; /* Gop structure: number of I, P, B frames in the Gop. */
    int intra_period = encoder_context->brc.gop_size;
    int i;

    /* With temporal layers, assume a slightly larger best-case frame. */
    if (encoder_context->layer.num_layers > 1)
        qp1_size = 0.15 * frame_per_bits;

    mfc_context->brc.mode = encoder_context->rate_control_mode;

    mfc_context->hrd.violation_noted = 0;

    for (i = 0; i < encoder_context->layer.num_layers; i++) {
        /* Default starting QP before any better estimate below. */
        mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = 26;
        mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 26;
        mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = 26;

        /* bits_per_second[] is cumulative across layers, so each layer's
         * own budget is the difference from the previous layer. */
        if (i == 0) {
            bitrate = encoder_context->brc.bits_per_second[0];
            framerate = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
        } else {
            bitrate = (encoder_context->brc.bits_per_second[i] - encoder_context->brc.bits_per_second[i - 1]);
            framerate = ((double)encoder_context->brc.framerate[i].num / (double)encoder_context->brc.framerate[i].den) -
                ((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
        }

        /* In VBR mode the supplied bitrate is the maximum; aim at the
         * configured target percentage of it. */
        if (mfc_context->brc.mode == VA_RC_VBR && encoder_context->brc.target_percentage[i])
            bitrate = bitrate * encoder_context->brc.target_percentage[i] / 100;

        if (i == encoder_context->layer.num_layers - 1)
            factor = 1.0;
        else {
            /* Ratio of this layer's framerate to the previous layer's.
             * NOTE(review): when i == 0 and num_layers > 1 this branch
             * reads framerate[-1], which looks out of bounds -- verify
             * the intended indexing against the layer setup. */
            factor = ((double)encoder_context->brc.framerate[i].num / (double)encoder_context->brc.framerate[i].den) /
                ((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
        }

        /* Fraction of the total (top-layer cumulative) bitrate carried
         * by this layer; scales the shared HRD buffer parameters. */
        hrd_factor = (double)bitrate / encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];

        mfc_context->hrd.buffer_size[i] = (unsigned int)(encoder_context->brc.hrd_buffer_size * hrd_factor);
        /* Start from the configured initial fullness if it fits in the
         * buffer, otherwise from half the buffer size.  Note the
         * (double) cast applies to the comparison result, not to an
         * operand; only the value selected by ?: matters here. */
        mfc_context->hrd.current_buffer_fullness[i] =
            (double)(encoder_context->brc.hrd_initial_buffer_fullness < encoder_context->brc.hrd_buffer_size) ?
            encoder_context->brc.hrd_initial_buffer_fullness : encoder_context->brc.hrd_buffer_size / 2.;
        mfc_context->hrd.current_buffer_fullness[i] *= hrd_factor;
        mfc_context->hrd.target_buffer_fullness[i] = (double)encoder_context->brc.hrd_buffer_size * hrd_factor / 2.;
        /* Buffer capacity expressed in units of the largest (QP 1) frame. */
        mfc_context->hrd.buffer_capacity[i] = (double)encoder_context->brc.hrd_buffer_size * hrd_factor / qp1_size;

        /* Derive each layer's share of the GOP frame counts from the
         * framerate ratio; upper layers get the remainder. */
        if (encoder_context->layer.num_layers > 1) {
            if (i == 0) {
                intra_period = (int)(encoder_context->brc.gop_size * factor);
                inum = 1;
                pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor);
                bnum = intra_period - inum - pnum;
            } else {
                intra_period = (int)(encoder_context->brc.gop_size * factor) - intra_period;
                inum = 0;
                pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor) - pnum;
                bnum = intra_period - inum - pnum;
            }
        }

        mfc_context->brc.gop_nums[i][SLICE_TYPE_I] = inum;
        mfc_context->brc.gop_nums[i][SLICE_TYPE_P] = pnum;
        mfc_context->brc.gop_nums[i][SLICE_TYPE_B] = bnum;

        /* Split the GOP bit budget across slice types using the
         * BRC_PWEIGHT/BRC_BWEIGHT relative frame-size weights. */
        mfc_context->brc.target_frame_size[i][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
                                                                    (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
        mfc_context->brc.target_frame_size[i][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
        mfc_context->brc.target_frame_size[i][SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];

        bpf = mfc_context->brc.bits_per_frame[i] = bitrate/framerate;

        if (encoder_context->brc.initial_qp) {
            /* Caller supplied an explicit starting QP; use it for all types. */
            mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = encoder_context->brc.initial_qp;
            mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = encoder_context->brc.initial_qp;
            mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = encoder_context->brc.initial_qp;
        } else {
            /* Linearly interpolate a starting QP between the QP 1 and
             * QP 51 heuristic frame sizes, clamping at the extremes. */
            if ((bpf > qp51_size) && (bpf < qp1_size)) {
                mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
            }
            else if (bpf >= qp1_size)
                mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 1;
            else if (bpf <= qp51_size)
                mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51;

            mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P];
            mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I];
        }

        BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I], min_qp, 51);
        BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P], min_qp, 51);
        BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B], min_qp, 51);
    }
}
197
198 int intel_mfc_update_hrd(struct encode_state *encode_state,
199                          struct intel_encoder_context *encoder_context,
200                          int frame_bits)
201 {
202     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
203     int layer_id = encoder_context->layer.curr_frame_layer_id;
204     double prev_bf = mfc_context->hrd.current_buffer_fullness[layer_id];
205
206     mfc_context->hrd.current_buffer_fullness[layer_id] -= frame_bits;
207
208     if (mfc_context->hrd.buffer_size[layer_id] > 0 && mfc_context->hrd.current_buffer_fullness[layer_id] <= 0.) {
209         mfc_context->hrd.current_buffer_fullness[layer_id] = prev_bf;
210         return BRC_UNDERFLOW;
211     }
212     
213     mfc_context->hrd.current_buffer_fullness[layer_id] += mfc_context->brc.bits_per_frame[layer_id];
214     if (mfc_context->hrd.buffer_size[layer_id] > 0 && mfc_context->hrd.current_buffer_fullness[layer_id] > mfc_context->hrd.buffer_size[layer_id]) {
215         if (mfc_context->brc.mode == VA_RC_VBR)
216             mfc_context->hrd.current_buffer_fullness[layer_id] = mfc_context->hrd.buffer_size[layer_id];
217         else {
218             mfc_context->hrd.current_buffer_fullness[layer_id] = prev_bf;
219             return BRC_OVERFLOW;
220         }
221     }
222     return BRC_NO_HRD_VIOLATION;
223 }
224
/*
 * CBR post-encode rate control step.  After each frame, update the HRD
 * model with the actual frame size and predict the QP for the next
 * frame of the same slice type: scale the previous QP by the ratio of
 * target to predicted frame size, limit the per-frame QP change, and
 * apply a smooth correction that pushes buffer fullness back toward
 * its target level.  On HRD underflow/overflow the QP is stepped
 * up/down instead so the frame can be re-encoded.
 *
 * Returns a gen6_brc_status code describing any HRD violation.
 */
static int intel_mfc_brc_postpack_cbr(struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int frame_bits)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; 
    int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
    int curr_frame_layer_id, next_frame_layer_id;
    int qpi, qpp, qpb;
    int qp; // quantizer of previously encoded slice of current type
    int qpn; // predicted quantizer for next frame of current type in integer format
    double qpf; // predicted quantizer for next frame of current type in float format
    double delta_qp; // QP correction
    int min_qp = MAX(1, encoder_context->brc.min_qp);
    int target_frame_size, frame_size_next;
    /* Notes:
     *  x - how far we are from HRD buffer borders
     *  y - how far we are from target HRD buffer fullness
     */
    double x, y;
    double frame_size_alpha;

    /* Without temporal layers everything lives in layer 0; otherwise the
     * next frame's layer comes from the repeating frame_layer_ids pattern. */
    if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0) {
        curr_frame_layer_id = 0;
        next_frame_layer_id = 0;
    } else {
        curr_frame_layer_id = encoder_context->layer.curr_frame_layer_id;
        next_frame_layer_id = encoder_context->layer.frame_layer_ids[encoder_context->num_frames_in_sequence % encoder_context->layer.size_frame_layer_ids];
    }

    /* checking whether HRD compliance first */
    sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);

    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* nothing */
    } else {
        /* The current frame will be re-encoded, so the "next" frame stays
         * in the current layer. */
        next_frame_layer_id = curr_frame_layer_id;
    }

    /* Remember this layer's frame size/type, then switch to the stats of
     * the layer the next frame belongs to. */
    mfc_context->brc.bits_prev_frame[curr_frame_layer_id] = frame_bits;
    frame_bits = mfc_context->brc.bits_prev_frame[next_frame_layer_id];

    mfc_context->brc.prev_slice_type[curr_frame_layer_id] = slicetype;
    slicetype = mfc_context->brc.prev_slice_type[next_frame_layer_id];

    /* 0 means the next frame is the first frame of next layer */
    if (frame_bits == 0)
        return sts;

    qpi = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I];
    qpp = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P];
    qpb = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B];

    qp = mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype];

    /* Blend the last actual frame size toward the target; alpha (taken
     * from the GOP composition, capped at 30) controls how slowly we
     * react, and a tiny HRD buffer forces immediate reaction. */
    target_frame_size = mfc_context->brc.target_frame_size[next_frame_layer_id][slicetype];
    if (mfc_context->hrd.buffer_capacity[next_frame_layer_id] < 5)
        frame_size_alpha = 0;
    else
        frame_size_alpha = (double)mfc_context->brc.gop_nums[next_frame_layer_id][slicetype];
    if (frame_size_alpha > 30) frame_size_alpha = 30;
    frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
        (double)(frame_size_alpha + 1.);

    /* frame_size_next: avoiding negative number and too small value */
    if ((double)frame_size_next < (double)(target_frame_size * 0.25))
        frame_size_next = (int)((double)target_frame_size * 0.25);

    /* Scale the QP by the size ratio (a larger predicted frame needs a
     * larger QP), rounding to the nearest integer. */
    qpf = (double)qp * target_frame_size / frame_size_next;
    qpn = (int)(qpf + 0.5);

    if (qpn == qp) {
        /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
        mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] += qpf - qpn;
        if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] > 1.0) {
            qpn++;
            mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
        } else if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] < -1.0) {
            qpn--;
            mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
        }
    }
    /* making sure that QP is not changing too fast */
    if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
    else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
    /* making sure that with QP predictions we do not leave QPs range */
    BRC_CLIP(qpn, 1, 51);

    /* calculating QP delta as some function*/
    /* x in [-1,1] measures where fullness sits between the buffer edges
     * and the target level; y is the distance (in bits) to the nearer
     * buffer edge.  The correction exp(-1/y)*sin(pi/2 * x) fades out as
     * we approach the target and saturates near the edges. */
    x = mfc_context->hrd.target_buffer_fullness[next_frame_layer_id] - mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
    if (x > 0) {
        x /= mfc_context->hrd.target_buffer_fullness[next_frame_layer_id];
        y = mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
    }
    else {
        x /= (mfc_context->hrd.buffer_size[next_frame_layer_id] - mfc_context->hrd.target_buffer_fullness[next_frame_layer_id]);
        y = mfc_context->hrd.buffer_size[next_frame_layer_id] - mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
    }
    if (y < 0.01) y = 0.01;
    if (x > 1) x = 1;
    else if (x < -1) x = -1;

    delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
    qpn = (int)(qpn + delta_qp + 0.5);

    /* making sure that with QP predictions we do not leave QPs range */
    BRC_CLIP(qpn, min_qp, 51);

    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* correcting QPs of slices of other types */
        /* Nudge the other slice types' QPs toward their expected offsets
         * (I < P < B) if they have drifted too far apart. */
        if (slicetype == SLICE_TYPE_P) {
            if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
            if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
        } else if (slicetype == SLICE_TYPE_I) {
            if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
            if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
        } else { // SLICE_TYPE_B
            if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
            if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
        }
        BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I], min_qp, 51);
        BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P], min_qp, 51);
        BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B], min_qp, 51);
    } else if (sts == BRC_UNDERFLOW) { // underflow
        /* Frame was too big: force the QP up by at least one step. */
        if (qpn <= qp) qpn = qp + 1;
        if (qpn > 51) {
            qpn = 51;
            sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
        }
    } else if (sts == BRC_OVERFLOW) {
        /* Frame was too small: force the QP down by at least one step. */
        if (qpn >= qp) qpn = qp - 1;
        if (qpn < min_qp) { // overflow with minQP
            qpn = min_qp;
            sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
        }
    }

    mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype] = qpn;

    return sts;
}
373
374 static int intel_mfc_brc_postpack_vbr(struct encode_state *encode_state,
375                                       struct intel_encoder_context *encoder_context,
376                                       int frame_bits)
377 {
378     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
379     gen6_brc_status sts;
380     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
381     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
382     int *qp = mfc_context->brc.qp_prime_y[0];
383     int min_qp = MAX(1, encoder_context->brc.min_qp);
384     int qp_delta, large_frame_adjustment;
385
386     // This implements a simple reactive VBR rate control mode for single-layer H.264.  The primary
387     // aim here is to avoid the problematic behaviour that the CBR rate controller displays on
388     // scene changes, where the QP can get pushed up by a large amount in a short period and
389     // compromise the quality of following frames to a very visible degree.
390     // The main idea, then, is to try to keep the HRD buffering above the target level most of the
391     // time, so that when a large frame is generated (on a scene change or when the stream
392     // complexity increases) we have plenty of slack to be able to encode the more difficult region
393     // without compromising quality immediately on the following frames.   It is optimistic about
394     // the complexity of future frames, so even after generating one or more large frames on a
395     // significant change it will try to keep the QP at its current level until the HRD buffer
396     // bounds force a change to maintain the intended rate.
397
398     sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
399
400     // This adjustment is applied to increase the QP by more than we normally would if a very
401     // large frame is encountered and we are in danger of running out of slack.
402     large_frame_adjustment = rint(2.0 * log(frame_bits / mfc_context->brc.target_frame_size[0][slice_type]));
403
404     if (sts == BRC_UNDERFLOW) {
405         // The frame is far too big and we don't have the bits available to send it, so it will
406         // have to be re-encoded at a higher QP.
407         qp_delta = +2;
408         if (frame_bits > mfc_context->brc.target_frame_size[0][slice_type])
409             qp_delta += large_frame_adjustment;
410     } else if (sts == BRC_OVERFLOW) {
411         // The frame is very small and we are now overflowing the HRD buffer.  Currently this case
412         // does not occur because we ignore overflow in VBR mode.
413         assert(0 && "Overflow in VBR mode");
414     } else if (frame_bits <= mfc_context->brc.target_frame_size[0][slice_type]) {
415         // The frame is smaller than the average size expected for this frame type.
416         if (mfc_context->hrd.current_buffer_fullness[0] >
417             (mfc_context->hrd.target_buffer_fullness[0] + mfc_context->hrd.buffer_size[0]) / 2.0) {
418             // We currently have lots of bits available, so decrease the QP slightly for the next
419             // frame.
420             qp_delta = -1;
421         } else {
422             // The HRD buffer fullness is increasing, so do nothing.  (We may be under the target
423             // level here, but are moving in the right direction.)
424             qp_delta = 0;
425         }
426     } else {
427         // The frame is larger than the average size expected for this frame type.
428         if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0]) {
429             // We are currently over the target level, so do nothing.
430             qp_delta = 0;
431         } else if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0] / 2.0) {
432             // We are under the target level, but not critically.  Increase the QP by one step if
433             // continuing like this would underflow soon (currently within one second).
434             if (mfc_context->hrd.current_buffer_fullness[0] /
435                 (double)(frame_bits - mfc_context->brc.target_frame_size[0][slice_type] + 1) <
436                 ((double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den))
437                 qp_delta = +1;
438             else
439                 qp_delta = 0;
440         } else {
441             // We are a long way under the target level.  Always increase the QP, possibly by a
442             // larger amount dependent on how big the frame we just made actually was.
443             qp_delta = +1 + large_frame_adjustment;
444         }
445     }
446
447     switch (slice_type) {
448     case SLICE_TYPE_I:
449         qp[SLICE_TYPE_I] += qp_delta;
450         qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_I] + BRC_I_P_QP_DIFF;
451         qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_I] + BRC_I_B_QP_DIFF;
452         break;
453     case SLICE_TYPE_P:
454         qp[SLICE_TYPE_P] += qp_delta;
455         qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_P] - BRC_I_P_QP_DIFF;
456         qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_P] + BRC_P_B_QP_DIFF;
457         break;
458     case SLICE_TYPE_B:
459         qp[SLICE_TYPE_B] += qp_delta;
460         qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_B] - BRC_I_B_QP_DIFF;
461         qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_B] - BRC_P_B_QP_DIFF;
462         break;
463     }
464     BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], min_qp, 51);
465     BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], min_qp, 51);
466     BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B], min_qp, 51);
467
468     if (sts == BRC_UNDERFLOW && qp[slice_type] == 51)
469         sts = BRC_UNDERFLOW_WITH_MAX_QP;
470     if (sts == BRC_OVERFLOW && qp[slice_type] == min_qp)
471         sts = BRC_OVERFLOW_WITH_MIN_QP;
472
473     return sts;
474 }
475
476 int intel_mfc_brc_postpack(struct encode_state *encode_state,
477                            struct intel_encoder_context *encoder_context,
478                            int frame_bits)
479 {
480     switch (encoder_context->rate_control_mode) {
481     case VA_RC_CBR:
482         return intel_mfc_brc_postpack_cbr(encode_state, encoder_context, frame_bits);
483     case VA_RC_VBR:
484         return intel_mfc_brc_postpack_vbr(encode_state, encoder_context, frame_bits);
485     }
486     assert(0 && "Invalid RC mode");
487 }
488
489 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
490                                        struct intel_encoder_context *encoder_context)
491 {
492     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
493     unsigned int rate_control_mode = encoder_context->rate_control_mode;
494     int target_bit_rate = encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
495     
496     // current we only support CBR mode.
497     if (rate_control_mode == VA_RC_CBR) {
498         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
499         mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
500         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
501         mfc_context->vui_hrd.i_frame_number = 0;
502
503         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24; 
504         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
505         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
506     }
507
508 }
509
510 void 
511 intel_mfc_hrd_context_update(struct encode_state *encode_state, 
512                              struct gen6_mfc_context *mfc_context)
513 {
514     mfc_context->vui_hrd.i_frame_number++;
515 }
516
517 int intel_mfc_interlace_check(VADriverContextP ctx,
518                               struct encode_state *encode_state,
519                               struct intel_encoder_context *encoder_context)
520 {
521     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
522     VAEncSliceParameterBufferH264 *pSliceParameter;
523     int i;
524     int mbCount = 0;
525     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
526     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
527   
528     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
529         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer; 
530         mbCount += pSliceParameter->num_macroblocks; 
531     }
532     
533     if ( mbCount == ( width_in_mbs * height_in_mbs ) )
534         return 0;
535
536     return 1;
537 }
538
539 void intel_mfc_brc_prepare(struct encode_state *encode_state,
540                            struct intel_encoder_context *encoder_context)
541 {
542     unsigned int rate_control_mode = encoder_context->rate_control_mode;
543
544     if (encoder_context->codec != CODEC_H264 &&
545         encoder_context->codec != CODEC_H264_MVC)
546         return;
547
548     if (rate_control_mode != VA_RC_CQP) {
549         /*Programing bit rate control */
550         if (encoder_context->brc.need_reset) {
551             intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
552             intel_mfc_brc_init(encode_state, encoder_context);
553         }
554
555         /*Programing HRD control */
556         if (encoder_context->brc.need_reset)
557             intel_mfc_hrd_context_init(encode_state, encoder_context);    
558     }
559 }
560
561 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
562                                               struct encode_state *encode_state,
563                                               struct intel_encoder_context *encoder_context,
564                                               struct intel_batchbuffer *slice_batch)
565 {
566     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
567     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
568     unsigned int rate_control_mode = encoder_context->rate_control_mode;
569     unsigned int skip_emul_byte_cnt;
570
571     if (encode_state->packed_header_data[idx]) {
572         VAEncPackedHeaderParameterBuffer *param = NULL;
573         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
574         unsigned int length_in_bits;
575
576         assert(encode_state->packed_header_param[idx]);
577         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
578         length_in_bits = param->bit_length;
579
580         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
581         mfc_context->insert_object(ctx,
582                                    encoder_context,
583                                    header_data,
584                                    ALIGN(length_in_bits, 32) >> 5,
585                                    length_in_bits & 0x1f,
586                                    skip_emul_byte_cnt,
587                                    0,
588                                    0,
589                                    !param->has_emulation_bytes,
590                                    slice_batch);
591     }
592
593     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
594
595     if (encode_state->packed_header_data[idx]) {
596         VAEncPackedHeaderParameterBuffer *param = NULL;
597         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
598         unsigned int length_in_bits;
599
600         assert(encode_state->packed_header_param[idx]);
601         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
602         length_in_bits = param->bit_length;
603
604         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
605
606         mfc_context->insert_object(ctx,
607                                    encoder_context,
608                                    header_data,
609                                    ALIGN(length_in_bits, 32) >> 5,
610                                    length_in_bits & 0x1f,
611                                    skip_emul_byte_cnt,
612                                    0,
613                                    0,
614                                    !param->has_emulation_bytes,
615                                    slice_batch);
616     }
617     
618     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
619
620     if (encode_state->packed_header_data[idx]) {
621         VAEncPackedHeaderParameterBuffer *param = NULL;
622         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
623         unsigned int length_in_bits;
624
625         assert(encode_state->packed_header_param[idx]);
626         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
627         length_in_bits = param->bit_length;
628
629         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
630         mfc_context->insert_object(ctx,
631                                    encoder_context,
632                                    header_data,
633                                    ALIGN(length_in_bits, 32) >> 5,
634                                    length_in_bits & 0x1f,
635                                    skip_emul_byte_cnt,
636                                    0,
637                                    0,
638                                    !param->has_emulation_bytes,
639                                    slice_batch);
640     } else if (rate_control_mode == VA_RC_CBR) {
        // this is the first AU
642         struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
643
644         unsigned char *sei_data = NULL;
645     
646         int length_in_bits = build_avc_sei_buffer_timing(
647             mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
648             mfc_context->vui_hrd.i_initial_cpb_removal_delay,
649             0,
650             mfc_context->vui_hrd.i_cpb_removal_delay_length,                                                       mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
651             mfc_context->vui_hrd.i_dpb_output_delay_length,
652             0,
653             &sei_data);
654         mfc_context->insert_object(ctx,
655                                    encoder_context,
656                                    (unsigned int *)sei_data,
657                                    ALIGN(length_in_bits, 32) >> 5,
658                                    length_in_bits & 0x1f,
659                                    5,
660                                    0,   
661                                    0,   
662                                    1,
663                                    slice_batch);  
664         free(sei_data);
665     }
666 }
667
668 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, 
669                                struct encode_state *encode_state,
670                                struct intel_encoder_context *encoder_context)
671 {
672     struct i965_driver_data *i965 = i965_driver_data(ctx);
673     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
674     struct object_surface *obj_surface; 
675     struct object_buffer *obj_buffer;
676     GenAvcSurface *gen6_avc_surface;
677     dri_bo *bo;
678     VAStatus vaStatus = VA_STATUS_SUCCESS;
679     int i, j, enable_avc_ildb = 0;
680     VAEncSliceParameterBufferH264 *slice_param;
681     struct i965_coded_buffer_segment *coded_buffer_segment;
682     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
683     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
684     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
685
686     if (IS_GEN6(i965->intel.device_info)) {
687         /* On the SNB it should be fixed to 128 for the DMV buffer */
688         width_in_mbs = 128;
689     }
690
691     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
692         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
693         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
694
695         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
696             assert((slice_param->slice_type == SLICE_TYPE_I) ||
697                    (slice_param->slice_type == SLICE_TYPE_SI) ||
698                    (slice_param->slice_type == SLICE_TYPE_P) ||
699                    (slice_param->slice_type == SLICE_TYPE_SP) ||
700                    (slice_param->slice_type == SLICE_TYPE_B));
701
702             if (slice_param->disable_deblocking_filter_idc != 1) {
703                 enable_avc_ildb = 1;
704                 break;
705             }
706
707             slice_param++;
708         }
709     }
710
711     /*Setup all the input&output object*/
712
713     /* Setup current frame and current direct mv buffer*/
714     obj_surface = encode_state->reconstructed_object;
715     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
716
717     if ( obj_surface->private_data == NULL) {
718         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
719         assert(gen6_avc_surface);
720         gen6_avc_surface->dmv_top = 
721             dri_bo_alloc(i965->intel.bufmgr,
722                          "Buffer",
723                          68 * width_in_mbs * height_in_mbs, 
724                          64);
725         gen6_avc_surface->dmv_bottom = 
726             dri_bo_alloc(i965->intel.bufmgr,
727                          "Buffer",
728                          68 * width_in_mbs * height_in_mbs, 
729                          64);
730         assert(gen6_avc_surface->dmv_top);
731         assert(gen6_avc_surface->dmv_bottom);
732         obj_surface->private_data = (void *)gen6_avc_surface;
733         obj_surface->free_private_data = (void *)gen_free_avc_surface; 
734     }
735     gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
736     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
737     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
738     dri_bo_reference(gen6_avc_surface->dmv_top);
739     dri_bo_reference(gen6_avc_surface->dmv_bottom);
740
741     if (enable_avc_ildb) {
742         mfc_context->post_deblocking_output.bo = obj_surface->bo;
743         dri_bo_reference(mfc_context->post_deblocking_output.bo);
744     } else {
745         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
746         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
747     }
748
749     mfc_context->surface_state.width = obj_surface->orig_width;
750     mfc_context->surface_state.height = obj_surface->orig_height;
751     mfc_context->surface_state.w_pitch = obj_surface->width;
752     mfc_context->surface_state.h_pitch = obj_surface->height;
753     
754     /* Setup reference frames and direct mv buffers*/
755     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
756         obj_surface = encode_state->reference_objects[i];
757         
758         if (obj_surface && obj_surface->bo) {
759             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
760             dri_bo_reference(obj_surface->bo);
761
762             /* Check DMV buffer */
763             if ( obj_surface->private_data == NULL) {
764                 
765                 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
766                 assert(gen6_avc_surface);
767                 gen6_avc_surface->dmv_top = 
768                     dri_bo_alloc(i965->intel.bufmgr,
769                                  "Buffer",
770                                  68 * width_in_mbs * height_in_mbs, 
771                                  64);
772                 gen6_avc_surface->dmv_bottom = 
773                     dri_bo_alloc(i965->intel.bufmgr,
774                                  "Buffer",
775                                  68 * width_in_mbs * height_in_mbs, 
776                                  64);
777                 assert(gen6_avc_surface->dmv_top);
778                 assert(gen6_avc_surface->dmv_bottom);
779                 obj_surface->private_data = gen6_avc_surface;
780                 obj_surface->free_private_data = gen_free_avc_surface; 
781             }
782     
783             gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
784             /* Setup DMV buffer */
785             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
786             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
787             dri_bo_reference(gen6_avc_surface->dmv_top);
788             dri_bo_reference(gen6_avc_surface->dmv_bottom);
789         } else {
790             break;
791         }
792     }
793
794     mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
795     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
796
797     obj_buffer = encode_state->coded_buf_object;
798     bo = obj_buffer->buffer_store->bo;
799     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
800     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
801     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
802     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
803     
804     dri_bo_map(bo, 1);
805     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
806     coded_buffer_segment->mapped = 0;
807     coded_buffer_segment->codec = encoder_context->codec;
808     dri_bo_unmap(bo);
809
810     return vaStatus;
811 }
/*
 * Each LUT entry packs a pair of 4-bit fields: (shift, base), encoding
 * value = base * 2^shift, i.e. 2^K * X = value with K = shift, X = base.
 * So it is necessary to convert a cost into the nearest LUT format.
 * The derivation is:
 * 2^K * X = 2^n * (1 + deltaX)
 *    K + log2(X) = n + log2(1 + deltaX)
 *    log2(X) = n - K + log2(1 + deltaX)
 * As X is in the range of [1, 15]:
 *      4 > n - K + log2(1 + deltaX) >= 0
 *      =>  n + log2(1 + deltaX) >= K > n - 4 + log2(1 + deltaX)
 * Then we can derive the corresponding K and get the nearest LUT format.
 */
825 int intel_format_lutvalue(int value, int max)
826 {
827     int ret;
828     int logvalue, temp1, temp2;
829
830     if (value <= 0)
831         return 0;
832
833     logvalue = (int)(log2f((float)value));
834     if (logvalue < 4) {
835         ret = value;
836     } else {
837         int error, temp_value, base, j, temp_err;
838         error = value;
839         j = logvalue - 4 + 1;
840         ret = -1;
841         for(; j <= logvalue; j++) {
842             if (j == 0) {
843                 base = value >> j;
844             } else {
845                 base = (value + (1 << (j - 1)) - 1) >> j;
846             }
847             if (base >= 16)
848                 continue;
849
850             temp_value = base << j;
851             temp_err = abs(value - temp_value);
852             if (temp_err < error) {
853                 error = temp_err;
854                 ret = (j << 4) | base;
855                 if (temp_err == 0)
856                     break;
857             }
858         }
859     }
860     temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
861     temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
862     if (temp1 > temp2)
863         ret = max;
864     return ret;
865
866 }
867
868
869 #define         QP_MAX                  52
870 #define         VP8_QP_MAX              128
871
872
873 static float intel_lambda_qp(int qp)
874 {
875     float value, lambdaf;
876     value = qp;
877     value = value / 6 - 2;
878     if (value < 0)
879         value = 0;
880     lambdaf = roundf(powf(2, value));
881     return lambdaf;
882 }
883
884 static
885 void intel_h264_calc_mbmvcost_qp(int qp,
886                                  int slice_type,
887                                  uint8_t *vme_state_message)
888 {
889     int m_cost, j, mv_count;
890     float   lambda, m_costf;
891
892     assert(qp <= QP_MAX); 
893     lambda = intel_lambda_qp(qp);
894
895     m_cost = lambda;
896     vme_state_message[MODE_CHROMA_INTRA] = 0;
897     vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
898
899     if (slice_type == SLICE_TYPE_I) {
900         vme_state_message[MODE_INTRA_16X16] = 0;
901         m_cost = lambda * 4;
902         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
903         m_cost = lambda * 16; 
904         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
905         m_cost = lambda * 3;
906         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
907     } else {
908         m_cost = 0;
909         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
910         for (j = 1; j < 3; j++) {
911             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
912             m_cost = (int)m_costf;
913             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
914         }
915         mv_count = 3;
916         for (j = 4; j <= 64; j *= 2) {
917             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
918             m_cost = (int)m_costf;
919             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
920             mv_count++;
921         }
922
923         if (qp <= 25) {
924             vme_state_message[MODE_INTRA_16X16] = 0x4a;
925             vme_state_message[MODE_INTRA_8X8] = 0x4a;
926             vme_state_message[MODE_INTRA_4X4] = 0x4a;
927             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
928             vme_state_message[MODE_INTER_16X16] = 0x4a;
929             vme_state_message[MODE_INTER_16X8] = 0x4a;
930             vme_state_message[MODE_INTER_8X8] = 0x4a;
931             vme_state_message[MODE_INTER_8X4] = 0x4a;
932             vme_state_message[MODE_INTER_4X4] = 0x4a;
933             vme_state_message[MODE_INTER_BWD] = 0x2a;
934             return;
935         }
936         m_costf = lambda * 10;
937         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
938         m_cost = lambda * 14;
939         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
940         m_cost = lambda * 24; 
941         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
942         m_costf = lambda * 3.5;
943         m_cost = m_costf;
944         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
945         if (slice_type == SLICE_TYPE_P) {
946             m_costf = lambda * 2.5;
947             m_cost = m_costf;
948             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
949             m_costf = lambda * 4;
950             m_cost = m_costf;
951             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
952             m_costf = lambda * 1.5;
953             m_cost = m_costf;
954             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
955             m_costf = lambda * 3;
956             m_cost = m_costf;
957             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
958             m_costf = lambda * 5;
959             m_cost = m_costf;
960             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
961             /* BWD is not used in P-frame */
962             vme_state_message[MODE_INTER_BWD] = 0;
963         } else {
964             m_costf = lambda * 2.5;
965             m_cost = m_costf;
966             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
967             m_costf = lambda * 5.5;
968             m_cost = m_costf;
969             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
970             m_costf = lambda * 3.5;
971             m_cost = m_costf;
972             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
973             m_costf = lambda * 5.0;
974             m_cost = m_costf;
975             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
976             m_costf = lambda * 6.5;
977             m_cost = m_costf;
978             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
979             m_costf = lambda * 1.5;
980             m_cost = m_costf;
981             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
982         }
983     }
984     return;
985 }
986
987 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
988                                 struct encode_state *encode_state,
989                                 struct intel_encoder_context *encoder_context)
990 {
991     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
992     struct gen6_vme_context *vme_context = encoder_context->vme_context;
993     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
994     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
995     int qp;
996     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
997
998     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
999
1000     if (encoder_context->rate_control_mode == VA_RC_CQP)
1001         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1002     else
1003         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1004
1005     if (vme_state_message == NULL)
1006         return;
1007
1008     intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
1009 }
1010
1011 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
1012                                 struct encode_state *encode_state,
1013                                 struct intel_encoder_context *encoder_context)
1014 {
1015     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1016     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1017     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
1018     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
1019     int qp, m_cost, j, mv_count;
1020     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
1021     float   lambda, m_costf;
1022
1023     int is_key_frame = !pic_param->pic_flags.bits.frame_type;
1024     int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
1025   
1026     if (vme_state_message == NULL)
1027         return;
1028  
1029     if (encoder_context->rate_control_mode == VA_RC_CQP)
1030         qp = q_matrix->quantization_index[0];
1031     else
1032         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1033
1034     lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
1035
1036     m_cost = lambda;
1037     vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
1038
1039     if (is_key_frame) {
1040         vme_state_message[MODE_INTRA_16X16] = 0;
1041         m_cost = lambda * 16; 
1042         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
1043         m_cost = lambda * 3;
1044         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
1045     } else {
1046         m_cost = 0;
1047         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
1048         for (j = 1; j < 3; j++) {
1049             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1050             m_cost = (int)m_costf;
1051             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1052         }
1053         mv_count = 3;
1054         for (j = 4; j <= 64; j *= 2) {
1055             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1056             m_cost = (int)m_costf;
1057             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
1058             mv_count++;
1059         }
1060
1061         if (qp < 92 ) {
1062             vme_state_message[MODE_INTRA_16X16] = 0x4a;
1063             vme_state_message[MODE_INTRA_4X4] = 0x4a;
1064             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
1065             vme_state_message[MODE_INTER_16X16] = 0x4a;
1066             vme_state_message[MODE_INTER_16X8] = 0x4a;
1067             vme_state_message[MODE_INTER_8X8] = 0x4a;
1068             vme_state_message[MODE_INTER_4X4] = 0x4a;
1069             vme_state_message[MODE_INTER_BWD] = 0;
1070             return;
1071         }
1072         m_costf = lambda * 10;
1073         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1074         m_cost = lambda * 24; 
1075         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
1076             
1077         m_costf = lambda * 3.5;
1078         m_cost = m_costf;
1079         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
1080
1081         m_costf = lambda * 2.5;
1082         m_cost = m_costf;
1083         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1084         m_costf = lambda * 4;
1085         m_cost = m_costf;
1086         vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
1087         m_costf = lambda * 1.5;
1088         m_cost = m_costf;
1089         vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
1090         m_costf = lambda * 5;
1091         m_cost = m_costf;
1092         vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
1093         /* BWD is not used in P-frame */
1094         vme_state_message[MODE_INTER_BWD] = 0;
1095     }
1096 }
1097
1098 #define         MB_SCOREBOARD_A         (1 << 0)
1099 #define         MB_SCOREBOARD_B         (1 << 1)
1100 #define         MB_SCOREBOARD_C         (1 << 2)
1101 void 
1102 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
1103 {
1104     vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
1105     vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
1106     vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
1107                                                            MB_SCOREBOARD_B |
1108                                                            MB_SCOREBOARD_C);
1109
1110     /* In VME prediction the current mb depends on the neighbour 
1111      * A/B/C macroblock. So the left/up/up-right dependency should
1112      * be considered.
1113      */
1114     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
1115     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
1116     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
1117     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
1118     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
1119     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
1120
1121     vme_context->gpe_context.vfe_desc7.dword = 0;
1122     return;
1123 }
1124
/* Return 0 when MB (x_index, y_index) lies inside both the picture and the
 * current slice, -1 otherwise.
 * NOTE(review): the slice-range test uses '>' rather than '>=', so the MB
 * at exactly first_mb + num_mb is still reported in-bounds — looks like an
 * off-by-one; confirm against the walker callers before changing. */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    int linear;

    if (x_index < 0 || y_index < 0)
        return -1;
    if (x_index >= mb_width || y_index >= mb_height)
        return -1;

    linear = y_index * mb_width + x_index;
    if (linear < first_mb || linear > first_mb + num_mb)
        return -1;

    return 0;
}
1139
/*
 * Fill the gen7 VME batchbuffer with one CMD_MEDIA_OBJECT per macroblock,
 * walking each slice in 26-degree wavefront order (x -= 2, y += 1 along a
 * diagonal) so the hardware scoreboard dependencies configured in
 * gen7_vme_scoreboard_init() (left / top / top-right) are always satisfied.
 * mb_intra_ub carries the intra-prediction neighbour-availability flags and
 * score_dep the matching scoreboard dependency mask for each MB.
 * The QP written per MB is the slice QP, or the per-MB ROI QP when ROI is
 * enabled.
 */
void
gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
                                     struct encode_state *encode_state,
                                     int mb_width, int mb_height,
                                     int kernel,
                                     int transform_8x8_mode_flag,
                                     struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_row;
    int s;
    unsigned int *command_ptr;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    int qp,qp_mb,qp_index;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

    /* Slice QP: explicit under CQP, otherwise taken from the BRC state. */
    if (encoder_context->rate_control_mode == VA_RC_CQP)
        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    else
        qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];

#define         USE_SCOREBOARD          (1 << 21)
 
    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
        int first_mb = pSliceParameter->macroblock_address;
        int num_mb = pSliceParameter->num_macroblocks;
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;

        x_outer = first_mb % mb_width;
        y_outer = first_mb / mb_width;
        mb_row = y_outer;

        /* Phase 1: start diagonals from each x on the slice's first row,
         * up to (but not including) column mb_width - 2. */
        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            x_inner = x_outer;
            y_inner = y_outer;
            /* Walk down-left along one diagonal while inside the slice. */
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    /* Left neighbour (A) available. */
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A; 
                }
                if (y_inner != mb_row) {
                    /* Top (B), top-left (D) and top-right (C) neighbours
                     * are only available below the slice's first row. */
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                /* Emit a 9-dword MEDIA_OBJECT for this MB. */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                /* QP occupies one byte */
                if (vme_context->roi_enabled) {
                    qp_index = y_inner * mb_width + x_inner;
                    qp_mb = *(vme_context->qp_per_mb + qp_index);
                } else
                    qp_mb = qp;
                *command_ptr++ = qp_mb;
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* Phase 2: start the remaining diagonals from column mb_width - 2,
         * wrapping back to that column on the next row when x overruns. */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = first_mb / mb_width;
        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A; 
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                /* qp occupies one byte */
                if (vme_context->roi_enabled) {
                    qp_index = y_inner * mb_width + x_inner;
                    qp_mb = *(vme_context->qp_per_mb + qp_index);
                } else
                    qp_mb = qp;
                *command_ptr++ = qp_mb;

                x_inner -= 2;
                y_inner += 1;
            }
            x_outer++;
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }           
        }
    }

    /* Terminate the batch. */
    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
1287
1288 static uint8_t
1289 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1290 {
1291     unsigned int is_long_term =
1292         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1293     unsigned int is_top_field =
1294         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1295     unsigned int is_bottom_field =
1296         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1297
1298     return ((is_long_term                         << 6) |
1299             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1300             (frame_store_id                       << 1) |
1301             ((is_top_field ^ 1) & is_bottom_field));
1302 }
1303
/*
 * Emit the two MFX_AVC_REF_IDX_STATE commands (L0 then L1) describing the
 * single reference picture selected by the VME stage for the current slice.
 * Unused entries keep the 0x80 filler byte.
 */
void
intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int slice_type;
    struct object_surface *obj_surface;
    unsigned int fref_entry, bref_entry;
    int frame_index, i;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;

    /* Start with all four byte-entries of each list marked unused (0x80). */
    fref_entry = 0x80808080;
    bref_entry = 0x80808080;
    slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

    /* L0 reference: needed for both P- and B-slices. */
    if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
        int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);

        /* Only 4 one-byte entries fit in the dword programmed below. */
        if (ref_idx_l0 > 3) {
            WARN_ONCE("ref_idx_l0 is out of range\n");
            ref_idx_l0 = 0;
        }

        /* Find the DPB slot of the surface the VME stage actually used. */
        obj_surface = vme_context->used_reference_objects[0];
        frame_index = -1;
        for (i = 0; i < 16; i++) {
            if (obj_surface &&
                obj_surface == encode_state->reference_objects[i]) {
                frame_index = i;
                break;
            }
        }
        if (frame_index == -1) {
            WARN_ONCE("RefPicList0 is not found in DPB!\n");
        } else {
            /* Replace the filler byte at slot ref_idx_l0 with the packed
             * reference-entry state. */
            int ref_idx_l0_shift = ref_idx_l0 * 8;
            fref_entry &= ~(0xFF << ref_idx_l0_shift);
            fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
        }
    }

    /* L1 reference: B-slices only. */
    if (slice_type == SLICE_TYPE_B) {
        int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);

        if (ref_idx_l1 > 3) {
            WARN_ONCE("ref_idx_l1 is out of range\n");
            ref_idx_l1 = 0;
        }

        obj_surface = vme_context->used_reference_objects[1];
        frame_index = -1;
        for (i = 0; i < 16; i++) {
            if (obj_surface &&
                obj_surface == encode_state->reference_objects[i]) {
                frame_index = i;
                break;
            }
        }
        if (frame_index == -1) {
            WARN_ONCE("RefPicList1 is not found in DPB!\n");
        } else {
            int ref_idx_l1_shift = ref_idx_l1 * 8;
            bref_entry &= ~(0xFF << ref_idx_l1_shift);
            bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
        }
    }

    /* Each command is 10 dwords; only the first dword of each list holds a
     * real entry, the remaining 7 dwords stay at the 0x80 filler pattern. */
    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
    OUT_BCS_BATCH(batch, 0);                  //Select L0
    OUT_BCS_BATCH(batch, fref_entry);         //Only 1 reference
    for(i = 0; i < 7; i++) {
        OUT_BCS_BATCH(batch, 0x80808080);
    }
    ADVANCE_BCS_BATCH(batch);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
    OUT_BCS_BATCH(batch, 1);                  //Select L1
    OUT_BCS_BATCH(batch, bref_entry);         //Only 1 reference
    for(i = 0; i < 7; i++) {
        OUT_BCS_BATCH(batch, 0x80808080);
    }
    ADVANCE_BCS_BATCH(batch);
}
1391
1392
1393 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1394                                  struct encode_state *encode_state,
1395                                  struct intel_encoder_context *encoder_context)
1396 {
1397     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1398     uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1399     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1400     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1401     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1402     uint32_t mv_x, mv_y;
1403     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1404     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1405     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1406
1407     if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1408         mv_x = 512;
1409         mv_y = 64;
1410     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1411         mv_x = 1024;
1412         mv_y = 128;
1413     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1414         mv_x = 2048;
1415         mv_y = 128;
1416     } else {
1417         WARN_ONCE("Incorrect Mpeg2 level setting!\n");
1418         mv_x = 512;
1419         mv_y = 64;
1420     }
1421
1422     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1423     if (pic_param->picture_type != VAEncPictureTypeIntra) {
1424         int qp, m_cost, j, mv_count;
1425         float   lambda, m_costf;
1426         slice_param = (VAEncSliceParameterBufferMPEG2 *)
1427             encode_state->slice_params_ext[0]->buffer;
1428         qp = slice_param->quantiser_scale_code;
1429         lambda = intel_lambda_qp(qp);
1430         /* No Intra prediction. So it is zero */
1431         vme_state_message[MODE_INTRA_8X8] = 0;
1432         vme_state_message[MODE_INTRA_4X4] = 0;
1433         vme_state_message[MODE_INTER_MV0] = 0;
1434         for (j = 1; j < 3; j++) {
1435             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1436             m_cost = (int)m_costf;
1437             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1438         }
1439         mv_count = 3;
1440         for (j = 4; j <= 64; j *= 2) {
1441             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1442             m_cost = (int)m_costf;
1443             vme_state_message[MODE_INTER_MV0 + mv_count] =
1444                 intel_format_lutvalue(m_cost, 0x6f);
1445             mv_count++;
1446         }
1447         m_cost = lambda;
1448         /* It can only perform the 16x16 search. So mode cost can be ignored for
1449          * the other mode. for example: 16x8/8x8
1450          */
1451         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1452         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1453
1454         vme_state_message[MODE_INTER_16X8] = 0;
1455         vme_state_message[MODE_INTER_8X8] = 0;
1456         vme_state_message[MODE_INTER_8X4] = 0;
1457         vme_state_message[MODE_INTER_4X4] = 0;
1458         vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
1459
1460     }
1461     vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1462
1463     vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
1464         width_in_mbs;
1465 }
1466
/*
 * Fill the VME batch buffer with one MEDIA_OBJECT command per macroblock
 * for MPEG-2, walking the picture in diagonal order (each inner step
 * moves 2 MBs left and 1 MB down) so the hardware scoreboard
 * dependencies (left/A, top/B, top-right/C) are satisfied by emission
 * order.
 */
void
gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
                                           struct encode_state *encode_state,
                                           int mb_width, int mb_height,
                                           int kernel,
                                           struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    unsigned int *command_ptr;

#define         MPEG2_SCOREBOARD                (1 << 21)

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    {
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;
        int first_mb = 0;
        int num_mb = mb_width * mb_height;

        x_outer = 0;
        y_outer = 0;


        /* First pass: start one diagonal from each top-row MB up to column
         * mb_width-2.  loop_in_bounds() (defined earlier in this file)
         * presumably terminates a diagonal when it leaves the picture /
         * processed region — NOTE(review): confirm against its definition. */
        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            x_inner = x_outer;
            y_inner = y_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* Advertise which neighbours exist (intra-pred availability)
                 * and which scoreboard dependencies this MB must wait on. */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;

                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                /* 8-dword MEDIA_OBJECT with scoreboard enabled. */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = MPEG2_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
                /* Step down-left along the diagonal. */
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* Second pass: emit the remaining diagonals, starting from the
         * saved column and sliding along the right edge / down the rows. */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = 0;
        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;

                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = MPEG2_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));

                x_inner -= 2;
                y_inner += 1;
            }
            x_outer++;
            /* Past the right edge: restart from the saved column on the
             * next row. */
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    /* Terminate the batch. */
    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
    return;
}
1590
1591 static int
1592 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1593                           VAPictureH264 *ref_list,
1594                           int num_pictures,
1595                           int dir)
1596 {
1597     int i, found = -1, min = 0x7FFFFFFF;
1598
1599     for (i = 0; i < num_pictures; i++) {
1600         int tmp;
1601
1602         if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1603             (ref_list[i].picture_id == VA_INVALID_SURFACE))
1604             break;
1605
1606         tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1607
1608         if (dir)
1609             tmp = -tmp;
1610
1611         if (tmp > 0 && tmp < min) {
1612             min = tmp;
1613             found = i;
1614         }
1615     }
1616
1617     return found;
1618 }
1619
1620 void
1621 intel_avc_vme_reference_state(VADriverContextP ctx,
1622                               struct encode_state *encode_state,
1623                               struct intel_encoder_context *encoder_context,
1624                               int list_index,
1625                               int surface_index,
1626                               void (* vme_source_surface_state)(
1627                                   VADriverContextP ctx,
1628                                   int index,
1629                                   struct object_surface *obj_surface,
1630                                   struct intel_encoder_context *encoder_context))
1631 {
1632     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1633     struct object_surface *obj_surface = NULL;
1634     struct i965_driver_data *i965 = i965_driver_data(ctx);
1635     VASurfaceID ref_surface_id;
1636     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1637     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1638     int max_num_references;
1639     VAPictureH264 *curr_pic;
1640     VAPictureH264 *ref_list;
1641     int ref_idx;
1642
1643     if (list_index == 0) {
1644         max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1645         ref_list = slice_param->RefPicList0;
1646     } else {
1647         max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1648         ref_list = slice_param->RefPicList1;
1649     }
1650
1651     if (max_num_references == 1) {
1652         if (list_index == 0) {
1653             ref_surface_id = slice_param->RefPicList0[0].picture_id;
1654             vme_context->used_references[0] = &slice_param->RefPicList0[0];
1655         } else {
1656             ref_surface_id = slice_param->RefPicList1[0].picture_id;
1657             vme_context->used_references[1] = &slice_param->RefPicList1[0];
1658         }
1659
1660         if (ref_surface_id != VA_INVALID_SURFACE)
1661             obj_surface = SURFACE(ref_surface_id);
1662
1663         if (!obj_surface ||
1664             !obj_surface->bo) {
1665             obj_surface = encode_state->reference_objects[list_index];
1666             vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1667         }
1668
1669         ref_idx = 0;
1670     } else {
1671         curr_pic = &pic_param->CurrPic;
1672
1673         /* select the reference frame in temporal space */
1674         ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1675         ref_surface_id = ref_list[ref_idx].picture_id;
1676
1677         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1678             obj_surface = SURFACE(ref_surface_id);
1679
1680         vme_context->used_reference_objects[list_index] = obj_surface;
1681         vme_context->used_references[list_index] = &ref_list[ref_idx];
1682     }
1683
1684     if (obj_surface &&
1685         obj_surface->bo) {
1686         assert(ref_idx >= 0);
1687         vme_context->used_reference_objects[list_index] = obj_surface;
1688         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1689         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
1690                                                     ref_idx << 16 |
1691                                                     ref_idx <<  8 |
1692                                                     ref_idx);
1693     } else {
1694         vme_context->used_reference_objects[list_index] = NULL;
1695         vme_context->used_references[list_index] = NULL;
1696         vme_context->ref_index_in_mb[list_index] = 0;
1697     }
1698 }
1699
/*
 * Insert the packed header data attached to one slice into the slice
 * batch: first every non-slice-header packed buffer, then the slice
 * header itself — either the application-provided packed header or one
 * generated by the driver.
 */
void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
                                        struct encode_state *encode_state,
                                        struct intel_encoder_context *encoder_context,
                                        int slice_index,
                                        struct intel_batchbuffer *slice_batch)
{
    int count, i, start_index;
    unsigned int length_in_bits;
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int *header_data = NULL;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int slice_header_index;

    /* 0 means no packed slice header was provided by the application. */
    if (encode_state->slice_header_index[slice_index] == 0)
        slice_header_index = -1;
    else
        slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    count = encode_state->slice_rawdata_count[slice_index];
    start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    /* Pass 1: insert every packed buffer except the slice header. */
    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)
                    (encode_state->packed_header_params_ext[start_index + i]->buffer);

        /* skip the slice header packed data type as it is lastly inserted */
        if (param->type == VAEncPackedHeaderSlice)
            continue;

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* as the slice header is still required, the last header flag is set to
         * zero.
         */
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   0,
                                   0,
                                   !param->has_emulation_bytes,
                                   slice_batch);
    }

    /* Pass 2: insert the slice header with the last-header flag set. */
    if (slice_header_index == -1) {
        unsigned char *slice_header = NULL;
        int slice_header_length_in_bits = 0;
        VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;

        /* No slice header data is passed. And the driver needs to generate it */
        /* For the Normal H264 */
        slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
                                                             pPicParameter,
                                                             pSliceParameter,
                                                             &slice_header);
        mfc_context->insert_object(ctx, encoder_context,
                                   (unsigned int *)slice_header,
                                   ALIGN(slice_header_length_in_bits, 32) >> 5,
                                   slice_header_length_in_bits & 0x1f,
                                   5,  /* first 5 bytes are start code + nal unit type */
                                   1, 0, 1, slice_batch);

        /* build_avc_slice_header() allocated the buffer; we own it here. */
        free(slice_header);
    } else {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)
                    (encode_state->packed_header_params_ext[slice_header_index]->buffer);
        length_in_bits = param->bit_length;

        /* as the slice header is the last header data for one slice,
         * the last header flag is set to one.
         */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   1,
                                   0,
                                   !param->has_emulation_bytes,
                                   slice_batch);
    }

    return;
}
1801
1802 void
1803 intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
1804                                 struct encode_state *encode_state,
1805                                 struct intel_encoder_context *encoder_context)
1806 {
1807     struct i965_driver_data *i965 = i965_driver_data(ctx);
1808     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1809     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1810     int qp;
1811     dri_bo *bo;
1812     uint8_t *cost_table;
1813
1814     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1815
1816
1817     if (slice_type == SLICE_TYPE_I) {
1818         if (vme_context->i_qp_cost_table)
1819             return;
1820     } else if (slice_type == SLICE_TYPE_P) {
1821         if (vme_context->p_qp_cost_table)
1822             return;
1823     } else {
1824         if (vme_context->b_qp_cost_table)
1825             return;
1826     }
1827
1828     /* It is enough to allocate 32 bytes for each qp. */
1829     bo = dri_bo_alloc(i965->intel.bufmgr,
1830                       "cost_table ",
1831                       QP_MAX * 32,
1832                       64);
1833
1834     dri_bo_map(bo, 1);
1835     assert(bo->virtual);
1836     cost_table = (uint8_t *)(bo->virtual);
1837     for (qp = 0; qp < QP_MAX; qp++) {
1838         intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
1839         cost_table += 32;
1840     }
1841
1842     dri_bo_unmap(bo);
1843
1844     if (slice_type == SLICE_TYPE_I) {
1845         vme_context->i_qp_cost_table = bo;
1846     } else if (slice_type == SLICE_TYPE_P) {
1847         vme_context->p_qp_cost_table = bo;
1848     } else {
1849         vme_context->b_qp_cost_table = bo;
1850     }
1851
1852     vme_context->cost_table_size = QP_MAX * 32;
1853     return;
1854 }
1855
1856 extern void
1857 intel_h264_setup_cost_surface(VADriverContextP ctx,
1858                               struct encode_state *encode_state,
1859                               struct intel_encoder_context *encoder_context,
1860                               unsigned long binding_table_offset,
1861                               unsigned long surface_state_offset)
1862 {
1863     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1864     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1865     dri_bo *bo;
1866
1867
1868     struct i965_buffer_surface cost_table;
1869
1870     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1871
1872
1873     if (slice_type == SLICE_TYPE_I) {
1874         bo = vme_context->i_qp_cost_table;
1875     } else if (slice_type == SLICE_TYPE_P) {
1876         bo = vme_context->p_qp_cost_table;
1877     } else {
1878         bo = vme_context->b_qp_cost_table;
1879     }
1880
1881     cost_table.bo = bo;
1882     cost_table.num_blocks = QP_MAX;
1883     cost_table.pitch = 16;
1884     cost_table.size_block = 32;
1885
1886     vme_context->vme_buffer_suface_setup(ctx,
1887                                          &vme_context->gpe_context,
1888                                          &cost_table,
1889                                          binding_table_offset,
1890                                          surface_state_offset);
1891 }
1892
1893 /*
1894  * the idea of conversion between qp and qstep comes from scaling process
1895  * of transform coeff for Luma component in H264 spec.
1896  *   2^(Qpy / 6 - 6)
1897  * In order to avoid too small qstep, it is multiplied by 16.
1898  */
1899 static float intel_h264_qp_qstep(int qp)
1900 {
1901     float value, qstep;
1902     value = qp;
1903     value = value / 6 - 2;
1904     qstep = powf(2, value);
1905     return qstep;
1906 }
1907
1908 static int intel_h264_qstep_qp(float qstep)
1909 {
1910     float qp;
1911
1912     qp = 12.0f + 6.0f * log2f(qstep);
1913
1914     return floorf(qp);
1915 }
1916
1917 /*
1918  * Currently it is based on the following assumption:
1919  * SUM(roi_area * 1 / roi_qstep) + non_area * 1 / nonroi_qstep =
1920  *                                 total_aread * 1 / baseqp_qstep
1921  *
1922  * qstep is the linearized quantizer of H264 quantizer
1923  */
/* Per-region parameters computed from one VAEncROI rectangle, in
 * macroblock units (start inclusive, end exclusive). */
typedef struct {
    int row_start_in_mb;    /* first MB row of the region */
    int row_end_in_mb;      /* one past the last MB row */
    int col_start_in_mb;    /* first MB column of the region */
    int col_end_in_mb;      /* one past the last MB column */

    int width_mbs;          /* region width in MBs (col_end - col_start) */
    int height_mbs;         /* region height in MBs (row_end - row_start) */

    int roi_qp;             /* clipped QP applied to every MB in the region */
} ROIRegionParam;
1935
1936 static VAStatus
1937 intel_h264_enc_roi_cbr(VADriverContextP ctx,
1938                        int base_qp,
1939                        struct encode_state *encode_state,
1940                        struct intel_encoder_context *encoder_context)
1941 {
1942     int nonroi_qp;
1943     int min_qp = MAX(1, encoder_context->brc.min_qp);
1944     bool quickfill = 0;
1945
1946     ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS];
1947     int num_roi = 0;
1948     int i,j;
1949
1950     float temp;
1951     float qstep_nonroi, qstep_base;
1952     float roi_area, total_area, nonroi_area;
1953     float sum_roi;
1954
1955     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1956     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1957     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1958     int mbs_in_picture = width_in_mbs * height_in_mbs;
1959
1960     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1961     VAStatus vaStatus = VA_STATUS_SUCCESS;
1962
1963     /* currently roi_value_is_qp_delta is the only supported mode of priority.
1964      *
1965      * qp_delta set by user is added to base_qp, which is then clapped by
1966      * [base_qp-min_delta, base_qp+max_delta].
1967      */
1968     ASSERT_RET(encoder_context->brc.roi_value_is_qp_delta, VA_STATUS_ERROR_INVALID_PARAMETER);
1969
1970     num_roi = encoder_context->brc.num_roi;
1971
1972     /* when the base_qp is lower than 12, the quality is quite good based
1973      * on the H264 test experience.
1974      * In such case it is unnecessary to adjust the quality for ROI region.
1975      */
1976     if (base_qp <= 12) {
1977         nonroi_qp = base_qp;
1978         quickfill = 1;
1979         goto qp_fill;
1980     }
1981
1982     sum_roi = 0.0f;
1983     roi_area = 0;
1984     for (i = 0; i < num_roi; i++) {
1985         int row_start, row_end, col_start, col_end;
1986         int roi_width_mbs, roi_height_mbs;
1987         int mbs_in_roi;
1988         int roi_qp;
1989         float qstep_roi;
1990
1991         col_start = encoder_context->brc.roi[i].left;
1992         col_end = encoder_context->brc.roi[i].right;
1993         row_start = encoder_context->brc.roi[i].top;
1994         row_end = encoder_context->brc.roi[i].bottom;
1995
1996         col_start = col_start / 16;
1997         col_end = (col_end + 15) / 16;
1998         row_start = row_start / 16;
1999         row_end = (row_end + 15) / 16;
2000
2001         roi_width_mbs = col_end - col_start;
2002         roi_height_mbs = row_end - row_start;
2003         mbs_in_roi = roi_width_mbs * roi_height_mbs;
2004
2005         param_regions[i].row_start_in_mb = row_start;
2006         param_regions[i].row_end_in_mb = row_end;
2007         param_regions[i].col_start_in_mb = col_start;
2008         param_regions[i].col_end_in_mb = col_end;
2009         param_regions[i].width_mbs = roi_width_mbs;
2010         param_regions[i].height_mbs = roi_height_mbs;
2011
2012         roi_qp = base_qp + encoder_context->brc.roi[i].value;
2013         BRC_CLIP(roi_qp, min_qp, 51);
2014
2015         param_regions[i].roi_qp = roi_qp;
2016         qstep_roi = intel_h264_qp_qstep(roi_qp);
2017
2018         roi_area += mbs_in_roi;
2019         sum_roi += mbs_in_roi / qstep_roi;
2020     }
2021
2022     total_area = mbs_in_picture;
2023     nonroi_area = total_area - roi_area;
2024
2025     qstep_base = intel_h264_qp_qstep(base_qp);
2026     temp = (total_area / qstep_base - sum_roi);
2027
2028     if (temp < 0) {
2029         nonroi_qp = 51;
2030     } else {
2031         qstep_nonroi = nonroi_area / temp;
2032         nonroi_qp = intel_h264_qstep_qp(qstep_nonroi);
2033     }
2034
2035     BRC_CLIP(nonroi_qp, min_qp, 51);
2036
2037 qp_fill:
2038     memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture);
2039     if (!quickfill) {
2040         char *qp_ptr;
2041
2042         for (i = 0; i < num_roi; i++) {
2043             for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) {
2044                 qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb;
2045                 memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs);
2046             }
2047         }
2048     }
2049     return vaStatus;
2050 }
2051
2052 extern void
2053 intel_h264_enc_roi_config(VADriverContextP ctx,
2054                           struct encode_state *encode_state,
2055                           struct intel_encoder_context *encoder_context)
2056 {
2057     char *qp_ptr;
2058     int i, j;
2059     struct i965_driver_data *i965 = i965_driver_data(ctx);
2060     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2061     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2062     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
2063     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
2064     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
2065
2066     int row_start, row_end, col_start, col_end;
2067     int num_roi = 0;
2068
2069     vme_context->roi_enabled = 0;
2070     /* Restriction: Disable ROI when multi-slice is enabled */
2071     if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
2072         return;
2073
2074     vme_context->roi_enabled = !!encoder_context->brc.num_roi;
2075
2076     if (!vme_context->roi_enabled)
2077         return;
2078
2079     if ((vme_context->saved_width_mbs !=  width_in_mbs) ||
2080         (vme_context->saved_height_mbs != height_in_mbs)) {
2081         free(vme_context->qp_per_mb);
2082         vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);
2083
2084         vme_context->saved_width_mbs = width_in_mbs;
2085         vme_context->saved_height_mbs = height_in_mbs;
2086         assert(vme_context->qp_per_mb);
2087     }
2088     if (encoder_context->rate_control_mode == VA_RC_CBR) {
2089         /*
2090          * TODO: More complex Qp adjust needs to be added.
2091          * Currently it is initialized to slice_qp.
2092          */
2093         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
2094         int qp;
2095         int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
2096
2097         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
2098         intel_h264_enc_roi_cbr(ctx, qp, encode_state, encoder_context);
2099
2100     } else if (encoder_context->rate_control_mode == VA_RC_CQP){
2101         VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2102         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
2103         int qp;
2104         int min_qp = MAX(1, encoder_context->brc.min_qp);
2105
2106         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2107         memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
2108
2109
2110         for (j = num_roi; j ; j--) {
2111             int qp_delta, qp_clip;
2112
2113             col_start = encoder_context->brc.roi[i].left;
2114             col_end = encoder_context->brc.roi[i].right;
2115             row_start = encoder_context->brc.roi[i].top;
2116             row_end = encoder_context->brc.roi[i].bottom;
2117
2118             col_start = col_start / 16;
2119             col_end = (col_end + 15) / 16;
2120             row_start = row_start / 16;
2121             row_end = (row_end + 15) / 16;
2122
2123             qp_delta = encoder_context->brc.roi[i].value;
2124             qp_clip = qp + qp_delta;
2125
2126             BRC_CLIP(qp_clip, min_qp, 51);
2127
2128             for (i = row_start; i < row_end; i++) {
2129                 qp_ptr = vme_context->qp_per_mb + (i * width_in_mbs) + col_start;
2130                 memset(qp_ptr, qp_clip, (col_end - col_start));
2131             }
2132         }
2133     } else {
2134         /*
2135          * TODO: Disable it for non CBR-CQP.
2136          */
2137         vme_context->roi_enabled = 0;
2138     }
2139
2140     if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info))
2141         encoder_context->soft_batch_force = 1;
2142
2143     return;
2144 }
2145
2146 /* HEVC */
2147 static int
2148 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
2149                            VAPictureHEVC *ref_list,
2150                            int num_pictures,
2151                            int dir)
2152 {
2153     int i, found = -1, min = 0x7FFFFFFF;
2154
2155     for (i = 0; i < num_pictures; i++) {
2156         int tmp;
2157
2158         if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
2159             (ref_list[i].picture_id == VA_INVALID_SURFACE))
2160             break;
2161
2162         tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
2163
2164         if (dir)
2165             tmp = -tmp;
2166
2167         if (tmp > 0 && tmp < min) {
2168             min = tmp;
2169             found = i;
2170         }
2171     }
2172
2173     return found;
2174 }
2175 void
2176 intel_hevc_vme_reference_state(VADriverContextP ctx,
2177                                struct encode_state *encode_state,
2178                                struct intel_encoder_context *encoder_context,
2179                                int list_index,
2180                                int surface_index,
2181                                void (* vme_source_surface_state)(
2182                                    VADriverContextP ctx,
2183                                    int index,
2184                                    struct object_surface *obj_surface,
2185                                    struct intel_encoder_context *encoder_context))
2186 {
2187     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2188     struct object_surface *obj_surface = NULL;
2189     struct i965_driver_data *i965 = i965_driver_data(ctx);
2190     VASurfaceID ref_surface_id;
2191     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2192     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2193     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2194     int max_num_references;
2195     VAPictureHEVC *curr_pic;
2196     VAPictureHEVC *ref_list;
2197     int ref_idx;
2198     unsigned int is_hevc10 = 0;
2199     GenHevcSurface *hevc_encoder_surface = NULL;
2200
2201     if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
2202         || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
2203         is_hevc10 = 1;
2204
2205     if (list_index == 0) {
2206         max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
2207         ref_list = slice_param->ref_pic_list0;
2208     } else {
2209         max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
2210         ref_list = slice_param->ref_pic_list1;
2211     }
2212
2213     if (max_num_references == 1) {
2214         if (list_index == 0) {
2215             ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
2216             vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
2217         } else {
2218             ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
2219             vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
2220         }
2221
2222         if (ref_surface_id != VA_INVALID_SURFACE)
2223             obj_surface = SURFACE(ref_surface_id);
2224
2225         if (!obj_surface ||
2226             !obj_surface->bo) {
2227             obj_surface = encode_state->reference_objects[list_index];
2228             vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
2229         }
2230
2231         ref_idx = 0;
2232     } else {
2233         curr_pic = &pic_param->decoded_curr_pic;
2234
2235         /* select the reference frame in temporal space */
2236         ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
2237         ref_surface_id = ref_list[ref_idx].picture_id;
2238
2239         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
2240             obj_surface = SURFACE(ref_surface_id);
2241
2242         vme_context->used_reference_objects[list_index] = obj_surface;
2243         vme_context->used_references[list_index] = &ref_list[ref_idx];
2244     }
2245
2246     if (obj_surface &&
2247         obj_surface->bo) {
2248         assert(ref_idx >= 0);
2249         vme_context->used_reference_objects[list_index] = obj_surface;
2250
2251         if(is_hevc10){
2252             hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2253             assert(hevc_encoder_surface);
2254             obj_surface = hevc_encoder_surface->nv12_surface_obj;
2255         }
2256         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
2257         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
2258                 ref_idx << 16 |
2259                 ref_idx <<  8 |
2260                 ref_idx);
2261     } else {
2262         vme_context->used_reference_objects[list_index] = NULL;
2263         vme_context->used_references[list_index] = NULL;
2264         vme_context->ref_index_in_mb[list_index] = 0;
2265     }
2266 }
2267
2268 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
2269                                      struct encode_state *encode_state,
2270                                      struct intel_encoder_context *encoder_context)
2271 {
2272     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2273     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2274     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2275     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2276     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2277     int qp, m_cost, j, mv_count;
2278     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
2279     float   lambda, m_costf;
2280
2281     /* here no SI SP slice for HEVC, do not need slice fixup */
2282     int slice_type = slice_param->slice_type;
2283
2284
2285     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2286
2287     if(encoder_context->rate_control_mode == VA_RC_CBR)
2288     {
2289         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
2290         if(slice_type == HEVC_SLICE_B) {
2291             if(pSequenceParameter->ip_period == 1)
2292             {
2293                 slice_type = HEVC_SLICE_P;
2294                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2295
2296             }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
2297                 slice_type = HEVC_SLICE_P;
2298                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2299             }
2300         }
2301
2302     }
2303
2304     if (vme_state_message == NULL)
2305         return;
2306
2307     assert(qp <= QP_MAX);
2308     lambda = intel_lambda_qp(qp);
2309     if (slice_type == HEVC_SLICE_I) {
2310         vme_state_message[MODE_INTRA_16X16] = 0;
2311         m_cost = lambda * 4;
2312         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2313         m_cost = lambda * 16;
2314         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2315         m_cost = lambda * 3;
2316         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2317     } else {
2318         m_cost = 0;
2319         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
2320         for (j = 1; j < 3; j++) {
2321             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2322             m_cost = (int)m_costf;
2323             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2324         }
2325         mv_count = 3;
2326         for (j = 4; j <= 64; j *= 2) {
2327             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2328             m_cost = (int)m_costf;
2329             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
2330             mv_count++;
2331         }
2332
2333         if (qp <= 25) {
2334             vme_state_message[MODE_INTRA_16X16] = 0x4a;
2335             vme_state_message[MODE_INTRA_8X8] = 0x4a;
2336             vme_state_message[MODE_INTRA_4X4] = 0x4a;
2337             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2338             vme_state_message[MODE_INTER_16X16] = 0x4a;
2339             vme_state_message[MODE_INTER_16X8] = 0x4a;
2340             vme_state_message[MODE_INTER_8X8] = 0x4a;
2341             vme_state_message[MODE_INTER_8X4] = 0x4a;
2342             vme_state_message[MODE_INTER_4X4] = 0x4a;
2343             vme_state_message[MODE_INTER_BWD] = 0x2a;
2344             return;
2345         }
2346         m_costf = lambda * 10;
2347         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2348         m_cost = lambda * 14;
2349         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2350         m_cost = lambda * 24;
2351         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2352         m_costf = lambda * 3.5;
2353         m_cost = m_costf;
2354         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2355         if (slice_type == HEVC_SLICE_P) {
2356             m_costf = lambda * 2.5;
2357             m_cost = m_costf;
2358             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2359             m_costf = lambda * 4;
2360             m_cost = m_costf;
2361             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2362             m_costf = lambda * 1.5;
2363             m_cost = m_costf;
2364             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2365             m_costf = lambda * 3;
2366             m_cost = m_costf;
2367             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2368             m_costf = lambda * 5;
2369             m_cost = m_costf;
2370             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2371             /* BWD is not used in P-frame */
2372             vme_state_message[MODE_INTER_BWD] = 0;
2373         } else {
2374             m_costf = lambda * 2.5;
2375             m_cost = m_costf;
2376             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2377             m_costf = lambda * 5.5;
2378             m_cost = m_costf;
2379             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2380             m_costf = lambda * 3.5;
2381             m_cost = m_costf;
2382             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2383             m_costf = lambda * 5.0;
2384             m_cost = m_costf;
2385             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2386             m_costf = lambda * 6.5;
2387             m_cost = m_costf;
2388             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2389             m_costf = lambda * 1.5;
2390             m_cost = m_costf;
2391             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
2392         }
2393     }
2394 }