1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "gen9_mfc.h"
45 #include "intel_media.h"
46
47 #ifndef HAVE_LOG2F
48 #define log2f(x) (logf(x)/(float)M_LN2)
49 #endif
50
51 int intel_avc_enc_slice_type_fixup(int slice_type)
52 {
53     if (slice_type == SLICE_TYPE_SP ||
54         slice_type == SLICE_TYPE_P)
55         slice_type = SLICE_TYPE_P;
56     else if (slice_type == SLICE_TYPE_SI ||
57              slice_type == SLICE_TYPE_I)
58         slice_type = SLICE_TYPE_I;
59     else {
60         if (slice_type != SLICE_TYPE_B)
61             WARN_ONCE("Invalid slice type for H.264 encoding!\n");
62
63         slice_type = SLICE_TYPE_B;
64     }
65
66     return slice_type;
67 }
68
69 static void
70 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state, 
71                                         struct gen6_mfc_context *mfc_context)
72 {
73     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
74     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
75     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
76     float fps =  pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
77     int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
78     int intra_mb_size = inter_mb_size * 5.0;
79     int i;
80
81     mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size;
82     mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
83     mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size;
84     mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
85     mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_mb_size = inter_mb_size;
86     mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
87
88     for(i = 0 ; i < 3; i++) {
89         mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
90         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
91         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
92         mfc_context->bit_rate_control_context[i].GrowInit = 6;
93         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
94         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
95         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
96         
97         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
98         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
99         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
100         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
101         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
102         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
103     }
104     
105     mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16;
106     mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16;
107     mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord = (inter_mb_size + 16)/ 16;
108
109     mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5;
110     mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5;
111     mfc_context->bit_rate_control_context[SLICE_TYPE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord * 1.5;
112 }
113
114 static void intel_mfc_brc_init(struct encode_state *encode_state,
115                                struct intel_encoder_context* encoder_context)
116 {
117     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
118     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
119     VAEncMiscParameterBuffer* pMiscParamHRD = NULL;
120     VAEncMiscParameterHRD* pParameterHRD = NULL;
121     double bitrate = pSequenceParameter->bits_per_second;
122     double framerate = (double)pSequenceParameter->time_scale /(2 * (double)pSequenceParameter->num_units_in_tick);
123     int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
124     int intra_period = pSequenceParameter->intra_period;
125     int ip_period = pSequenceParameter->ip_period;
126     double qp1_size = 0.1 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
127     double qp51_size = 0.001 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
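    /* qp1_size / qp51_size: rough expected frame sizes at QP 1 and QP 51,
     * i.e. 10% and 0.1% of a raw 4:2:0 frame (8 * 3 / 2 bits per pixel). */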
128     double bpf;
129
130     if (!encode_state->misc_param[VAEncMiscParameterTypeHRD] || !encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer)
131         return;
132
133     pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
134     pParameterHRD = (VAEncMiscParameterHRD*)pMiscParamHRD->data;
135
136     if (pSequenceParameter->ip_period) {
137         pnum = (intra_period + ip_period - 1)/ip_period - 1;
138         bnum = intra_period - inum - pnum;
139     }
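    /* e.g. intra_period 30, ip_period 3 -> inum 1, pnum 9, bnum 20 per GOP. */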
140
141     mfc_context->brc.mode = encoder_context->rate_control_mode;
142
143     mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
144                                                              (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
145     mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
146     mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
147
148     mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
149     mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
150     mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
151
152     bpf = mfc_context->brc.bits_per_frame = bitrate/framerate;
153
154     mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size;
155     mfc_context->hrd.current_buffer_fullness =
156         (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
157         pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
158     mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
159     mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
160     mfc_context->hrd.violation_noted = 0;
161
162     if ((bpf > qp51_size) && (bpf < qp1_size)) {
163         mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
164     }
165     else if (bpf >= qp1_size)
166         mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 1;
167     else if (bpf <= qp51_size)
168         mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51;
169
170     mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
171     mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
172
173     BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
174     BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
175     BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
176 }
177
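/*
 * HRD (leaky-bucket) bookkeeping: each encoded frame drains frame_bits from
 * the buffer and the channel refills it with brc.bits_per_frame.  Draining
 * below zero reports BRC_UNDERFLOW; filling past buffer_size reports
 * BRC_OVERFLOW (except in VBR mode, where the fullness is simply clamped).
 * In both violation cases the fullness is left at its previous value.
 */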
178 int intel_mfc_update_hrd(struct encode_state *encode_state,
179                          struct gen6_mfc_context *mfc_context,
180                          int frame_bits)
181 {
182     double prev_bf = mfc_context->hrd.current_buffer_fullness;
183
184     mfc_context->hrd.current_buffer_fullness -= frame_bits;
185
186     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
187         mfc_context->hrd.current_buffer_fullness = prev_bf;
188         return BRC_UNDERFLOW;
189     }
190     
191     mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
192     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
193         if (mfc_context->brc.mode == VA_RC_VBR)
194             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
195         else {
196             mfc_context->hrd.current_buffer_fullness = prev_bf;
197             return BRC_OVERFLOW;
198         }
199     }
200     return BRC_NO_HRD_VIOLATION;
201 }
202
203 int intel_mfc_brc_postpack(struct encode_state *encode_state,
204                            struct gen6_mfc_context *mfc_context,
205                            int frame_bits)
206 {
207     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
208     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; 
209     int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
210     int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
211     int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
212     int qpb = mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY;
213     int qp; // quantizer of previously encoded slice of current type
214     int qpn; // predicted quantizer for next frame of current type in integer format
215     double qpf; // predicted quantizer for next frame of current type in float format
216     double delta_qp; // QP correction
217     int target_frame_size, frame_size_next;
218     /* Notes:
219      *  x - how far we are from the target HRD buffer fullness (normalized)
220      *  y - how far we are from the nearer HRD buffer border (in bits)
221      */
222     double x, y;
223     double frame_size_alpha;
224
225     qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;
226
227     target_frame_size = mfc_context->brc.target_frame_size[slicetype];
228     if (mfc_context->hrd.buffer_capacity < 5)
229         frame_size_alpha = 0;
230     else
231         frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
232     if (frame_size_alpha > 30) frame_size_alpha = 30;
233     frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
234         (double)(frame_size_alpha + 1.);
235
236     /* clamp frame_size_next so it never goes negative or becomes too small */
237     if ((double)frame_size_next < (double)(target_frame_size * 0.25))
238         frame_size_next = (int)((double)target_frame_size * 0.25);
239
240     qpf = (double)qp * target_frame_size / frame_size_next;
241     qpn = (int)(qpf + 0.5);
242
243     if (qpn == qp) {
244         /* qpn is qpf rounded to an integer, so rounding error accumulates: compensate for it here */
245         mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
246         if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
247             qpn++;
248             mfc_context->brc.qpf_rounding_accumulator = 0.;
249         } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
250             qpn--;
251             mfc_context->brc.qpf_rounding_accumulator = 0.;
252         }
253     }
254     /* making sure that QP is not changing too fast */
255     if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
256     else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
257     /* make sure the predicted QP stays within the valid QP range */
258     BRC_CLIP(qpn, 1, 51);
259
260     /* check whether HRD compliance is still met */
261     sts = intel_mfc_update_hrd(encode_state, mfc_context, frame_bits);
262
263     /* compute a QP correction from the HRD buffer state */
264     x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
265     if (x > 0) {
266         x /= mfc_context->hrd.target_buffer_fullness;
267         y = mfc_context->hrd.current_buffer_fullness;
268     }
269     else {
270         x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
271         y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
272     }
273     if (y < 0.01) y = 0.01;
274     if (x > 1) x = 1;
275     else if (x < -1) x = -1;
276
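    /* delta_qp = BRC_QP_MAX_CHANGE * exp(-1/y) * sin(BRC_PI_0_5 * x): the sine
     * term scales the correction with the normalized deviation x from the
     * target fullness (x > 0 means the buffer is emptier than the target, so
     * QP is pushed up), while exp(-1/y) fades the correction out as the
     * fullness approaches an empty/full buffer border (small y). */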
277     delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
278     qpn = (int)(qpn + delta_qp + 0.5);
279
280     /* make sure the predicted QP stays within the valid QP range */
281     BRC_CLIP(qpn, 1, 51);
282
283     if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
284         /* correcting QPs of slices of other types */
285         if (slicetype == SLICE_TYPE_P) {
286             if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
287                 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
288             if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
289                 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
290         } else if (slicetype == SLICE_TYPE_I) {
291             if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
292                 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
293             if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
294                 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
295         } else { // SLICE_TYPE_B
296             if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
297                 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
298             if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
299                 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
300         }
301         BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
302         BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
303         BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
304     } else if (sts == BRC_UNDERFLOW) { // underflow
305         if (qpn <= qp) qpn = qp + 1;
306         if (qpn > 51) {
307             qpn = 51;
308             sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
309         }
310     } else if (sts == BRC_OVERFLOW) {
311         if (qpn >= qp) qpn = qp - 1;
312         if (qpn < 1) { // overflow even at minimum QP
313             qpn = 1;
314             sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
315         }
316     }
317
318     mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
319
320     return sts;
321 }
322
323 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
324                                        struct intel_encoder_context *encoder_context)
325 {
326     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
327     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
328     unsigned int rate_control_mode = encoder_context->rate_control_mode;
329     int target_bit_rate = pSequenceParameter->bits_per_second;
330     
331     // Currently only CBR mode is supported.
332     if (rate_control_mode == VA_RC_CBR) {
333         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
334         mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
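        /* Half of the CPB, converted to a removal delay in 90 kHz clock ticks
         * (the unit used by the H.264 buffering-period SEI):
         * (i_cpb_size_value * 0.5 * 1024) bits / bitrate seconds * 90000. */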
335         mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
336         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
337         mfc_context->vui_hrd.i_frame_number = 0;
338
339         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24; 
340         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
341         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
342     }
343
344 }
345
346 void 
347 intel_mfc_hrd_context_update(struct encode_state *encode_state, 
348                              struct gen6_mfc_context *mfc_context)
349 {
350     mfc_context->vui_hrd.i_frame_number++;
351 }
352
353 int intel_mfc_interlace_check(VADriverContextP ctx,
354                               struct encode_state *encode_state,
355                               struct intel_encoder_context *encoder_context)
356 {
357     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
358     VAEncSliceParameterBufferH264 *pSliceParameter;
359     int i;
360     int mbCount = 0;
361     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
362     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
363   
364     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
365         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer; 
366         mbCount += pSliceParameter->num_macroblocks; 
367     }
368     
369     if ( mbCount == ( width_in_mbs * height_in_mbs ) )
370         return 0;
371
372     return 1;
373 }
374
375 /*
376  * Check whether the CBR-related parameters have changed and decide whether
377  * the CBR configuration needs to be reinitialized.
378  * Currently the following parameters are checked:
379  *      bits_per_second
380  *      frame_rate
381  *      gop_configuration(intra_period, ip_period, intra_idr_period)
382  */
383 static bool intel_mfc_brc_updated_check(struct encode_state *encode_state,
384                            struct intel_encoder_context *encoder_context)
385 {
386     unsigned int rate_control_mode = encoder_context->rate_control_mode;
387     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
388     double cur_fps, cur_bitrate;
389     VAEncSequenceParameterBufferH264 *pSequenceParameter;
390
391
392     if (rate_control_mode != VA_RC_CBR) {
393         return false;
394     }
395
396     pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
397
398     cur_bitrate = pSequenceParameter->bits_per_second;
399     cur_fps = (double)pSequenceParameter->time_scale /
400                 (2 * (double)pSequenceParameter->num_units_in_tick);
401
402     if ((cur_bitrate == mfc_context->brc.saved_bps) &&
403         (cur_fps == mfc_context->brc.saved_fps) &&
404         (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period) &&
405         (pSequenceParameter->intra_idr_period == mfc_context->brc.saved_idr_period) &&
406         (pSequenceParameter->ip_period == mfc_context->brc.saved_ip_period)) {
407         /* the CBR-related parameters have not been updated */
408         return false;
409     }
410
411     mfc_context->brc.saved_ip_period = pSequenceParameter->ip_period;
412     mfc_context->brc.saved_intra_period = pSequenceParameter->intra_period;
413     mfc_context->brc.saved_idr_period = pSequenceParameter->intra_idr_period;
414     mfc_context->brc.saved_fps = cur_fps;
415     mfc_context->brc.saved_bps = cur_bitrate;
416     return true;
417 }
418
419 void intel_mfc_brc_prepare(struct encode_state *encode_state,
420                            struct intel_encoder_context *encoder_context)
421 {
422     unsigned int rate_control_mode = encoder_context->rate_control_mode;
423     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
424
425     if (rate_control_mode == VA_RC_CBR) {
426         bool brc_updated;
427         assert(encoder_context->codec != CODEC_MPEG2);
428
429         brc_updated = intel_mfc_brc_updated_check(encode_state, encoder_context);
430
431         /* Program bit rate control */
432         if ((mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0) ||
433              brc_updated) {
434             intel_mfc_bit_rate_control_context_init(encode_state, mfc_context);
435             intel_mfc_brc_init(encode_state, encoder_context);
436         }
437
438         /* Program HRD control */
439         if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated )
440             intel_mfc_hrd_context_init(encode_state, encoder_context);    
441     }
442 }
443
444 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
445                                               struct encode_state *encode_state,
446                                               struct intel_encoder_context *encoder_context,
447                                               struct intel_batchbuffer *slice_batch)
448 {
449     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
450     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
451     unsigned int rate_control_mode = encoder_context->rate_control_mode;
452     unsigned int skip_emul_byte_cnt;
453
454     if (encode_state->packed_header_data[idx]) {
455         VAEncPackedHeaderParameterBuffer *param = NULL;
456         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
457         unsigned int length_in_bits;
458
459         assert(encode_state->packed_header_param[idx]);
460         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
461         length_in_bits = param->bit_length;
462
463         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
464         mfc_context->insert_object(ctx,
465                                    encoder_context,
466                                    header_data,
467                                    ALIGN(length_in_bits, 32) >> 5,
468                                    length_in_bits & 0x1f,
469                                    skip_emul_byte_cnt,
470                                    0,
471                                    0,
472                                    !param->has_emulation_bytes,
473                                    slice_batch);
474     }
475
476     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
477
478     if (encode_state->packed_header_data[idx]) {
479         VAEncPackedHeaderParameterBuffer *param = NULL;
480         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
481         unsigned int length_in_bits;
482
483         assert(encode_state->packed_header_param[idx]);
484         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
485         length_in_bits = param->bit_length;
486
487         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
488
489         mfc_context->insert_object(ctx,
490                                    encoder_context,
491                                    header_data,
492                                    ALIGN(length_in_bits, 32) >> 5,
493                                    length_in_bits & 0x1f,
494                                    skip_emul_byte_cnt,
495                                    0,
496                                    0,
497                                    !param->has_emulation_bytes,
498                                    slice_batch);
499     }
500     
501     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
502
503     if (encode_state->packed_header_data[idx]) {
504         VAEncPackedHeaderParameterBuffer *param = NULL;
505         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
506         unsigned int length_in_bits;
507
508         assert(encode_state->packed_header_param[idx]);
509         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
510         length_in_bits = param->bit_length;
511
512         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
513         mfc_context->insert_object(ctx,
514                                    encoder_context,
515                                    header_data,
516                                    ALIGN(length_in_bits, 32) >> 5,
517                                    length_in_bits & 0x1f,
518                                    skip_emul_byte_cnt,
519                                    0,
520                                    0,
521                                    !param->has_emulation_bytes,
522                                    slice_batch);
523     } else if (rate_control_mode == VA_RC_CBR) {
524         // this is the first AU
525         struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
526
527         unsigned char *sei_data = NULL;
528     
529         int length_in_bits = build_avc_sei_buffer_timing(
530             mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
531             mfc_context->vui_hrd.i_initial_cpb_removal_delay,
532             0,
533             mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
534             mfc_context->vui_hrd.i_dpb_output_delay_length,
535             0,
536             &sei_data);
537         mfc_context->insert_object(ctx,
538                                    encoder_context,
539                                    (unsigned int *)sei_data,
540                                    ALIGN(length_in_bits, 32) >> 5,
541                                    length_in_bits & 0x1f,
542                                    5,
543                                    0,   
544                                    0,   
545                                    1,
546                                    slice_batch);  
547         free(sei_data);
548     }
549 }
550
551 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, 
552                                struct encode_state *encode_state,
553                                struct intel_encoder_context *encoder_context)
554 {
555     struct i965_driver_data *i965 = i965_driver_data(ctx);
556     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
557     struct object_surface *obj_surface; 
558     struct object_buffer *obj_buffer;
559     GenAvcSurface *gen6_avc_surface;
560     dri_bo *bo;
561     VAStatus vaStatus = VA_STATUS_SUCCESS;
562     int i, j, enable_avc_ildb = 0;
563     VAEncSliceParameterBufferH264 *slice_param;
564     struct i965_coded_buffer_segment *coded_buffer_segment;
565     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
566     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
567     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
568
569     if (IS_GEN6(i965->intel.device_info)) {
570         /* On SNB the width used for the DMV buffer must be fixed at 128 MBs */
571         width_in_mbs = 128;
572     }
573
574     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
575         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
576         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
577
578         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
579             assert((slice_param->slice_type == SLICE_TYPE_I) ||
580                    (slice_param->slice_type == SLICE_TYPE_SI) ||
581                    (slice_param->slice_type == SLICE_TYPE_P) ||
582                    (slice_param->slice_type == SLICE_TYPE_SP) ||
583                    (slice_param->slice_type == SLICE_TYPE_B));
584
585             if (slice_param->disable_deblocking_filter_idc != 1) {
586                 enable_avc_ildb = 1;
587                 break;
588             }
589
590             slice_param++;
591         }
592     }
593
594     /* Set up all the input & output objects */
595
596     /* Set up the current frame and its direct MV buffers */
597     obj_surface = encode_state->reconstructed_object;
598     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
599
600     if ( obj_surface->private_data == NULL) {
601         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
602         assert(gen6_avc_surface);
603         gen6_avc_surface->dmv_top = 
604             dri_bo_alloc(i965->intel.bufmgr,
605                          "Buffer",
606                          68 * width_in_mbs * height_in_mbs, 
607                          64);
608         gen6_avc_surface->dmv_bottom = 
609             dri_bo_alloc(i965->intel.bufmgr,
610                          "Buffer",
611                          68 * width_in_mbs * height_in_mbs, 
612                          64);
613         assert(gen6_avc_surface->dmv_top);
614         assert(gen6_avc_surface->dmv_bottom);
615         obj_surface->private_data = (void *)gen6_avc_surface;
616         obj_surface->free_private_data = (void *)gen_free_avc_surface; 
617     }
618     gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
619     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
620     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
621     dri_bo_reference(gen6_avc_surface->dmv_top);
622     dri_bo_reference(gen6_avc_surface->dmv_bottom);
623
624     if (enable_avc_ildb) {
625         mfc_context->post_deblocking_output.bo = obj_surface->bo;
626         dri_bo_reference(mfc_context->post_deblocking_output.bo);
627     } else {
628         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
629         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
630     }
631
632     mfc_context->surface_state.width = obj_surface->orig_width;
633     mfc_context->surface_state.height = obj_surface->orig_height;
634     mfc_context->surface_state.w_pitch = obj_surface->width;
635     mfc_context->surface_state.h_pitch = obj_surface->height;
636     
637     /* Set up reference frames and their direct MV buffers */
638     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
639         obj_surface = encode_state->reference_objects[i];
640         
641         if (obj_surface && obj_surface->bo) {
642             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
643             dri_bo_reference(obj_surface->bo);
644
645             /* Check DMV buffer */
646             if ( obj_surface->private_data == NULL) {
647                 
648                 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
649                 assert(gen6_avc_surface);
650                 gen6_avc_surface->dmv_top = 
651                     dri_bo_alloc(i965->intel.bufmgr,
652                                  "Buffer",
653                                  68 * width_in_mbs * height_in_mbs, 
654                                  64);
655                 gen6_avc_surface->dmv_bottom = 
656                     dri_bo_alloc(i965->intel.bufmgr,
657                                  "Buffer",
658                                  68 * width_in_mbs * height_in_mbs, 
659                                  64);
660                 assert(gen6_avc_surface->dmv_top);
661                 assert(gen6_avc_surface->dmv_bottom);
662                 obj_surface->private_data = gen6_avc_surface;
663                 obj_surface->free_private_data = gen_free_avc_surface; 
664             }
665     
666             gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
667             /* Set up the DMV buffers */
668             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
669             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
670             dri_bo_reference(gen6_avc_surface->dmv_top);
671             dri_bo_reference(gen6_avc_surface->dmv_bottom);
672         } else {
673             break;
674         }
675     }
676
677     mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
678     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
679
680     obj_buffer = encode_state->coded_buf_object;
681     bo = obj_buffer->buffer_store->bo;
682     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
683     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
684     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
685     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
686     
687     dri_bo_map(bo, 1);
688     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
689     coded_buffer_segment->mapped = 0;
690     coded_buffer_segment->codec = encoder_context->codec;
691     dri_bo_unmap(bo);
692
693     return vaStatus;
694 }
695 /*
696  * Each LUT entry packs a pair of 4-bit fields, (shift, base), so that
697  * value = base * 2^shift, i.e. 2^k * x = value with x (the base) in [1, 15].
698  * So each cost has to be converted into the nearest LUT representation.
699  * The derivation is:
700  *    2^k * x = 2^n * (1 + deltaX)
701  *    k + log2(x) = n + log2(1 + deltaX)
702  *    log2(x) = n - k + log2(1 + deltaX)
703  *    As x is in the range [1, 15]:
704  *      4 > n - k + log2(1 + deltaX) >= 0
705  *      =>    n + log2(1 + deltaX) >= k > n - 4 + log2(1 + deltaX)
706  *    From this range of k the nearest LUT representation is chosen.
707  */
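/*
 * Illustrative example: a cost of 100 is encoded as shift 3, base 12
 * (LUT byte 0x3c), which decodes back to 12 << 3 = 96.
 */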
708 int intel_format_lutvalue(int value, int max)
709 {
710     int ret;
711     int logvalue, temp1, temp2;
712
713     if (value <= 0)
714         return 0;
715
716     logvalue = (int)(log2f((float)value));
717     if (logvalue < 4) {
718         ret = value;
719     } else {
720         int error, temp_value, base, j, temp_err;
721         error = value;
722         j = logvalue - 4 + 1;
723         ret = -1;
724         for(; j <= logvalue; j++) {
725             if (j == 0) {
726                 base = value >> j;
727             } else {
728                 base = (value + (1 << (j - 1)) - 1) >> j;
729             }
730             if (base >= 16)
731                 continue;
732
733             temp_value = base << j;
734             temp_err = abs(value - temp_value);
735             if (temp_err < error) {
736                 error = temp_err;
737                 ret = (j << 4) | base;
738                 if (temp_err == 0)
739                     break;
740             }
741         }
742     }
743     temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
744     temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
745     if (temp1 > temp2)
746         ret = max;
747     return ret;
748
749 }
750
751
752 #define         QP_MAX                  52
753 #define         VP8_QP_MAX              128
754
755
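/*
 * Cost-scaling lambda derived from QP: lambda = round(2^((qp - 12) / 6)),
 * clamped to 1 for qp < 12.  For example, qp 26 gives lambda 5 and qp 38
 * gives lambda 20.
 */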
756 static float intel_lambda_qp(int qp)
757 {
758     float value, lambdaf;
759     value = qp;
760     value = value / 6 - 2;
761     if (value < 0)
762         value = 0;
763     lambdaf = roundf(powf(2, value));
764     return lambdaf;
765 }
766
767 static
768 void intel_h264_calc_mbmvcost_qp(int qp,
769                                  int slice_type,
770                                  uint8_t *vme_state_message)
771 {
772     int m_cost, j, mv_count;
773     float   lambda, m_costf;
774
775     assert(qp <= QP_MAX); 
776     lambda = intel_lambda_qp(qp);
777
778     m_cost = lambda;
779     vme_state_message[MODE_CHROMA_INTRA] = 0;
780     vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
781
782     if (slice_type == SLICE_TYPE_I) {
783         vme_state_message[MODE_INTRA_16X16] = 0;
784         m_cost = lambda * 4;
785         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
786         m_cost = lambda * 16; 
787         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
788         m_cost = lambda * 3;
789         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
790     } else {
791         m_cost = 0;
792         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
793         for (j = 1; j < 3; j++) {
794             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
795             m_cost = (int)m_costf;
796             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
797         }
798         mv_count = 3;
799         for (j = 4; j <= 64; j *= 2) {
800             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
801             m_cost = (int)m_costf;
802             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
803             mv_count++;
804         }
805
806         if (qp <= 25) {
807             vme_state_message[MODE_INTRA_16X16] = 0x4a;
808             vme_state_message[MODE_INTRA_8X8] = 0x4a;
809             vme_state_message[MODE_INTRA_4X4] = 0x4a;
810             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
811             vme_state_message[MODE_INTER_16X16] = 0x4a;
812             vme_state_message[MODE_INTER_16X8] = 0x4a;
813             vme_state_message[MODE_INTER_8X8] = 0x4a;
814             vme_state_message[MODE_INTER_8X4] = 0x4a;
815             vme_state_message[MODE_INTER_4X4] = 0x4a;
816             vme_state_message[MODE_INTER_BWD] = 0x2a;
817             return;
818         }
819         m_cost = lambda * 10;
820         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
821         m_cost = lambda * 14;
822         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
823         m_cost = lambda * 24; 
824         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
825         m_costf = lambda * 3.5;
826         m_cost = m_costf;
827         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
828         if (slice_type == SLICE_TYPE_P) {
829             m_costf = lambda * 2.5;
830             m_cost = m_costf;
831             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
832             m_costf = lambda * 4;
833             m_cost = m_costf;
834             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
835             m_costf = lambda * 1.5;
836             m_cost = m_costf;
837             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
838             m_costf = lambda * 3;
839             m_cost = m_costf;
840             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
841             m_costf = lambda * 5;
842             m_cost = m_costf;
843             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
844             /* BWD is not used in P-frame */
845             vme_state_message[MODE_INTER_BWD] = 0;
846         } else {
847             m_costf = lambda * 2.5;
848             m_cost = m_costf;
849             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
850             m_costf = lambda * 5.5;
851             m_cost = m_costf;
852             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
853             m_costf = lambda * 3.5;
854             m_cost = m_costf;
855             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
856             m_costf = lambda * 5.0;
857             m_cost = m_costf;
858             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
859             m_costf = lambda * 6.5;
860             m_cost = m_costf;
861             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
862             m_costf = lambda * 1.5;
863             m_cost = m_costf;
864             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
865         }
866     }
867     return;
868 }
869
870 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
871                                 struct encode_state *encode_state,
872                                 struct intel_encoder_context *encoder_context)
873 {
874     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
875     struct gen6_vme_context *vme_context = encoder_context->vme_context;
876     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
877     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
878     int qp;
879     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
880
881     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
882
883     if (encoder_context->rate_control_mode == VA_RC_CQP)
884         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
885     else
886         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
887
888     if (vme_state_message == NULL)
889         return;
890
891     intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
892 }
893
894 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
895                                 struct encode_state *encode_state,
896                                 struct intel_encoder_context *encoder_context)
897 {
898     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
899     struct gen6_vme_context *vme_context = encoder_context->vme_context;
900     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
901     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
902     int qp, m_cost, j, mv_count;
903     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
904     float   lambda, m_costf;
905
906     int is_key_frame = !pic_param->pic_flags.bits.frame_type;
907     int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
908   
909     if (vme_state_message == NULL)
910         return;
911  
912     if (encoder_context->rate_control_mode == VA_RC_CQP)
913         qp = q_matrix->quantization_index[0];
914     else
915         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
916
917     lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
918
919     m_cost = lambda;
920     vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
921
922     if (is_key_frame) {
923         vme_state_message[MODE_INTRA_16X16] = 0;
924         m_cost = lambda * 16; 
925         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
926         m_cost = lambda * 3;
927         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
928     } else {
929         m_cost = 0;
930         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
931         for (j = 1; j < 3; j++) {
932             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
933             m_cost = (int)m_costf;
934             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
935         }
936         mv_count = 3;
937         for (j = 4; j <= 64; j *= 2) {
938             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
939             m_cost = (int)m_costf;
940             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
941             mv_count++;
942         }
943
944         if (qp < 92 ) {
945             vme_state_message[MODE_INTRA_16X16] = 0x4a;
946             vme_state_message[MODE_INTRA_4X4] = 0x4a;
947             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
948             vme_state_message[MODE_INTER_16X16] = 0x4a;
949             vme_state_message[MODE_INTER_16X8] = 0x4a;
950             vme_state_message[MODE_INTER_8X8] = 0x4a;
951             vme_state_message[MODE_INTER_4X4] = 0x4a;
952             vme_state_message[MODE_INTER_BWD] = 0;
953             return;
954         }
955         m_cost = lambda * 10;
956         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
957         m_cost = lambda * 24; 
958         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
959             
960         m_costf = lambda * 3.5;
961         m_cost = m_costf;
962         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
963
964         m_costf = lambda * 2.5;
965         m_cost = m_costf;
966         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
967         m_costf = lambda * 4;
968         m_cost = m_costf;
969         vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
970         m_costf = lambda * 1.5;
971         m_cost = m_costf;
972         vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
973         m_costf = lambda * 5;
974         m_cost = m_costf;
975         vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
976         /* BWD is not used in P-frame */
977         vme_state_message[MODE_INTER_BWD] = 0;
978     }
979 }
980
981 #define         MB_SCOREBOARD_A         (1 << 0)
982 #define         MB_SCOREBOARD_B         (1 << 1)
983 #define         MB_SCOREBOARD_C         (1 << 2)
984 void 
985 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
986 {
987     vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
988     vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
989     vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
990                                                            MB_SCOREBOARD_B |
991                                                            MB_SCOREBOARD_C);
992
993     /* In VME prediction the current MB depends on its neighbouring
994      * A/B/C macroblocks, so the left/top/top-right dependencies
995      * must be considered.
996      */
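    /* Dependency deltas relative to the current MB:
     * (-1, 0) -> A (left), (0, -1) -> B (top), (1, -1) -> C (top-right). */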
997     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
998     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
999     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
1000     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
1001     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
1002     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
1003
1004     vme_context->gpe_context.vfe_desc7.dword = 0;
1005     return;
1006 }
1007
1008 /* return 0 if the MB at (x_index, y_index) lies within the picture and the current slice, -1 otherwise */
1009 static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
1010 {
1011     int mb_index;
1012     if (x_index < 0 || x_index >= mb_width)
1013         return -1;
1014     if (y_index < 0 || y_index >= mb_height)
1015         return -1;
1016
1017     mb_index = y_index * mb_width + x_index;
1018     if (mb_index < first_mb || mb_index > (first_mb + num_mb))
1019         return -1;
1020     return 0;
1021 }
1022
1023 void
1024 gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
1025                                      struct encode_state *encode_state,
1026                                      int mb_width, int mb_height,
1027                                      int kernel,
1028                                      int transform_8x8_mode_flag,
1029                                      struct intel_encoder_context *encoder_context)
1030 {
1031     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1032     int mb_row;
1033     int s;
1034     unsigned int *command_ptr;
1035     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1036     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1037     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1038     int qp,qp_mb,qp_index;
1039     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1040
1041     if (encoder_context->rate_control_mode == VA_RC_CQP)
1042         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1043     else
1044         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1045
1046 #define         USE_SCOREBOARD          (1 << 21)
1047  
1048     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1049     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1050
1051     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1052         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
1053         int first_mb = pSliceParameter->macroblock_address;
1054         int num_mb = pSliceParameter->num_macroblocks;
1055         unsigned int mb_intra_ub, score_dep;
1056         int x_outer, y_outer, x_inner, y_inner;
1057         int xtemp_outer = 0;
1058
1059         x_outer = first_mb % mb_width;
1060         y_outer = first_mb / mb_width;
1061         mb_row = y_outer;
1062
1063         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1064             x_inner = x_outer;
1065             y_inner = y_outer;
1066             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1067                 mb_intra_ub = 0;
1068                 score_dep = 0;
1069                 if (x_inner != 0) {
1070                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1071                     score_dep |= MB_SCOREBOARD_A; 
1072                 }
1073                 if (y_inner != mb_row) {
1074                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1075                     score_dep |= MB_SCOREBOARD_B;
1076                     if (x_inner != 0)
1077                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1078                     if (x_inner != (mb_width -1)) {
1079                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1080                         score_dep |= MB_SCOREBOARD_C;
1081                     }
1082                 }
1083
1084                 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1085                 *command_ptr++ = kernel;
1086                 *command_ptr++ = USE_SCOREBOARD;
1087                 /* Indirect data */
1088                 *command_ptr++ = 0;
1089                 /* the (X, Y) term of scoreboard */
1090                 *command_ptr++ = ((y_inner << 16) | x_inner);
1091                 *command_ptr++ = score_dep;
1092                 /*inline data */
1093                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1094                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1095                 /* QP occupies one byte */
1096                 if (vme_context->roi_enabled) {
1097                     qp_index = y_inner * mb_width + x_inner;
1098                     qp_mb = *(vme_context->qp_per_mb + qp_index);
1099                 } else
1100                     qp_mb = qp;
1101                 *command_ptr++ = qp_mb;
1102                 x_inner -= 2;
1103                 y_inner += 1;
1104             }
1105             x_outer += 1;
1106         }
1107
1108         xtemp_outer = mb_width - 2;
1109         if (xtemp_outer < 0)
1110             xtemp_outer = 0;
1111         x_outer = xtemp_outer;
1112         y_outer = first_mb / mb_width;
1113         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
1114             y_inner = y_outer;
1115             x_inner = x_outer;
1116             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1117                 mb_intra_ub = 0;
1118                 score_dep = 0;
1119                 if (x_inner != 0) {
1120                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1121                     score_dep |= MB_SCOREBOARD_A; 
1122                 }
1123                 if (y_inner != mb_row) {
1124                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1125                     score_dep |= MB_SCOREBOARD_B;
1126                     if (x_inner != 0)
1127                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1128
1129                     if (x_inner != (mb_width -1)) {
1130                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1131                         score_dep |= MB_SCOREBOARD_C;
1132                     }
1133                 }
1134
1135                 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1136                 *command_ptr++ = kernel;
1137                 *command_ptr++ = USE_SCOREBOARD;
1138                 /* Indirect data */
1139                 *command_ptr++ = 0;
1140                 /* the (X, Y) term of scoreboard */
1141                 *command_ptr++ = ((y_inner << 16) | x_inner);
1142                 *command_ptr++ = score_dep;
1143                 /*inline data */
1144                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1145                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1146                 /* qp occupies one byte */
1147                 if (vme_context->roi_enabled) {
1148                     qp_index = y_inner * mb_width + x_inner;
1149                     qp_mb = *(vme_context->qp_per_mb + qp_index);
1150                 } else
1151                     qp_mb = qp;
1152                 *command_ptr++ = qp_mb;
1153
1154                 x_inner -= 2;
1155                 y_inner += 1;
1156             }
1157             x_outer++;
1158             if (x_outer >= mb_width) {
1159                 y_outer += 1;
1160                 x_outer = xtemp_outer;
1161             }           
1162         }
1163     }
1164
1165     *command_ptr++ = 0;
1166     *command_ptr++ = MI_BATCH_BUFFER_END;
1167
1168     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1169 }
1170
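/*
 * Pack one reference entry as consumed by MFX_AVC_REF_IDX_STATE below:
 * bit 6 = long-term flag, bit 5 = clear when the reference is a single field,
 * bits 4:1 = frame store index, bit 0 = set for a bottom-field reference.
 */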
1171 static uint8_t
1172 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1173 {
1174     unsigned int is_long_term =
1175         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1176     unsigned int is_top_field =
1177         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1178     unsigned int is_bottom_field =
1179         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1180
1181     return ((is_long_term                         << 6) |
1182             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1183             (frame_store_id                       << 1) |
1184             ((is_top_field ^ 1) & is_bottom_field));
1185 }
1186
1187 void
1188 intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
1189                             struct encode_state *encode_state,
1190                             struct intel_encoder_context *encoder_context)
1191 {
1192     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1193     struct intel_batchbuffer *batch = encoder_context->base.batch;
1194     int slice_type;
1195     struct object_surface *obj_surface;
1196     unsigned int fref_entry, bref_entry;
1197     int frame_index, i;
1198     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1199
1200     fref_entry = 0x80808080;
1201     bref_entry = 0x80808080;
1202     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1203
1204     if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
1205         int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
1206
1207         if (ref_idx_l0 > 3) {
1208             WARN_ONCE("ref_idx_l0 is out of range\n");
1209             ref_idx_l0 = 0;
1210         }
1211
1212         obj_surface = vme_context->used_reference_objects[0];
1213         frame_index = -1;
1214         for (i = 0; i < 16; i++) {
1215             if (obj_surface &&
1216                 obj_surface == encode_state->reference_objects[i]) {
1217                 frame_index = i;
1218                 break;
1219             }
1220         }
1221         if (frame_index == -1) {
1222             WARN_ONCE("RefPicList0 is not found in DPB!\n");
1223         } else {
1224             int ref_idx_l0_shift = ref_idx_l0 * 8;
1225             fref_entry &= ~(0xFF << ref_idx_l0_shift);
1226             fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
1227         }
1228     }
1229
1230     if (slice_type == SLICE_TYPE_B) {
1231         int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
1232
1233         if (ref_idx_l1 > 3) {
1234             WARN_ONCE("ref_idx_l1 is out of range\n");
1235             ref_idx_l1 = 0;
1236         }
1237
1238         obj_surface = vme_context->used_reference_objects[1];
1239         frame_index = -1;
1240         for (i = 0; i < 16; i++) {
1241             if (obj_surface &&
1242                 obj_surface == encode_state->reference_objects[i]) {
1243                 frame_index = i;
1244                 break;
1245             }
1246         }
1247         if (frame_index == -1) {
1248             WARN_ONCE("RefPicList1 is not found in DPB!\n");
1249         } else {
1250             int ref_idx_l1_shift = ref_idx_l1 * 8;
1251             bref_entry &= ~(0xFF << ref_idx_l1_shift);
1252             bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
1253         }
1254     }
1255
1256     BEGIN_BCS_BATCH(batch, 10);
1257     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1258     OUT_BCS_BATCH(batch, 0);                  //Select L0
1259     OUT_BCS_BATCH(batch, fref_entry);         //Only 1 reference
1260     for(i = 0; i < 7; i++) {
1261         OUT_BCS_BATCH(batch, 0x80808080);
1262     }
1263     ADVANCE_BCS_BATCH(batch);
1264
1265     BEGIN_BCS_BATCH(batch, 10);
1266     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1267     OUT_BCS_BATCH(batch, 1);                  //Select L1
1268     OUT_BCS_BATCH(batch, bref_entry);         //Only 1 reference
1269     for(i = 0; i < 7; i++) {
1270         OUT_BCS_BATCH(batch, 0x80808080);
1271     }
1272     ADVANCE_BCS_BATCH(batch);
1273 }
1274
1275
1276 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1277                                  struct encode_state *encode_state,
1278                                  struct intel_encoder_context *encoder_context)
1279 {
1280     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1281     uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1282     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1283     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1284     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1285     uint32_t mv_x, mv_y;
1286     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1287     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1288     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1289
1290     if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1291         mv_x = 512;
1292         mv_y = 64;
1293     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1294         mv_x = 1024;
1295         mv_y = 128;
1296     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1297         mv_x = 2048;
1298         mv_y = 128;
1299     } else {
1300         WARN_ONCE("Incorrect Mpeg2 level setting!\n");
1301         mv_x = 512;
1302         mv_y = 64;
1303     }
1304
1305     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1306     if (pic_param->picture_type != VAEncPictureTypeIntra) {
1307         int qp, m_cost, j, mv_count;
1308         float   lambda, m_costf;
1309         slice_param = (VAEncSliceParameterBufferMPEG2 *)
1310             encode_state->slice_params_ext[0]->buffer;
1311         qp = slice_param->quantiser_scale_code;
1312         lambda = intel_lambda_qp(qp);
1313         /* MPEG-2 has no spatial intra prediction, so the 8x8/4x4 intra mode costs are zero */
1314         vme_state_message[MODE_INTRA_8X8] = 0;
1315         vme_state_message[MODE_INTRA_4X4] = 0;
1316         vme_state_message[MODE_INTER_MV0] = 0;
1317         for (j = 1; j < 3; j++) {
1318             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1319             m_cost = (int)m_costf;
1320             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1321         }
1322         mv_count = 3;
1323         for (j = 4; j <= 64; j *= 2) {
1324             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1325             m_cost = (int)m_costf;
1326             vme_state_message[MODE_INTER_MV0 + mv_count] =
1327                 intel_format_lutvalue(m_cost, 0x6f);
1328             mv_count++;
1329         }
1330         m_cost = lambda;
1331         /* Only the 16x16 motion search is performed, so the mode costs for the
1332          * other partitions (16x8/8x8, for example) can be ignored.
1333          */
1334         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1335         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1336
1337         vme_state_message[MODE_INTER_16X8] = 0;
1338         vme_state_message[MODE_INTER_8X8] = 0;
1339         vme_state_message[MODE_INTER_8X4] = 0;
1340         vme_state_message[MODE_INTER_4X4] = 0;
1341         vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
1342
1343     }
1344     vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1345
1346     vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
1347         width_in_mbs;
1348 }
1349
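/*
 * A rough sketch of the walking order used by the VME batchbuffer fillers in
 * this file (the AVC walker above and the MPEG-2 walker below): macroblocks
 * are emitted along diagonal wavefronts (each inner step moves two MBs left
 * and one MB down, the usual "26-degree" walk), and the scoreboard
 * dependencies A/B/C appear to correspond to the left, top and top-right
 * neighbours, matching the intra-prediction availability flags set per MB.
 * The scoreboard should therefore hold back an MB until those neighbours
 * have completed.
 */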
1350 void
1351 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
1352                                            struct encode_state *encode_state,
1353                                            int mb_width, int mb_height,
1354                                            int kernel,
1355                                            struct intel_encoder_context *encoder_context)
1356 {
1357     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1358     unsigned int *command_ptr;
1359
1360 #define         MPEG2_SCOREBOARD                (1 << 21)
1361
1362     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1363     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1364
1365     {
1366         unsigned int mb_intra_ub, score_dep;
1367         int x_outer, y_outer, x_inner, y_inner;
1368         int xtemp_outer = 0;
1369         int first_mb = 0;
1370         int num_mb = mb_width * mb_height;
1371
1372         x_outer = 0;
1373         y_outer = 0;
1374
1375
1376         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1377             x_inner = x_outer;
1378             y_inner = y_outer;
1379             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1380                 mb_intra_ub = 0;
1381                 score_dep = 0;
1382                 if (x_inner != 0) {
1383                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1384                     score_dep |= MB_SCOREBOARD_A; 
1385                 }
1386                 if (y_inner != 0) {
1387                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1388                     score_dep |= MB_SCOREBOARD_B;
1389
1390                     if (x_inner != 0)
1391                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1392
1393                     if (x_inner != (mb_width -1)) {
1394                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1395                         score_dep |= MB_SCOREBOARD_C;
1396                     }
1397                 }
1398
1399                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1400                 *command_ptr++ = kernel;
1401                 *command_ptr++ = MPEG2_SCOREBOARD;
1402                 /* Indirect data */
1403                 *command_ptr++ = 0;
1404                 /* the (X, Y) term of scoreboard */
1405                 *command_ptr++ = ((y_inner << 16) | x_inner);
1406                 *command_ptr++ = score_dep;
1407                 /*inline data */
1408                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1409                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1410                 x_inner -= 2;
1411                 y_inner += 1;
1412             }
1413             x_outer += 1;
1414         }
1415
1416         xtemp_outer = mb_width - 2;
1417         if (xtemp_outer < 0)
1418             xtemp_outer = 0;
1419         x_outer = xtemp_outer;
1420         y_outer = 0;
1421         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
1422             y_inner = y_outer;
1423             x_inner = x_outer;
1424             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1425                 mb_intra_ub = 0;
1426                 score_dep = 0;
1427                 if (x_inner != 0) {
1428                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1429                     score_dep |= MB_SCOREBOARD_A; 
1430                 }
1431                 if (y_inner != 0) {
1432                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1433                     score_dep |= MB_SCOREBOARD_B;
1434
1435                     if (x_inner != 0)
1436                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1437
1438                     if (x_inner != (mb_width -1)) {
1439                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1440                         score_dep |= MB_SCOREBOARD_C;
1441                     }
1442                 }
1443
1444                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1445                 *command_ptr++ = kernel;
1446                 *command_ptr++ = MPEG2_SCOREBOARD;
1447                 /* Indirect data */
1448                 *command_ptr++ = 0;
1449                 /* the (X, Y) term of scoreboard */
1450                 *command_ptr++ = ((y_inner << 16) | x_inner);
1451                 *command_ptr++ = score_dep;
1452                 /*inline data */
1453                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1454                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1455
1456                 x_inner -= 2;
1457                 y_inner += 1;
1458             }
1459             x_outer++;
1460             if (x_outer >= mb_width) {
1461                 y_outer += 1;
1462                 x_outer = xtemp_outer;
1463             }           
1464         }
1465     }
1466
1467     *command_ptr++ = 0;
1468     *command_ptr++ = MI_BATCH_BUFFER_END;
1469
1470     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1471     return;
1472 }
1473
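/*
 * Find the entry in ref_list whose TopFieldOrderCnt is closest to the current
 * picture on the requested side: with dir == 0 the nearest past reference
 * (smallest positive POC delta) is chosen, with dir != 0 the nearest future
 * reference.  Returns -1 when no suitable entry is found.
 */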
1474 static int
1475 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1476                           VAPictureH264 *ref_list,
1477                           int num_pictures,
1478                           int dir)
1479 {
1480     int i, found = -1, min = 0x7FFFFFFF;
1481
1482     for (i = 0; i < num_pictures; i++) {
1483         int tmp;
1484
1485         if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1486             (ref_list[i].picture_id == VA_INVALID_SURFACE))
1487             break;
1488
1489         tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1490
1491         if (dir)
1492             tmp = -tmp;
1493
1494         if (tmp > 0 && tmp < min) {
1495             min = tmp;
1496             found = i;
1497         }
1498     }
1499
1500     return found;
1501 }
1502
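/*
 * Select the reference surface that the VME kernel will actually use for the
 * given list.  When only one reference is active, RefPicListX[0] is taken
 * directly (falling back to the corresponding DPB reference object if that
 * surface is not valid); otherwise the temporally closest reference is picked
 * with avc_temporal_find_surface().  The chosen index is replicated into all
 * four bytes of ref_index_in_mb.
 */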
1503 void
1504 intel_avc_vme_reference_state(VADriverContextP ctx,
1505                               struct encode_state *encode_state,
1506                               struct intel_encoder_context *encoder_context,
1507                               int list_index,
1508                               int surface_index,
1509                               void (* vme_source_surface_state)(
1510                                   VADriverContextP ctx,
1511                                   int index,
1512                                   struct object_surface *obj_surface,
1513                                   struct intel_encoder_context *encoder_context))
1514 {
1515     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1516     struct object_surface *obj_surface = NULL;
1517     struct i965_driver_data *i965 = i965_driver_data(ctx);
1518     VASurfaceID ref_surface_id;
1519     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1520     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1521     int max_num_references;
1522     VAPictureH264 *curr_pic;
1523     VAPictureH264 *ref_list;
1524     int ref_idx;
1525
1526     if (list_index == 0) {
1527         max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1528         ref_list = slice_param->RefPicList0;
1529     } else {
1530         max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1531         ref_list = slice_param->RefPicList1;
1532     }
1533
1534     if (max_num_references == 1) {
1535         if (list_index == 0) {
1536             ref_surface_id = slice_param->RefPicList0[0].picture_id;
1537             vme_context->used_references[0] = &slice_param->RefPicList0[0];
1538         } else {
1539             ref_surface_id = slice_param->RefPicList1[0].picture_id;
1540             vme_context->used_references[1] = &slice_param->RefPicList1[0];
1541         }
1542
1543         if (ref_surface_id != VA_INVALID_SURFACE)
1544             obj_surface = SURFACE(ref_surface_id);
1545
1546         if (!obj_surface ||
1547             !obj_surface->bo) {
1548             obj_surface = encode_state->reference_objects[list_index];
1549             vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1550         }
1551
1552         ref_idx = 0;
1553     } else {
1554         curr_pic = &pic_param->CurrPic;
1555
1556         /* select the reference frame in temporal space */
1557         ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1558         ref_surface_id = ref_list[ref_idx].picture_id;
1559
1560         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1561             obj_surface = SURFACE(ref_surface_id);
1562
1563         vme_context->used_reference_objects[list_index] = obj_surface;
1564         vme_context->used_references[list_index] = &ref_list[ref_idx];
1565     }
1566
1567     if (obj_surface &&
1568         obj_surface->bo) {
1569         assert(ref_idx >= 0);
1570         vme_context->used_reference_objects[list_index] = obj_surface;
1571         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1572         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
1573                                                     ref_idx << 16 |
1574                                                     ref_idx <<  8 |
1575                                                     ref_idx);
1576     } else {
1577         vme_context->used_reference_objects[list_index] = NULL;
1578         vme_context->used_references[list_index] = NULL;
1579         vme_context->ref_index_in_mb[list_index] = 0;
1580     }
1581 }
1582
1583 void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
1584                                         struct encode_state *encode_state,
1585                                         struct intel_encoder_context *encoder_context,
1586                                         int slice_index,
1587                                         struct intel_batchbuffer *slice_batch)
1588 {
1589     int count, i, start_index;
1590     unsigned int length_in_bits;
1591     VAEncPackedHeaderParameterBuffer *param = NULL;
1592     unsigned int *header_data = NULL;
1593     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1594     int slice_header_index;
1595
1596     if (encode_state->slice_header_index[slice_index] == 0)
1597         slice_header_index = -1;
1598     else
1599         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1600
1601     count = encode_state->slice_rawdata_count[slice_index];
1602     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1603
1604     for (i = 0; i < count; i++) {
1605         unsigned int skip_emul_byte_cnt;
1606
1607         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1608
1609         param = (VAEncPackedHeaderParameterBuffer *)
1610                     (encode_state->packed_header_params_ext[start_index + i]->buffer);
1611
1612         /* skip the packed slice header data here; it is inserted last */
1613         if (param->type == VAEncPackedHeaderSlice)
1614             continue;
1615
1616         length_in_bits = param->bit_length;
1617
1618         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1619
1620         /* as the slice header is still required, the last header flag is set to
1621          * zero.
1622          */
1623         mfc_context->insert_object(ctx,
1624                                    encoder_context,
1625                                    header_data,
1626                                    ALIGN(length_in_bits, 32) >> 5,
1627                                    length_in_bits & 0x1f,
1628                                    skip_emul_byte_cnt,
1629                                    0,
1630                                    0,
1631                                    !param->has_emulation_bytes,
1632                                    slice_batch);
1633     }
1634
1635     if (slice_header_index == -1) {
1636         unsigned char *slice_header = NULL;
1637         int slice_header_length_in_bits = 0;
1638         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1639         VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1640         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1641
1642         /* No packed slice header was passed, so the driver generates one */
1643         /* itself (the normal H.264 case) */
1644         slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
1645                                                              pPicParameter,
1646                                                              pSliceParameter,
1647                                                              &slice_header);
1648         mfc_context->insert_object(ctx, encoder_context,
1649                                    (unsigned int *)slice_header,
1650                                    ALIGN(slice_header_length_in_bits, 32) >> 5,
1651                                    slice_header_length_in_bits & 0x1f,
1652                                    5,  /* first 5 bytes are start code + nal unit type */
1653                                    1, 0, 1, slice_batch);
1654
1655         free(slice_header);
1656     } else {
1657         unsigned int skip_emul_byte_cnt;
1658
1659         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1660
1661         param = (VAEncPackedHeaderParameterBuffer *)
1662                     (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1663         length_in_bits = param->bit_length;
1664
1665         /* as the slice header is the last header data for one slice,
1666          * the last header flag is set to one.
1667          */
1668         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1669
1670         mfc_context->insert_object(ctx,
1671                                    encoder_context,
1672                                    header_data,
1673                                    ALIGN(length_in_bits, 32) >> 5,
1674                                    length_in_bits & 0x1f,
1675                                    skip_emul_byte_cnt,
1676                                    1,
1677                                    0,
1678                                    !param->has_emulation_bytes,
1679                                    slice_batch);
1680     }
1681
1682     return;
1683 }
1684
1685 void
1686 intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
1687                                 struct encode_state *encode_state,
1688                                 struct intel_encoder_context *encoder_context)
1689 {
1690     struct i965_driver_data *i965 = i965_driver_data(ctx);
1691     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1692     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1693     int qp;
1694     dri_bo *bo;
1695     uint8_t *cost_table;
1696
1697     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1698
1699
1700     if (slice_type == SLICE_TYPE_I) {
1701         if (vme_context->i_qp_cost_table)
1702             return;
1703     } else if (slice_type == SLICE_TYPE_P) {
1704         if (vme_context->p_qp_cost_table)
1705             return;
1706     } else {
1707         if (vme_context->b_qp_cost_table)
1708             return;
1709     }
1710
1711     /* It is enough to allocate 32 bytes for each qp. */
1712     bo = dri_bo_alloc(i965->intel.bufmgr,
1713                       "cost_table ",
1714                       QP_MAX * 32,
1715                       64);
1716
1717     dri_bo_map(bo, 1);
1718     assert(bo->virtual);
1719     cost_table = (uint8_t *)(bo->virtual);
1720     for (qp = 0; qp < QP_MAX; qp++) {
1721         intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
1722         cost_table += 32;
1723     }
1724
1725     dri_bo_unmap(bo);
1726
1727     if (slice_type == SLICE_TYPE_I) {
1728         vme_context->i_qp_cost_table = bo;
1729     } else if (slice_type == SLICE_TYPE_P) {
1730         vme_context->p_qp_cost_table = bo;
1731     } else {
1732         vme_context->b_qp_cost_table = bo;
1733     }
1734
1735     vme_context->cost_table_size = QP_MAX * 32;
1736     return;
1737 }
1738
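/*
 * Expose the per-QP cost table (one 32-byte row of mode/MV costs for each QP
 * value, filled in intel_h264_initialize_mbmv_cost() above) to the VME kernel
 * as a buffer surface.  The surface parameters below (QP_MAX blocks of 32
 * bytes, pitch 16) describe that layout; how each row is interpreted is
 * defined by the kernel itself.
 */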
1739 extern void
1740 intel_h264_setup_cost_surface(VADriverContextP ctx,
1741                               struct encode_state *encode_state,
1742                               struct intel_encoder_context *encoder_context,
1743                               unsigned long binding_table_offset,
1744                               unsigned long surface_state_offset)
1745 {
1746     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1747     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1748     dri_bo *bo;
1749
1750
1751     struct i965_buffer_surface cost_table;
1752
1753     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1754
1755
1756     if (slice_type == SLICE_TYPE_I) {
1757         bo = vme_context->i_qp_cost_table;
1758     } else if (slice_type == SLICE_TYPE_P) {
1759         bo = vme_context->p_qp_cost_table;
1760     } else {
1761         bo = vme_context->b_qp_cost_table;
1762     }
1763
1764     cost_table.bo = bo;
1765     cost_table.num_blocks = QP_MAX;
1766     cost_table.pitch = 16;
1767     cost_table.size_block = 32;
1768
1769     vme_context->vme_buffer_suface_setup(ctx,
1770                                          &vme_context->gpe_context,
1771                                          &cost_table,
1772                                          binding_table_offset,
1773                                          surface_state_offset);
1774 }
1775
1776 /*
1777  * The conversion between QP and qstep follows the scaling process of the
1778  * luma transform coefficients in the H.264 spec:
1779  *   qstep = 2^(QPy / 6 - 6)
1780  * To avoid a too-small qstep, it is multiplied by 16 (i.e. 2^(QPy / 6 - 2)).
1781  */
1782 static float intel_h264_qp_qstep(int qp)
1783 {
1784     float value, qstep;
1785     value = qp;
1786     value = value / 6 - 2;
1787     qstep = powf(2, value);
1788     return qstep;
1789 }
1790
1791 static int intel_h264_qstep_qp(float qstep)
1792 {
1793     float qp;
1794
1795     qp = 12.0f + 6.0f * log2f(qstep);
1796
1797     return floorf(qp);
1798 }
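/*
 * A quick sanity check of the two helpers above (values rounded): with
 * qp = 30, intel_h264_qp_qstep() gives 2^(30/6 - 2) = 8.0, and feeding 8.0
 * back into intel_h264_qstep_qp() returns floor(12 + 6 * log2(8.0)) = 30.
 */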
1799
1800 /*
1801  * Currently it is based on the following assumption:
1802  * SUM(roi_area * 1 / roi_qstep) + nonroi_area * 1 / nonroi_qstep =
1803  *                                 total_area * 1 / baseqp_qstep
1804  *
1805  * where qstep is the linearized form of the H.264 quantizer (see above)
1806  */
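/*
 * Illustrative example (hypothetical numbers): for a 40x30 MB frame
 * (1200 MBs) with base_qp = 30 and a single ROI of 300 MBs using a QP delta
 * of -4 (roi_qp = 26):
 *   qstep_base   = 2^(30/6 - 2) = 8.0,  qstep_roi = 2^(26/6 - 2) ~= 5.04
 *   sum_roi      = 300 / 5.04 ~= 59.5
 *   temp         = 1200 / 8.0 - 59.5 = 90.5
 *   qstep_nonroi = 900 / 90.5 ~= 9.95  ->  nonroi_qp ~= 31
 * i.e. the non-ROI macroblocks get a slightly higher QP to keep the frame
 * budget roughly constant.
 */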
1807 typedef struct {
1808     int row_start_in_mb;
1809     int row_end_in_mb;
1810     int col_start_in_mb;
1811     int col_end_in_mb;
1812
1813     int width_mbs;
1814     int height_mbs;
1815
1816     int roi_qp;
1817 } ROIRegionParam;
1818
1819 static VAStatus
1820 intel_h264_enc_roi_cbr(VADriverContextP ctx,
1821                        int base_qp,
1822                        VAEncMiscParameterBufferROI *pMiscParamROI,
1823                        struct encode_state *encode_state,
1824                        struct intel_encoder_context *encoder_context)
1825 {
1826     int nonroi_qp;
1827     VAEncROI *region_roi;
1828     bool quickfill = 0;
1829
1830     ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS];
1831     int num_roi = 0;
1832     int i,j;
1833
1834     float temp;
1835     float qstep_nonroi, qstep_base;
1836     float roi_area, total_area, nonroi_area;
1837     float sum_roi;
1838
1839     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1840     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1841     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1842     int mbs_in_picture = width_in_mbs * height_in_mbs;
1843
1844     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1845     VAStatus vaStatus = VA_STATUS_SUCCESS;
1846
1847     if(pMiscParamROI != NULL)
1848     {
1849         num_roi = (pMiscParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pMiscParamROI->num_roi;
1850
1851         /* Currently roi_value_is_qp_delta is the only supported ROI priority mode.
1852          *
1853          * The qp_delta set by the user is added to base_qp, which is then clamped
1854          * to [base_qp - min_delta, base_qp + max_delta].
1855          */
1856         ASSERT_RET(pMiscParamROI->roi_flags.bits.roi_value_is_qp_delta,VA_STATUS_ERROR_INVALID_PARAMETER);
1857     }
1858
1859     /* Based on H.264 testing experience, the quality is already quite good
1860      * when base_qp is 12 or lower, so there is no need to adjust the QP for
1861      * the ROI regions in that case.
1862      */
1863     if (base_qp <= 12) {
1864         nonroi_qp = base_qp;
1865         quickfill = 1;
1866         goto qp_fill;
1867     }
1868
1869     sum_roi = 0.0f;
1870     roi_area = 0;
1871     for (i = 0; i < num_roi; i++) {
1872         int row_start, row_end, col_start, col_end;
1873         int roi_width_mbs, roi_height_mbs;
1874         int mbs_in_roi;
1875         int roi_qp;
1876         float qstep_roi;
1877
1878         region_roi =  (VAEncROI *)pMiscParamROI->roi + i;
1879
1880         col_start = region_roi->roi_rectangle.x;
1881         col_end = col_start + region_roi->roi_rectangle.width;
1882         row_start = region_roi->roi_rectangle.y;
1883         row_end = row_start + region_roi->roi_rectangle.height;
1884         col_start = col_start / 16;
1885         col_end = (col_end + 15) / 16;
1886         row_start = row_start / 16;
1887         row_end = (row_end + 15) / 16;
1888
1889         roi_width_mbs = col_end - col_start;
1890         roi_height_mbs = row_end - row_start;
1891         mbs_in_roi = roi_width_mbs * roi_height_mbs;
1892
1893         param_regions[i].row_start_in_mb = row_start;
1894         param_regions[i].row_end_in_mb = row_end;
1895         param_regions[i].col_start_in_mb = col_start;
1896         param_regions[i].col_end_in_mb = col_end;
1897         param_regions[i].width_mbs = roi_width_mbs;
1898         param_regions[i].height_mbs = roi_height_mbs;
1899
1900         roi_qp = base_qp + region_roi->roi_value;
1901         BRC_CLIP(roi_qp, 1, 51);
1902
1903         param_regions[i].roi_qp = roi_qp;
1904         qstep_roi = intel_h264_qp_qstep(roi_qp);
1905
1906         roi_area += mbs_in_roi;
1907         sum_roi += mbs_in_roi / qstep_roi;
1908     }
1909
1910     total_area = mbs_in_picture;
1911     nonroi_area = total_area - roi_area;
1912
1913     qstep_base = intel_h264_qp_qstep(base_qp);
1914     temp = (total_area / qstep_base - sum_roi);
1915
1916     if (temp < 0) {
1917         nonroi_qp = 51;
1918     } else {
1919         qstep_nonroi = nonroi_area / temp;
1920         nonroi_qp = intel_h264_qstep_qp(qstep_nonroi);
1921     }
1922
1923     BRC_CLIP(nonroi_qp, 1, 51);
1924
1925 qp_fill:
1926     memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture);
1927     if (!quickfill) {
1928         char *qp_ptr;
1929
1930         for (i = 0; i < num_roi; i++) {
1931             for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) {
1932                 qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb;
1933                 memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs);
1934             }
1935         }
1936     }
1937     return vaStatus;
1938 }
1939
1940 extern void
1941 intel_h264_enc_roi_config(VADriverContextP ctx,
1942                           struct encode_state *encode_state,
1943                           struct intel_encoder_context *encoder_context)
1944 {
1945     char *qp_ptr;
1946     int i, j;
1947     VAEncROI *region_roi;
1948     struct i965_driver_data *i965 = i965_driver_data(ctx);
1949     VAEncMiscParameterBuffer* pMiscParamROI;
1950     VAEncMiscParameterBufferROI *pParamROI = NULL;
1951     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1952     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1953     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1954     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1955     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1956
1957     int row_start, row_end, col_start, col_end;
1958     int num_roi = 0;
1959
1960     vme_context->roi_enabled = 0;
1961     /* Restriction: Disable ROI when multi-slice is enabled */
1962     if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
1963         return;
1964
1965     if (encode_state->misc_param[VAEncMiscParameterTypeROI] != NULL) {
1966         pMiscParamROI = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeROI]->buffer;
1967         pParamROI = (VAEncMiscParameterBufferROI *)pMiscParamROI->data;
1968
1969         /* clamp the number of ROI regions to the supported maximum */
1970         num_roi = (pParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pParamROI->num_roi;
1971     }
1972
1973     if (num_roi > 0)
1974         vme_context->roi_enabled = 1;
1975
1976     if (!vme_context->roi_enabled)
1977         return;
1978
1979     if ((vme_context->saved_width_mbs !=  width_in_mbs) ||
1980         (vme_context->saved_height_mbs != height_in_mbs)) {
1981         free(vme_context->qp_per_mb);
1982         vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);
1983
1984         vme_context->saved_width_mbs = width_in_mbs;
1985         vme_context->saved_height_mbs = height_in_mbs;
1986         assert(vme_context->qp_per_mb);
1987     }
1988     if (encoder_context->rate_control_mode == VA_RC_CBR) {
1989         /*
1990          * TODO: a more sophisticated QP adjustment is needed here.
1991          * Currently the QP map is initialized from the slice QP.
1992          */
1993         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1994         int qp;
1995         int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1996
1997         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1998         intel_h264_enc_roi_cbr(ctx, qp, pParamROI,encode_state, encoder_context);
1999
2000     } else if (encoder_context->rate_control_mode == VA_RC_CQP){
2001         VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2002         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
2003         int qp;
2004
2005         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2006         memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
2007
2008
2009         for (j = num_roi; j ; j--) {
2010             int qp_delta, qp_clip;
2011
2012             region_roi =  (VAEncROI *)pParamROI->roi + j - 1;
2013
2014             col_start = region_roi->roi_rectangle.x;
2015             col_end = col_start + region_roi->roi_rectangle.width;
2016             row_start = region_roi->roi_rectangle.y;
2017             row_end = row_start + region_roi->roi_rectangle.height;
2018
2019             col_start = col_start / 16;
2020             col_end = (col_end + 15) / 16;
2021             row_start = row_start / 16;
2022             row_end = (row_end + 15) / 16;
2023
2024             qp_delta = region_roi->roi_value;
2025             qp_clip = qp + qp_delta;
2026
2027             BRC_CLIP(qp_clip, 1, 51);
2028
2029             for (i = row_start; i < row_end; i++) {
2030                 qp_ptr = vme_context->qp_per_mb + (i * width_in_mbs) + col_start;
2031                 memset(qp_ptr, qp_clip, (col_end - col_start));
2032             }
2033         }
2034     } else {
2035         /*
2036          * ROI is only supported for CBR and CQP; disable it for other modes.
2037          */
2038         vme_context->roi_enabled = 0;
2039     }
2040
2041     if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info))
2042         encoder_context->soft_batch_force = 1;
2043
2044     return;
2045 }
2046
2047 /* HEVC */
2048 static int
2049 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
2050                            VAPictureHEVC *ref_list,
2051                            int num_pictures,
2052                            int dir)
2053 {
2054     int i, found = -1, min = 0x7FFFFFFF;
2055
2056     for (i = 0; i < num_pictures; i++) {
2057         int tmp;
2058
2059         if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
2060             (ref_list[i].picture_id == VA_INVALID_SURFACE))
2061             break;
2062
2063         tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
2064
2065         if (dir)
2066             tmp = -tmp;
2067
2068         if (tmp > 0 && tmp < min) {
2069             min = tmp;
2070             found = i;
2071         }
2072     }
2073
2074     return found;
2075 }
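/*
 * HEVC counterpart of intel_avc_vme_reference_state() above: the reference
 * selection logic is the same (single active reference vs. temporally closest
 * pick), with one extra step for 10-bit content, where the NV12 proxy surface
 * attached to the reference is used for the VME stage.
 */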
2076 void
2077 intel_hevc_vme_reference_state(VADriverContextP ctx,
2078                                struct encode_state *encode_state,
2079                                struct intel_encoder_context *encoder_context,
2080                                int list_index,
2081                                int surface_index,
2082                                void (* vme_source_surface_state)(
2083                                    VADriverContextP ctx,
2084                                    int index,
2085                                    struct object_surface *obj_surface,
2086                                    struct intel_encoder_context *encoder_context))
2087 {
2088     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2089     struct object_surface *obj_surface = NULL;
2090     struct i965_driver_data *i965 = i965_driver_data(ctx);
2091     VASurfaceID ref_surface_id;
2092     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2093     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2094     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2095     int max_num_references;
2096     VAPictureHEVC *curr_pic;
2097     VAPictureHEVC *ref_list;
2098     int ref_idx;
2099     unsigned int is_hevc10 = 0;
2100     GenHevcSurface *hevc_encoder_surface = NULL;
2101
2102     if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
2103         || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
2104         is_hevc10 = 1;
2105
2106     if (list_index == 0) {
2107         max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
2108         ref_list = slice_param->ref_pic_list0;
2109     } else {
2110         max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
2111         ref_list = slice_param->ref_pic_list1;
2112     }
2113
2114     if (max_num_references == 1) {
2115         if (list_index == 0) {
2116             ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
2117             vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
2118         } else {
2119             ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
2120             vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
2121         }
2122
2123         if (ref_surface_id != VA_INVALID_SURFACE)
2124             obj_surface = SURFACE(ref_surface_id);
2125
2126         if (!obj_surface ||
2127             !obj_surface->bo) {
2128             obj_surface = encode_state->reference_objects[list_index];
2129             vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
2130         }
2131
2132         ref_idx = 0;
2133     } else {
2134         curr_pic = &pic_param->decoded_curr_pic;
2135
2136         /* select the reference frame in temporal space */
2137         ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
2138         ref_surface_id = ref_list[ref_idx].picture_id;
2139
2140         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
2141             obj_surface = SURFACE(ref_surface_id);
2142
2143         vme_context->used_reference_objects[list_index] = obj_surface;
2144         vme_context->used_references[list_index] = &ref_list[ref_idx];
2145     }
2146
2147     if (obj_surface &&
2148         obj_surface->bo) {
2149         assert(ref_idx >= 0);
2150         vme_context->used_reference_objects[list_index] = obj_surface;
2151
2152         if(is_hevc10){
2153             hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2154             assert(hevc_encoder_surface);
2155             obj_surface = hevc_encoder_surface->nv12_surface_obj;
2156         }
2157         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
2158         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
2159                 ref_idx << 16 |
2160                 ref_idx <<  8 |
2161                 ref_idx);
2162     } else {
2163         vme_context->used_reference_objects[list_index] = NULL;
2164         vme_context->used_references[list_index] = NULL;
2165         vme_context->ref_index_in_mb[list_index] = 0;
2166     }
2167 }
2168
2169 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
2170                                      struct encode_state *encode_state,
2171                                      struct intel_encoder_context *encoder_context)
2172 {
2173     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2174     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2175     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2176     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2177     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2178     int qp, m_cost, j, mv_count;
2179     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
2180     float   lambda, m_costf;
2181
2182     /* HEVC has no SI/SP slices, so no slice-type fixup is needed */
2183     int slice_type = slice_param->slice_type;
2184
2185
2186     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2187
2188     if(encoder_context->rate_control_mode == VA_RC_CBR)
2189     {
2190         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
2191         if(slice_type == HEVC_SLICE_B) {
2192             if(pSequenceParameter->ip_period == 1)
2193             {
2194                 slice_type = HEVC_SLICE_P;
2195                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2196
2197             }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
2198                 slice_type = HEVC_SLICE_P;
2199                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2200             }
2201         }
2202
2203     }
2204
2205     if (vme_state_message == NULL)
2206         return;
2207
2208     assert(qp <= QP_MAX);
2209     lambda = intel_lambda_qp(qp);
2210     if (slice_type == HEVC_SLICE_I) {
2211         vme_state_message[MODE_INTRA_16X16] = 0;
2212         m_cost = lambda * 4;
2213         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2214         m_cost = lambda * 16;
2215         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2216         m_cost = lambda * 3;
2217         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2218     } else {
2219         m_cost = 0;
2220         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
2221         for (j = 1; j < 3; j++) {
2222             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2223             m_cost = (int)m_costf;
2224             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2225         }
2226         mv_count = 3;
2227         for (j = 4; j <= 64; j *= 2) {
2228             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2229             m_cost = (int)m_costf;
2230             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
2231             mv_count++;
2232         }
2233
2234         if (qp <= 25) {
2235             vme_state_message[MODE_INTRA_16X16] = 0x4a;
2236             vme_state_message[MODE_INTRA_8X8] = 0x4a;
2237             vme_state_message[MODE_INTRA_4X4] = 0x4a;
2238             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2239             vme_state_message[MODE_INTER_16X16] = 0x4a;
2240             vme_state_message[MODE_INTER_16X8] = 0x4a;
2241             vme_state_message[MODE_INTER_8X8] = 0x4a;
2242             vme_state_message[MODE_INTER_8X4] = 0x4a;
2243             vme_state_message[MODE_INTER_4X4] = 0x4a;
2244             vme_state_message[MODE_INTER_BWD] = 0x2a;
2245             return;
2246         }
2247         m_cost = lambda * 10;
2248         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2249         m_cost = lambda * 14;
2250         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2251         m_cost = lambda * 24;
2252         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2253         m_costf = lambda * 3.5;
2254         m_cost = m_costf;
2255         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2256         if (slice_type == HEVC_SLICE_P) {
2257             m_costf = lambda * 2.5;
2258             m_cost = m_costf;
2259             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2260             m_costf = lambda * 4;
2261             m_cost = m_costf;
2262             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2263             m_costf = lambda * 1.5;
2264             m_cost = m_costf;
2265             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2266             m_costf = lambda * 3;
2267             m_cost = m_costf;
2268             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2269             m_costf = lambda * 5;
2270             m_cost = m_costf;
2271             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2272             /* BWD is not used in P-frame */
2273             vme_state_message[MODE_INTER_BWD] = 0;
2274         } else {
2275             m_costf = lambda * 2.5;
2276             m_cost = m_costf;
2277             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2278             m_costf = lambda * 5.5;
2279             m_cost = m_costf;
2280             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2281             m_costf = lambda * 3.5;
2282             m_cost = m_costf;
2283             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2284             m_costf = lambda * 5.0;
2285             m_cost = m_costf;
2286             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2287             m_costf = lambda * 6.5;
2288             m_cost = m_costf;
2289             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2290             m_costf = lambda * 1.5;
2291             m_cost = m_costf;
2292             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
2293         }
2294     }
2295 }