OSDN Git Service

Encoding: Add one ROI flag and ROI buffer
[android-x86/hardware-intel-common-vaapi.git] / src / gen6_mfc_common.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "gen9_mfc.h"
45 #include "intel_media.h"
46
47 #ifndef HAVE_LOG2F
48 #define log2f(x) (logf(x)/(float)M_LN2)
49 #endif
50
51 int intel_avc_enc_slice_type_fixup(int slice_type)
52 {
53     if (slice_type == SLICE_TYPE_SP ||
54         slice_type == SLICE_TYPE_P)
55         slice_type = SLICE_TYPE_P;
56     else if (slice_type == SLICE_TYPE_SI ||
57              slice_type == SLICE_TYPE_I)
58         slice_type = SLICE_TYPE_I;
59     else {
60         if (slice_type != SLICE_TYPE_B)
61             WARN_ONCE("Invalid slice type for H.264 encoding!\n");
62
63         slice_type = SLICE_TYPE_B;
64     }
65
66     return slice_type;
67 }
68
69 static void
70 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state, 
71                                         struct gen6_mfc_context *mfc_context)
72 {
73     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
74     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
75     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
76     float fps =  pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
77     int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
78     int intra_mb_size = inter_mb_size * 5.0;
79     int i;
80
81     mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size;
82     mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
83     mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size;
84     mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
85     mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_mb_size = inter_mb_size;
86     mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
87
88     for(i = 0 ; i < 3; i++) {
89         mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
90         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
91         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
92         mfc_context->bit_rate_control_context[i].GrowInit = 6;
93         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
94         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
95         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
96         
97         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
98         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
99         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
100         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
101         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
102         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
103     }
104     
105     mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16;
106     mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16;
107     mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord = (inter_mb_size + 16)/ 16;
108
109     mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5;
110     mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5;
111     mfc_context->bit_rate_control_context[SLICE_TYPE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord * 1.5;
112 }
113
114 static void intel_mfc_brc_init(struct encode_state *encode_state,
115                                struct intel_encoder_context* encoder_context)
116 {
117     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
118     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
119     VAEncMiscParameterBuffer* pMiscParamHRD = NULL;
120     VAEncMiscParameterHRD* pParameterHRD = NULL;
121     double bitrate = pSequenceParameter->bits_per_second;
122     double framerate = (double)pSequenceParameter->time_scale /(2 * (double)pSequenceParameter->num_units_in_tick);
123     int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
124     int intra_period = pSequenceParameter->intra_period;
125     int ip_period = pSequenceParameter->ip_period;
126     double qp1_size = 0.1 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
127     double qp51_size = 0.001 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
128     double bpf;
129
130     if (!encode_state->misc_param[VAEncMiscParameterTypeHRD] || !encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer)
131         return;
132
133     pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
134     pParameterHRD = (VAEncMiscParameterHRD*)pMiscParamHRD->data;
135
136     if (pSequenceParameter->ip_period) {
137         pnum = (intra_period + ip_period - 1)/ip_period - 1;
138         bnum = intra_period - inum - pnum;
139     }
140
141     mfc_context->brc.mode = encoder_context->rate_control_mode;
142
143     mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
144                                                              (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
145     mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
146     mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
147
148     mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
149     mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
150     mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
151
152     bpf = mfc_context->brc.bits_per_frame = bitrate/framerate;
153
154     mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size;
155     mfc_context->hrd.current_buffer_fullness =
156         (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
157         pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
158     mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
159     mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
160     mfc_context->hrd.violation_noted = 0;
161
162     if ((bpf > qp51_size) && (bpf < qp1_size)) {
163         mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
164     }
165     else if (bpf >= qp1_size)
166         mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 1;
167     else if (bpf <= qp51_size)
168         mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51;
169
170     mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
171     mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
172
173     BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
174     BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
175     BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
176 }
177
178 int intel_mfc_update_hrd(struct encode_state *encode_state,
179                          struct gen6_mfc_context *mfc_context,
180                          int frame_bits)
181 {
182     double prev_bf = mfc_context->hrd.current_buffer_fullness;
183
184     mfc_context->hrd.current_buffer_fullness -= frame_bits;
185
186     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
187         mfc_context->hrd.current_buffer_fullness = prev_bf;
188         return BRC_UNDERFLOW;
189     }
190     
191     mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
192     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
193         if (mfc_context->brc.mode == VA_RC_VBR)
194             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
195         else {
196             mfc_context->hrd.current_buffer_fullness = prev_bf;
197             return BRC_OVERFLOW;
198         }
199     }
200     return BRC_NO_HRD_VIOLATION;
201 }
202
203 int intel_mfc_brc_postpack(struct encode_state *encode_state,
204                            struct gen6_mfc_context *mfc_context,
205                            int frame_bits)
206 {
207     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
208     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; 
209     int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
210     int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
211     int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
212     int qpb = mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY;
213     int qp; // quantizer of previously encoded slice of current type
214     int qpn; // predicted quantizer for next frame of current type in integer format
215     double qpf; // predicted quantizer for next frame of current type in float format
216     double delta_qp; // QP correction
217     int target_frame_size, frame_size_next;
218     /* Notes:
219      *  x - how far we are from HRD buffer borders
220      *  y - how far we are from target HRD buffer fullness
221      */
222     double x, y;
223     double frame_size_alpha;
224
225     qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;
226
227     target_frame_size = mfc_context->brc.target_frame_size[slicetype];
228     if (mfc_context->hrd.buffer_capacity < 5)
229         frame_size_alpha = 0;
230     else
231         frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
232     if (frame_size_alpha > 30) frame_size_alpha = 30;
233     frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
234         (double)(frame_size_alpha + 1.);
235
236     /* frame_size_next: avoiding negative number and too small value */
237     if ((double)frame_size_next < (double)(target_frame_size * 0.25))
238         frame_size_next = (int)((double)target_frame_size * 0.25);
239
240     qpf = (double)qp * target_frame_size / frame_size_next;
241     qpn = (int)(qpf + 0.5);
242
243     if (qpn == qp) {
244         /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
245         mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
246         if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
247             qpn++;
248             mfc_context->brc.qpf_rounding_accumulator = 0.;
249         } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
250             qpn--;
251             mfc_context->brc.qpf_rounding_accumulator = 0.;
252         }
253     }
254     /* making sure that QP is not changing too fast */
255     if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
256     else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
257     /* making sure that with QP predictions we did do not leave QPs range */
258     BRC_CLIP(qpn, 1, 51);
259
260     /* checking wthether HRD compliance is still met */
261     sts = intel_mfc_update_hrd(encode_state, mfc_context, frame_bits);
262
263     /* calculating QP delta as some function*/
264     x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
265     if (x > 0) {
266         x /= mfc_context->hrd.target_buffer_fullness;
267         y = mfc_context->hrd.current_buffer_fullness;
268     }
269     else {
270         x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
271         y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
272     }
273     if (y < 0.01) y = 0.01;
274     if (x > 1) x = 1;
275     else if (x < -1) x = -1;
276
277     delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
278     qpn = (int)(qpn + delta_qp + 0.5);
279
280     /* making sure that with QP predictions we did do not leave QPs range */
281     BRC_CLIP(qpn, 1, 51);
282
283     if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
284         /* correcting QPs of slices of other types */
285         if (slicetype == SLICE_TYPE_P) {
286             if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
287                 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
288             if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
289                 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
290         } else if (slicetype == SLICE_TYPE_I) {
291             if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
292                 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
293             if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
294                 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
295         } else { // SLICE_TYPE_B
296             if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
297                 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
298             if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
299                 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
300         }
301         BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
302         BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
303         BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
304     } else if (sts == BRC_UNDERFLOW) { // underflow
305         if (qpn <= qp) qpn = qp + 1;
306         if (qpn > 51) {
307             qpn = 51;
308             sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
309         }
310     } else if (sts == BRC_OVERFLOW) {
311         if (qpn >= qp) qpn = qp - 1;
312         if (qpn < 1) { // < 0 (?) overflow with minQP
313             qpn = 1;
314             sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
315         }
316     }
317
318     mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
319
320     return sts;
321 }
322
323 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
324                                        struct intel_encoder_context *encoder_context)
325 {
326     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
327     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
328     unsigned int rate_control_mode = encoder_context->rate_control_mode;
329     int target_bit_rate = pSequenceParameter->bits_per_second;
330     
331     // current we only support CBR mode.
332     if (rate_control_mode == VA_RC_CBR) {
333         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
334         mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
335         mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
336         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
337         mfc_context->vui_hrd.i_frame_number = 0;
338
339         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24; 
340         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
341         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
342     }
343
344 }
345
346 void 
347 intel_mfc_hrd_context_update(struct encode_state *encode_state, 
348                              struct gen6_mfc_context *mfc_context)
349 {
350     mfc_context->vui_hrd.i_frame_number++;
351 }
352
353 int intel_mfc_interlace_check(VADriverContextP ctx,
354                               struct encode_state *encode_state,
355                               struct intel_encoder_context *encoder_context)
356 {
357     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
358     VAEncSliceParameterBufferH264 *pSliceParameter;
359     int i;
360     int mbCount = 0;
361     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
362     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
363   
364     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
365         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer; 
366         mbCount += pSliceParameter->num_macroblocks; 
367     }
368     
369     if ( mbCount == ( width_in_mbs * height_in_mbs ) )
370         return 0;
371
372     return 1;
373 }
374
375 /*
376  * Check whether the parameters related with CBR are updated and decide whether
377  * it needs to reinitialize the configuration related with CBR.
378  * Currently it will check the following parameters:
379  *      bits_per_second
380  *      frame_rate
381  *      gop_configuration(intra_period, ip_period, intra_idr_period)
382  */
383 static bool intel_mfc_brc_updated_check(struct encode_state *encode_state,
384                            struct intel_encoder_context *encoder_context)
385 {
386     unsigned int rate_control_mode = encoder_context->rate_control_mode;
387     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
388     double cur_fps, cur_bitrate;
389     VAEncSequenceParameterBufferH264 *pSequenceParameter;
390
391
392     if (rate_control_mode != VA_RC_CBR) {
393         return false;
394     }
395
396     pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
397
398     cur_bitrate = pSequenceParameter->bits_per_second;
399     cur_fps = (double)pSequenceParameter->time_scale /
400                 (2 * (double)pSequenceParameter->num_units_in_tick);
401
402     if ((cur_bitrate == mfc_context->brc.saved_bps) &&
403         (cur_fps == mfc_context->brc.saved_fps) &&
404         (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period) &&
405         (pSequenceParameter->intra_idr_period == mfc_context->brc.saved_idr_period) &&
406         (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period)) {
407         /* the parameters related with CBR are not updaetd */
408         return false;
409     }
410
411     mfc_context->brc.saved_ip_period = pSequenceParameter->ip_period;
412     mfc_context->brc.saved_intra_period = pSequenceParameter->intra_period;
413     mfc_context->brc.saved_idr_period = pSequenceParameter->intra_idr_period;
414     mfc_context->brc.saved_fps = cur_fps;
415     mfc_context->brc.saved_bps = cur_bitrate;
416     return true;
417 }
418
419 void intel_mfc_brc_prepare(struct encode_state *encode_state,
420                            struct intel_encoder_context *encoder_context)
421 {
422     unsigned int rate_control_mode = encoder_context->rate_control_mode;
423     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
424
425     if (rate_control_mode == VA_RC_CBR) {
426         bool brc_updated;
427         assert(encoder_context->codec != CODEC_MPEG2);
428
429         brc_updated = intel_mfc_brc_updated_check(encode_state, encoder_context);
430
431         /*Programing bit rate control */
432         if ((mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0) ||
433              brc_updated) {
434             intel_mfc_bit_rate_control_context_init(encode_state, mfc_context);
435             intel_mfc_brc_init(encode_state, encoder_context);
436         }
437
438         /*Programing HRD control */
439         if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated )
440             intel_mfc_hrd_context_init(encode_state, encoder_context);    
441     }
442 }
443
444 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
445                                               struct encode_state *encode_state,
446                                               struct intel_encoder_context *encoder_context,
447                                               struct intel_batchbuffer *slice_batch)
448 {
449     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
450     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
451     unsigned int rate_control_mode = encoder_context->rate_control_mode;
452     unsigned int skip_emul_byte_cnt;
453
454     if (encode_state->packed_header_data[idx]) {
455         VAEncPackedHeaderParameterBuffer *param = NULL;
456         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
457         unsigned int length_in_bits;
458
459         assert(encode_state->packed_header_param[idx]);
460         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
461         length_in_bits = param->bit_length;
462
463         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
464         mfc_context->insert_object(ctx,
465                                    encoder_context,
466                                    header_data,
467                                    ALIGN(length_in_bits, 32) >> 5,
468                                    length_in_bits & 0x1f,
469                                    skip_emul_byte_cnt,
470                                    0,
471                                    0,
472                                    !param->has_emulation_bytes,
473                                    slice_batch);
474     }
475
476     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
477
478     if (encode_state->packed_header_data[idx]) {
479         VAEncPackedHeaderParameterBuffer *param = NULL;
480         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
481         unsigned int length_in_bits;
482
483         assert(encode_state->packed_header_param[idx]);
484         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
485         length_in_bits = param->bit_length;
486
487         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
488
489         mfc_context->insert_object(ctx,
490                                    encoder_context,
491                                    header_data,
492                                    ALIGN(length_in_bits, 32) >> 5,
493                                    length_in_bits & 0x1f,
494                                    skip_emul_byte_cnt,
495                                    0,
496                                    0,
497                                    !param->has_emulation_bytes,
498                                    slice_batch);
499     }
500     
501     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
502
503     if (encode_state->packed_header_data[idx]) {
504         VAEncPackedHeaderParameterBuffer *param = NULL;
505         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
506         unsigned int length_in_bits;
507
508         assert(encode_state->packed_header_param[idx]);
509         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
510         length_in_bits = param->bit_length;
511
512         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
513         mfc_context->insert_object(ctx,
514                                    encoder_context,
515                                    header_data,
516                                    ALIGN(length_in_bits, 32) >> 5,
517                                    length_in_bits & 0x1f,
518                                    skip_emul_byte_cnt,
519                                    0,
520                                    0,
521                                    !param->has_emulation_bytes,
522                                    slice_batch);
523     } else if (rate_control_mode == VA_RC_CBR) {
524         // this is frist AU
525         struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
526
527         unsigned char *sei_data = NULL;
528     
529         int length_in_bits = build_avc_sei_buffer_timing(
530             mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
531             mfc_context->vui_hrd.i_initial_cpb_removal_delay,
532             0,
533             mfc_context->vui_hrd.i_cpb_removal_delay_length,                                                       mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
534             mfc_context->vui_hrd.i_dpb_output_delay_length,
535             0,
536             &sei_data);
537         mfc_context->insert_object(ctx,
538                                    encoder_context,
539                                    (unsigned int *)sei_data,
540                                    ALIGN(length_in_bits, 32) >> 5,
541                                    length_in_bits & 0x1f,
542                                    5,
543                                    0,   
544                                    0,   
545                                    1,
546                                    slice_batch);  
547         free(sei_data);
548     }
549 }
550
551 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, 
552                                struct encode_state *encode_state,
553                                struct intel_encoder_context *encoder_context)
554 {
555     struct i965_driver_data *i965 = i965_driver_data(ctx);
556     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
557     struct object_surface *obj_surface; 
558     struct object_buffer *obj_buffer;
559     GenAvcSurface *gen6_avc_surface;
560     dri_bo *bo;
561     VAStatus vaStatus = VA_STATUS_SUCCESS;
562     int i, j, enable_avc_ildb = 0;
563     VAEncSliceParameterBufferH264 *slice_param;
564     struct i965_coded_buffer_segment *coded_buffer_segment;
565     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
566     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
567     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
568
569     if (IS_GEN6(i965->intel.device_info)) {
570         /* On the SNB it should be fixed to 128 for the DMV buffer */
571         width_in_mbs = 128;
572     }
573
574     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
575         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
576         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
577
578         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
579             assert((slice_param->slice_type == SLICE_TYPE_I) ||
580                    (slice_param->slice_type == SLICE_TYPE_SI) ||
581                    (slice_param->slice_type == SLICE_TYPE_P) ||
582                    (slice_param->slice_type == SLICE_TYPE_SP) ||
583                    (slice_param->slice_type == SLICE_TYPE_B));
584
585             if (slice_param->disable_deblocking_filter_idc != 1) {
586                 enable_avc_ildb = 1;
587                 break;
588             }
589
590             slice_param++;
591         }
592     }
593
594     /*Setup all the input&output object*/
595
596     /* Setup current frame and current direct mv buffer*/
597     obj_surface = encode_state->reconstructed_object;
598     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
599
600     if ( obj_surface->private_data == NULL) {
601         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
602         assert(gen6_avc_surface);
603         gen6_avc_surface->dmv_top = 
604             dri_bo_alloc(i965->intel.bufmgr,
605                          "Buffer",
606                          68 * width_in_mbs * height_in_mbs, 
607                          64);
608         gen6_avc_surface->dmv_bottom = 
609             dri_bo_alloc(i965->intel.bufmgr,
610                          "Buffer",
611                          68 * width_in_mbs * height_in_mbs, 
612                          64);
613         assert(gen6_avc_surface->dmv_top);
614         assert(gen6_avc_surface->dmv_bottom);
615         obj_surface->private_data = (void *)gen6_avc_surface;
616         obj_surface->free_private_data = (void *)gen_free_avc_surface; 
617     }
618     gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
619     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
620     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
621     dri_bo_reference(gen6_avc_surface->dmv_top);
622     dri_bo_reference(gen6_avc_surface->dmv_bottom);
623
624     if (enable_avc_ildb) {
625         mfc_context->post_deblocking_output.bo = obj_surface->bo;
626         dri_bo_reference(mfc_context->post_deblocking_output.bo);
627     } else {
628         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
629         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
630     }
631
632     mfc_context->surface_state.width = obj_surface->orig_width;
633     mfc_context->surface_state.height = obj_surface->orig_height;
634     mfc_context->surface_state.w_pitch = obj_surface->width;
635     mfc_context->surface_state.h_pitch = obj_surface->height;
636     
637     /* Setup reference frames and direct mv buffers*/
638     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
639         obj_surface = encode_state->reference_objects[i];
640         
641         if (obj_surface && obj_surface->bo) {
642             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
643             dri_bo_reference(obj_surface->bo);
644
645             /* Check DMV buffer */
646             if ( obj_surface->private_data == NULL) {
647                 
648                 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
649                 assert(gen6_avc_surface);
650                 gen6_avc_surface->dmv_top = 
651                     dri_bo_alloc(i965->intel.bufmgr,
652                                  "Buffer",
653                                  68 * width_in_mbs * height_in_mbs, 
654                                  64);
655                 gen6_avc_surface->dmv_bottom = 
656                     dri_bo_alloc(i965->intel.bufmgr,
657                                  "Buffer",
658                                  68 * width_in_mbs * height_in_mbs, 
659                                  64);
660                 assert(gen6_avc_surface->dmv_top);
661                 assert(gen6_avc_surface->dmv_bottom);
662                 obj_surface->private_data = gen6_avc_surface;
663                 obj_surface->free_private_data = gen_free_avc_surface; 
664             }
665     
666             gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
667             /* Setup DMV buffer */
668             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
669             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
670             dri_bo_reference(gen6_avc_surface->dmv_top);
671             dri_bo_reference(gen6_avc_surface->dmv_bottom);
672         } else {
673             break;
674         }
675     }
676
677     mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
678     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
679
680     obj_buffer = encode_state->coded_buf_object;
681     bo = obj_buffer->buffer_store->bo;
682     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
683     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
684     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
685     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
686     
687     dri_bo_map(bo, 1);
688     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
689     coded_buffer_segment->mapped = 0;
690     coded_buffer_segment->codec = encoder_context->codec;
691     dri_bo_unmap(bo);
692
693     return vaStatus;
694 }
/*
 * Convert a cost into the (shift, base) LUT byte format.
 *
 * A LUT entry packs two 4-bit fields as (shift << 4) | base and stands for
 * the value  base << shift,  i.e. 2^K * X = value.
 * So it is necessary to convert one cost into the nearest LUT format.
 * The derivation is:
 * 2^K * x = 2^n * (1 + deltaX)
 *    k + log2(x) = n + log2(1 + deltaX)
 *    log2(x) = n - k + log2(1 + deltaX)
 *    As X is in the range of [1, 15]
 *      4 > n - k + log2(1 + deltaX) >= 0
 *      =>    n + log2(1 + deltaX)  >= k > n - 4  + log2(1 + deltaX)
 *    Then we can derive the corresponding K and get the nearest LUT format.
 *
 * value: cost to encode; values <= 0 encode as 0.
 * max:   upper limit, itself given in LUT format (e.g. 0x8f, 0x6f).
 *
 * Returns the LUT byte for value, or max when the encoded value would
 * exceed the value encoded by max.
 */
int intel_format_lutvalue(int value, int max)
{
    /* Largest value a LUT byte can express: base 15 with shift 15. */
    const int lut_limit = 0xf << 0xf;
    int ret;
    int logvalue, temp1, temp2;

    if (value <= 0)
        return 0;

    /*
     * Anything above lut_limit would need a shift that does not fit in the
     * 4-bit shift field; the overflowing bits used to escape the clamp
     * below and were then lost when the result was stored in a byte.
     */
    if (value > lut_limit)
        return max;

    /* logvalue = floor(log2(value)), computed without floating point. */
    logvalue = 0;
    while ((value >> (logvalue + 1)) != 0)
        logvalue++;

    if (logvalue < 4) {
        /* value < 16: it fits in the base field with shift 0. */
        ret = value;
    } else {
        int error, temp_value, base, j, temp_err;

        error = value;
        ret = -1;
        /* Try the four candidate shifts; j is always >= 1 here. */
        for (j = logvalue - 4 + 1; j <= logvalue; j++) {
            base = (value + (1 << (j - 1)) - 1) >> j;
            if (base >= 16)
                continue;

            temp_value = base << j;
            temp_err = abs(value - temp_value);
            if (temp_err < error) {
                error = temp_err;
                ret = (j << 4) | base;
                if (temp_err == 0)
                    break;
            }
        }
    }

    /* Compare decoded values and clamp to max. */
    temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
    temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
    if (temp1 > temp2)
        ret = max;
    return ret;

}
750
751
752 #define         QP_MAX                  52
753 #define         VP8_QP_MAX              128
754
755
756 static float intel_lambda_qp(int qp)
757 {
758     float value, lambdaf;
759     value = qp;
760     value = value / 6 - 2;
761     if (value < 0)
762         value = 0;
763     lambdaf = roundf(powf(2, value));
764     return lambdaf;
765 }
766
767 static
768 void intel_h264_calc_mbmvcost_qp(int qp,
769                                  int slice_type,
770                                  uint8_t *vme_state_message)
771 {
772     int m_cost, j, mv_count;
773     float   lambda, m_costf;
774
775     assert(qp <= QP_MAX); 
776     lambda = intel_lambda_qp(qp);
777
778     m_cost = lambda;
779     vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
780     vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
781
782     if (slice_type == SLICE_TYPE_I) {
783         vme_state_message[MODE_INTRA_16X16] = 0;
784         m_cost = lambda * 4;
785         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
786         m_cost = lambda * 16; 
787         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
788         m_cost = lambda * 3;
789         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
790     } else {
791         m_cost = 0;
792         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
793         for (j = 1; j < 3; j++) {
794             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
795             m_cost = (int)m_costf;
796             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
797         }
798         mv_count = 3;
799         for (j = 4; j <= 64; j *= 2) {
800             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
801             m_cost = (int)m_costf;
802             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
803             mv_count++;
804         }
805
806         if (qp <= 25) {
807             vme_state_message[MODE_INTRA_16X16] = 0x4a;
808             vme_state_message[MODE_INTRA_8X8] = 0x4a;
809             vme_state_message[MODE_INTRA_4X4] = 0x4a;
810             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
811             vme_state_message[MODE_INTER_16X16] = 0x4a;
812             vme_state_message[MODE_INTER_16X8] = 0x4a;
813             vme_state_message[MODE_INTER_8X8] = 0x4a;
814             vme_state_message[MODE_INTER_8X4] = 0x4a;
815             vme_state_message[MODE_INTER_4X4] = 0x4a;
816             vme_state_message[MODE_INTER_BWD] = 0x2a;
817             return;
818         }
819         m_costf = lambda * 10;
820         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
821         m_cost = lambda * 14;
822         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
823         m_cost = lambda * 24; 
824         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
825         m_costf = lambda * 3.5;
826         m_cost = m_costf;
827         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
828         if (slice_type == SLICE_TYPE_P) {
829             m_costf = lambda * 2.5;
830             m_cost = m_costf;
831             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
832             m_costf = lambda * 4;
833             m_cost = m_costf;
834             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
835             m_costf = lambda * 1.5;
836             m_cost = m_costf;
837             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
838             m_costf = lambda * 3;
839             m_cost = m_costf;
840             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
841             m_costf = lambda * 5;
842             m_cost = m_costf;
843             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
844             /* BWD is not used in P-frame */
845             vme_state_message[MODE_INTER_BWD] = 0;
846         } else {
847             m_costf = lambda * 2.5;
848             m_cost = m_costf;
849             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
850             m_costf = lambda * 5.5;
851             m_cost = m_costf;
852             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
853             m_costf = lambda * 3.5;
854             m_cost = m_costf;
855             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
856             m_costf = lambda * 5.0;
857             m_cost = m_costf;
858             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
859             m_costf = lambda * 6.5;
860             m_cost = m_costf;
861             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
862             m_costf = lambda * 1.5;
863             m_cost = m_costf;
864             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
865         }
866     }
867     return;
868 }
869
870 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
871                                 struct encode_state *encode_state,
872                                 struct intel_encoder_context *encoder_context)
873 {
874     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
875     struct gen6_vme_context *vme_context = encoder_context->vme_context;
876     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
877     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
878     int qp;
879     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
880
881     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
882
883     if (encoder_context->rate_control_mode == VA_RC_CQP)
884         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
885     else
886         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
887
888     if (vme_state_message == NULL)
889         return;
890
891     intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
892 }
893
894 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
895                                 struct encode_state *encode_state,
896                                 struct intel_encoder_context *encoder_context)
897 {
898     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
899     struct gen6_vme_context *vme_context = encoder_context->vme_context;
900     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
901     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
902     int qp, m_cost, j, mv_count;
903     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
904     float   lambda, m_costf;
905
906     int is_key_frame = !pic_param->pic_flags.bits.frame_type;
907     int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
908   
909     if (vme_state_message == NULL)
910         return;
911  
912     if (encoder_context->rate_control_mode == VA_RC_CQP)
913         qp = q_matrix->quantization_index[0];
914     else
915         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
916
917     lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
918
919     m_cost = lambda;
920     vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
921
922     if (is_key_frame) {
923         vme_state_message[MODE_INTRA_16X16] = 0;
924         m_cost = lambda * 16; 
925         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
926         m_cost = lambda * 3;
927         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
928     } else {
929         m_cost = 0;
930         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
931         for (j = 1; j < 3; j++) {
932             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
933             m_cost = (int)m_costf;
934             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
935         }
936         mv_count = 3;
937         for (j = 4; j <= 64; j *= 2) {
938             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
939             m_cost = (int)m_costf;
940             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
941             mv_count++;
942         }
943
944         if (qp < 92 ) {
945             vme_state_message[MODE_INTRA_16X16] = 0x4a;
946             vme_state_message[MODE_INTRA_4X4] = 0x4a;
947             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
948             vme_state_message[MODE_INTER_16X16] = 0x4a;
949             vme_state_message[MODE_INTER_16X8] = 0x4a;
950             vme_state_message[MODE_INTER_8X8] = 0x4a;
951             vme_state_message[MODE_INTER_4X4] = 0x4a;
952             vme_state_message[MODE_INTER_BWD] = 0;
953             return;
954         }
955         m_costf = lambda * 10;
956         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
957         m_cost = lambda * 24; 
958         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
959             
960         m_costf = lambda * 3.5;
961         m_cost = m_costf;
962         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
963
964         m_costf = lambda * 2.5;
965         m_cost = m_costf;
966         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
967         m_costf = lambda * 4;
968         m_cost = m_costf;
969         vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
970         m_costf = lambda * 1.5;
971         m_cost = m_costf;
972         vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
973         m_costf = lambda * 5;
974         m_cost = m_costf;
975         vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
976         /* BWD is not used in P-frame */
977         vme_state_message[MODE_INTER_BWD] = 0;
978     }
979 }
980
981 #define         MB_SCOREBOARD_A         (1 << 0)
982 #define         MB_SCOREBOARD_B         (1 << 1)
983 #define         MB_SCOREBOARD_C         (1 << 2)
984 void 
985 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
986 {
987     vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
988     vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
989     vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
990                                                            MB_SCOREBOARD_B |
991                                                            MB_SCOREBOARD_C);
992
993     /* In VME prediction the current mb depends on the neighbour 
994      * A/B/C macroblock. So the left/up/up-right dependency should
995      * be considered.
996      */
997     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
998     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
999     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
1000     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
1001     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
1002     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
1003
1004     vme_context->gpe_context.vfe_desc7.dword = 0;
1005     return;
1006 }
1007
/*
 * Check whether macroblock (x_index, y_index) lies inside the picture and
 * inside the slice made of the num_mb macroblocks starting at first_mb
 * (raster order, mb_width macroblocks per row).
 *
 * Returns 0 when the macroblock is in bounds and -1 when it is out of
 * bounds.
 */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    int mb_index;

    if (x_index < 0 || x_index >= mb_width)
        return -1;
    if (y_index < 0 || y_index >= mb_height)
        return -1;

    mb_index = y_index * mb_width + x_index;
    /*
     * The slice covers [first_mb, first_mb + num_mb): the index
     * first_mb + num_mb is the first macroblock after the slice, so it
     * must be rejected as well.
     */
    if (mb_index < first_mb || mb_index >= (first_mb + num_mb))
        return -1;
    return 0;
}
1022
1023 void
1024 gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
1025                                      struct encode_state *encode_state,
1026                                      int mb_width, int mb_height,
1027                                      int kernel,
1028                                      int transform_8x8_mode_flag,
1029                                      struct intel_encoder_context *encoder_context)
1030 {
1031     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1032     int mb_row;
1033     int s;
1034     unsigned int *command_ptr;
1035     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1036     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1037     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1038     int qp;
1039     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1040
1041     if (encoder_context->rate_control_mode == VA_RC_CQP)
1042         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1043     else
1044         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1045
1046 #define         USE_SCOREBOARD          (1 << 21)
1047  
1048     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1049     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1050
1051     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1052         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
1053         int first_mb = pSliceParameter->macroblock_address;
1054         int num_mb = pSliceParameter->num_macroblocks;
1055         unsigned int mb_intra_ub, score_dep;
1056         int x_outer, y_outer, x_inner, y_inner;
1057         int xtemp_outer = 0;
1058
1059         x_outer = first_mb % mb_width;
1060         y_outer = first_mb / mb_width;
1061         mb_row = y_outer;
1062
1063         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1064             x_inner = x_outer;
1065             y_inner = y_outer;
1066             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1067                 mb_intra_ub = 0;
1068                 score_dep = 0;
1069                 if (x_inner != 0) {
1070                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1071                     score_dep |= MB_SCOREBOARD_A; 
1072                 }
1073                 if (y_inner != mb_row) {
1074                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1075                     score_dep |= MB_SCOREBOARD_B;
1076                     if (x_inner != 0)
1077                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1078                     if (x_inner != (mb_width -1)) {
1079                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1080                         score_dep |= MB_SCOREBOARD_C;
1081                     }
1082                 }
1083
1084                 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1085                 *command_ptr++ = kernel;
1086                 *command_ptr++ = USE_SCOREBOARD;
1087                 /* Indirect data */
1088                 *command_ptr++ = 0;
1089                 /* the (X, Y) term of scoreboard */
1090                 *command_ptr++ = ((y_inner << 16) | x_inner);
1091                 *command_ptr++ = score_dep;
1092                 /*inline data */
1093                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1094                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1095                 /* QP occupies one byte */
1096                 *command_ptr++ = qp;
1097                 x_inner -= 2;
1098                 y_inner += 1;
1099             }
1100             x_outer += 1;
1101         }
1102
1103         xtemp_outer = mb_width - 2;
1104         if (xtemp_outer < 0)
1105             xtemp_outer = 0;
1106         x_outer = xtemp_outer;
1107         y_outer = first_mb / mb_width;
1108         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
1109             y_inner = y_outer;
1110             x_inner = x_outer;
1111             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1112                 mb_intra_ub = 0;
1113                 score_dep = 0;
1114                 if (x_inner != 0) {
1115                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1116                     score_dep |= MB_SCOREBOARD_A; 
1117                 }
1118                 if (y_inner != mb_row) {
1119                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1120                     score_dep |= MB_SCOREBOARD_B;
1121                     if (x_inner != 0)
1122                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1123
1124                     if (x_inner != (mb_width -1)) {
1125                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1126                         score_dep |= MB_SCOREBOARD_C;
1127                     }
1128                 }
1129
1130                 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1131                 *command_ptr++ = kernel;
1132                 *command_ptr++ = USE_SCOREBOARD;
1133                 /* Indirect data */
1134                 *command_ptr++ = 0;
1135                 /* the (X, Y) term of scoreboard */
1136                 *command_ptr++ = ((y_inner << 16) | x_inner);
1137                 *command_ptr++ = score_dep;
1138                 /*inline data */
1139                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1140                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1141                 /* qp occupies one byte */
1142                 *command_ptr++ = qp;
1143
1144                 x_inner -= 2;
1145                 y_inner += 1;
1146             }
1147             x_outer++;
1148             if (x_outer >= mb_width) {
1149                 y_outer += 1;
1150                 x_outer = xtemp_outer;
1151             }           
1152         }
1153     }
1154
1155     *command_ptr++ = 0;
1156     *command_ptr++ = MI_BATCH_BUFFER_END;
1157
1158     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1159 }
1160
1161 static uint8_t
1162 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1163 {
1164     unsigned int is_long_term =
1165         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1166     unsigned int is_top_field =
1167         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1168     unsigned int is_bottom_field =
1169         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1170
1171     return ((is_long_term                         << 6) |
1172             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1173             (frame_store_id                       << 1) |
1174             ((is_top_field ^ 1) & is_bottom_field));
1175 }
1176
1177 void
1178 intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
1179                             struct encode_state *encode_state,
1180                             struct intel_encoder_context *encoder_context)
1181 {
1182     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1183     struct intel_batchbuffer *batch = encoder_context->base.batch;
1184     int slice_type;
1185     struct object_surface *obj_surface;
1186     unsigned int fref_entry, bref_entry;
1187     int frame_index, i;
1188     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1189
1190     fref_entry = 0x80808080;
1191     bref_entry = 0x80808080;
1192     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1193
1194     if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
1195         int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
1196
1197         if (ref_idx_l0 > 3) {
1198             WARN_ONCE("ref_idx_l0 is out of range\n");
1199             ref_idx_l0 = 0;
1200         }
1201
1202         obj_surface = vme_context->used_reference_objects[0];
1203         frame_index = -1;
1204         for (i = 0; i < 16; i++) {
1205             if (obj_surface &&
1206                 obj_surface == encode_state->reference_objects[i]) {
1207                 frame_index = i;
1208                 break;
1209             }
1210         }
1211         if (frame_index == -1) {
1212             WARN_ONCE("RefPicList0 is not found in DPB!\n");
1213         } else {
1214             int ref_idx_l0_shift = ref_idx_l0 * 8;
1215             fref_entry &= ~(0xFF << ref_idx_l0_shift);
1216             fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
1217         }
1218     }
1219
1220     if (slice_type == SLICE_TYPE_B) {
1221         int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
1222
1223         if (ref_idx_l1 > 3) {
1224             WARN_ONCE("ref_idx_l1 is out of range\n");
1225             ref_idx_l1 = 0;
1226         }
1227
1228         obj_surface = vme_context->used_reference_objects[1];
1229         frame_index = -1;
1230         for (i = 0; i < 16; i++) {
1231             if (obj_surface &&
1232                 obj_surface == encode_state->reference_objects[i]) {
1233                 frame_index = i;
1234                 break;
1235             }
1236         }
1237         if (frame_index == -1) {
1238             WARN_ONCE("RefPicList1 is not found in DPB!\n");
1239         } else {
1240             int ref_idx_l1_shift = ref_idx_l1 * 8;
1241             bref_entry &= ~(0xFF << ref_idx_l1_shift);
1242             bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
1243         }
1244     }
1245
1246     BEGIN_BCS_BATCH(batch, 10);
1247     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1248     OUT_BCS_BATCH(batch, 0);                  //Select L0
1249     OUT_BCS_BATCH(batch, fref_entry);         //Only 1 reference
1250     for(i = 0; i < 7; i++) {
1251         OUT_BCS_BATCH(batch, 0x80808080);
1252     }
1253     ADVANCE_BCS_BATCH(batch);
1254
1255     BEGIN_BCS_BATCH(batch, 10);
1256     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1257     OUT_BCS_BATCH(batch, 1);                  //Select L1
1258     OUT_BCS_BATCH(batch, bref_entry);         //Only 1 reference
1259     for(i = 0; i < 7; i++) {
1260         OUT_BCS_BATCH(batch, 0x80808080);
1261     }
1262     ADVANCE_BCS_BATCH(batch);
1263 }
1264
1265
1266 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1267                                  struct encode_state *encode_state,
1268                                  struct intel_encoder_context *encoder_context)
1269 {
1270     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1271     uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1272     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1273     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1274     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1275     uint32_t mv_x, mv_y;
1276     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1277     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1278     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1279
1280     if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1281         mv_x = 512;
1282         mv_y = 64;
1283     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1284         mv_x = 1024;
1285         mv_y = 128;
1286     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1287         mv_x = 2048;
1288         mv_y = 128;
1289     } else {
1290         WARN_ONCE("Incorrect Mpeg2 level setting!\n");
1291         mv_x = 512;
1292         mv_y = 64;
1293     }
1294
1295     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1296     if (pic_param->picture_type != VAEncPictureTypeIntra) {
1297         int qp, m_cost, j, mv_count;
1298         float   lambda, m_costf;
1299         slice_param = (VAEncSliceParameterBufferMPEG2 *)
1300             encode_state->slice_params_ext[0]->buffer;
1301         qp = slice_param->quantiser_scale_code;
1302         lambda = intel_lambda_qp(qp);
1303         /* No Intra prediction. So it is zero */
1304         vme_state_message[MODE_INTRA_8X8] = 0;
1305         vme_state_message[MODE_INTRA_4X4] = 0;
1306         vme_state_message[MODE_INTER_MV0] = 0;
1307         for (j = 1; j < 3; j++) {
1308             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1309             m_cost = (int)m_costf;
1310             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1311         }
1312         mv_count = 3;
1313         for (j = 4; j <= 64; j *= 2) {
1314             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1315             m_cost = (int)m_costf;
1316             vme_state_message[MODE_INTER_MV0 + mv_count] =
1317                 intel_format_lutvalue(m_cost, 0x6f);
1318             mv_count++;
1319         }
1320         m_cost = lambda;
1321         /* It can only perform the 16x16 search. So mode cost can be ignored for
1322          * the other mode. for example: 16x8/8x8
1323          */
1324         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1325         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1326
1327         vme_state_message[MODE_INTER_16X8] = 0;
1328         vme_state_message[MODE_INTER_8X8] = 0;
1329         vme_state_message[MODE_INTER_8X4] = 0;
1330         vme_state_message[MODE_INTER_4X4] = 0;
1331         vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
1332
1333     }
1334     vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1335
1336     vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
1337         width_in_mbs;
1338 }
1339
1340 void
1341 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
1342                                            struct encode_state *encode_state,
1343                                            int mb_width, int mb_height,
1344                                            int kernel,
1345                                            struct intel_encoder_context *encoder_context)
1346 {
1347     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1348     unsigned int *command_ptr;
1349
1350 #define         MPEG2_SCOREBOARD                (1 << 21)
1351
1352     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1353     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1354
1355     {
1356         unsigned int mb_intra_ub, score_dep;
1357         int x_outer, y_outer, x_inner, y_inner;
1358         int xtemp_outer = 0;
1359         int first_mb = 0;
1360         int num_mb = mb_width * mb_height;
1361
1362         x_outer = 0;
1363         y_outer = 0;
1364
1365
1366         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1367             x_inner = x_outer;
1368             y_inner = y_outer;
1369             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1370                 mb_intra_ub = 0;
1371                 score_dep = 0;
1372                 if (x_inner != 0) {
1373                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1374                     score_dep |= MB_SCOREBOARD_A; 
1375                 }
1376                 if (y_inner != 0) {
1377                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1378                     score_dep |= MB_SCOREBOARD_B;
1379
1380                     if (x_inner != 0)
1381                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1382
1383                     if (x_inner != (mb_width -1)) {
1384                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1385                         score_dep |= MB_SCOREBOARD_C;
1386                     }
1387                 }
1388
1389                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1390                 *command_ptr++ = kernel;
1391                 *command_ptr++ = MPEG2_SCOREBOARD;
1392                 /* Indirect data */
1393                 *command_ptr++ = 0;
1394                 /* the (X, Y) term of scoreboard */
1395                 *command_ptr++ = ((y_inner << 16) | x_inner);
1396                 *command_ptr++ = score_dep;
1397                 /*inline data */
1398                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1399                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1400                 x_inner -= 2;
1401                 y_inner += 1;
1402             }
1403             x_outer += 1;
1404         }
1405
1406         xtemp_outer = mb_width - 2;
1407         if (xtemp_outer < 0)
1408             xtemp_outer = 0;
1409         x_outer = xtemp_outer;
1410         y_outer = 0;
1411         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
1412             y_inner = y_outer;
1413             x_inner = x_outer;
1414             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1415                 mb_intra_ub = 0;
1416                 score_dep = 0;
1417                 if (x_inner != 0) {
1418                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1419                     score_dep |= MB_SCOREBOARD_A; 
1420                 }
1421                 if (y_inner != 0) {
1422                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1423                     score_dep |= MB_SCOREBOARD_B;
1424
1425                     if (x_inner != 0)
1426                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1427
1428                     if (x_inner != (mb_width -1)) {
1429                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1430                         score_dep |= MB_SCOREBOARD_C;
1431                     }
1432                 }
1433
1434                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1435                 *command_ptr++ = kernel;
1436                 *command_ptr++ = MPEG2_SCOREBOARD;
1437                 /* Indirect data */
1438                 *command_ptr++ = 0;
1439                 /* the (X, Y) term of scoreboard */
1440                 *command_ptr++ = ((y_inner << 16) | x_inner);
1441                 *command_ptr++ = score_dep;
1442                 /*inline data */
1443                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1444                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1445
1446                 x_inner -= 2;
1447                 y_inner += 1;
1448             }
1449             x_outer++;
1450             if (x_outer >= mb_width) {
1451                 y_outer += 1;
1452                 x_outer = xtemp_outer;
1453             }           
1454         }
1455     }
1456
1457     *command_ptr++ = 0;
1458     *command_ptr++ = MI_BATCH_BUFFER_END;
1459
1460     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1461     return;
1462 }
1463
1464 static int
1465 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1466                           VAPictureH264 *ref_list,
1467                           int num_pictures,
1468                           int dir)
1469 {
1470     int i, found = -1, min = 0x7FFFFFFF;
1471
1472     for (i = 0; i < num_pictures; i++) {
1473         int tmp;
1474
1475         if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1476             (ref_list[i].picture_id == VA_INVALID_SURFACE))
1477             break;
1478
1479         tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1480
1481         if (dir)
1482             tmp = -tmp;
1483
1484         if (tmp > 0 && tmp < min) {
1485             min = tmp;
1486             found = i;
1487         }
1488     }
1489
1490     return found;
1491 }
1492
1493 void
1494 intel_avc_vme_reference_state(VADriverContextP ctx,
1495                               struct encode_state *encode_state,
1496                               struct intel_encoder_context *encoder_context,
1497                               int list_index,
1498                               int surface_index,
1499                               void (* vme_source_surface_state)(
1500                                   VADriverContextP ctx,
1501                                   int index,
1502                                   struct object_surface *obj_surface,
1503                                   struct intel_encoder_context *encoder_context))
1504 {
1505     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1506     struct object_surface *obj_surface = NULL;
1507     struct i965_driver_data *i965 = i965_driver_data(ctx);
1508     VASurfaceID ref_surface_id;
1509     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1510     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1511     int max_num_references;
1512     VAPictureH264 *curr_pic;
1513     VAPictureH264 *ref_list;
1514     int ref_idx;
1515
1516     if (list_index == 0) {
1517         max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1518         ref_list = slice_param->RefPicList0;
1519     } else {
1520         max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1521         ref_list = slice_param->RefPicList1;
1522     }
1523
1524     if (max_num_references == 1) {
1525         if (list_index == 0) {
1526             ref_surface_id = slice_param->RefPicList0[0].picture_id;
1527             vme_context->used_references[0] = &slice_param->RefPicList0[0];
1528         } else {
1529             ref_surface_id = slice_param->RefPicList1[0].picture_id;
1530             vme_context->used_references[1] = &slice_param->RefPicList1[0];
1531         }
1532
1533         if (ref_surface_id != VA_INVALID_SURFACE)
1534             obj_surface = SURFACE(ref_surface_id);
1535
1536         if (!obj_surface ||
1537             !obj_surface->bo) {
1538             obj_surface = encode_state->reference_objects[list_index];
1539             vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1540         }
1541
1542         ref_idx = 0;
1543     } else {
1544         curr_pic = &pic_param->CurrPic;
1545
1546         /* select the reference frame in temporal space */
1547         ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1548         ref_surface_id = ref_list[ref_idx].picture_id;
1549
1550         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1551             obj_surface = SURFACE(ref_surface_id);
1552
1553         vme_context->used_reference_objects[list_index] = obj_surface;
1554         vme_context->used_references[list_index] = &ref_list[ref_idx];
1555     }
1556
1557     if (obj_surface &&
1558         obj_surface->bo) {
1559         assert(ref_idx >= 0);
1560         vme_context->used_reference_objects[list_index] = obj_surface;
1561         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1562         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
1563                                                     ref_idx << 16 |
1564                                                     ref_idx <<  8 |
1565                                                     ref_idx);
1566     } else {
1567         vme_context->used_reference_objects[list_index] = NULL;
1568         vme_context->used_references[list_index] = NULL;
1569         vme_context->ref_index_in_mb[list_index] = 0;
1570     }
1571 }
1572
1573 void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
1574                                         struct encode_state *encode_state,
1575                                         struct intel_encoder_context *encoder_context,
1576                                         int slice_index,
1577                                         struct intel_batchbuffer *slice_batch)
1578 {
1579     int count, i, start_index;
1580     unsigned int length_in_bits;
1581     VAEncPackedHeaderParameterBuffer *param = NULL;
1582     unsigned int *header_data = NULL;
1583     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1584     int slice_header_index;
1585
1586     if (encode_state->slice_header_index[slice_index] == 0)
1587         slice_header_index = -1;
1588     else
1589         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1590
1591     count = encode_state->slice_rawdata_count[slice_index];
1592     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1593
1594     for (i = 0; i < count; i++) {
1595         unsigned int skip_emul_byte_cnt;
1596
1597         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1598
1599         param = (VAEncPackedHeaderParameterBuffer *)
1600                     (encode_state->packed_header_params_ext[start_index + i]->buffer);
1601
1602         /* skip the slice header packed data type as it is lastly inserted */
1603         if (param->type == VAEncPackedHeaderSlice)
1604             continue;
1605
1606         length_in_bits = param->bit_length;
1607
1608         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1609
1610         /* as the slice header is still required, the last header flag is set to
1611          * zero.
1612          */
1613         mfc_context->insert_object(ctx,
1614                                    encoder_context,
1615                                    header_data,
1616                                    ALIGN(length_in_bits, 32) >> 5,
1617                                    length_in_bits & 0x1f,
1618                                    skip_emul_byte_cnt,
1619                                    0,
1620                                    0,
1621                                    !param->has_emulation_bytes,
1622                                    slice_batch);
1623     }
1624
1625     if (slice_header_index == -1) {
1626         unsigned char *slice_header = NULL;
1627         int slice_header_length_in_bits = 0;
1628         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1629         VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1630         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1631
1632         /* No slice header data is passed. And the driver needs to generate it */
1633         /* For the Normal H264 */
1634         slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
1635                                                              pPicParameter,
1636                                                              pSliceParameter,
1637                                                              &slice_header);
1638         mfc_context->insert_object(ctx, encoder_context,
1639                                    (unsigned int *)slice_header,
1640                                    ALIGN(slice_header_length_in_bits, 32) >> 5,
1641                                    slice_header_length_in_bits & 0x1f,
1642                                    5,  /* first 5 bytes are start code + nal unit type */
1643                                    1, 0, 1, slice_batch);
1644
1645         free(slice_header);
1646     } else {
1647         unsigned int skip_emul_byte_cnt;
1648
1649         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1650
1651         param = (VAEncPackedHeaderParameterBuffer *)
1652                     (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1653         length_in_bits = param->bit_length;
1654
1655         /* as the slice header is the last header data for one slice,
1656          * the last header flag is set to one.
1657          */
1658         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1659
1660         mfc_context->insert_object(ctx,
1661                                    encoder_context,
1662                                    header_data,
1663                                    ALIGN(length_in_bits, 32) >> 5,
1664                                    length_in_bits & 0x1f,
1665                                    skip_emul_byte_cnt,
1666                                    1,
1667                                    0,
1668                                    !param->has_emulation_bytes,
1669                                    slice_batch);
1670     }
1671
1672     return;
1673 }
1674
1675 void
1676 intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
1677                                 struct encode_state *encode_state,
1678                                 struct intel_encoder_context *encoder_context)
1679 {
1680     struct i965_driver_data *i965 = i965_driver_data(ctx);
1681     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1682     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1683     int qp;
1684     dri_bo *bo;
1685     uint8_t *cost_table;
1686
1687     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1688
1689
1690     if (slice_type == SLICE_TYPE_I) {
1691         if (vme_context->i_qp_cost_table)
1692             return;
1693     } else if (slice_type == SLICE_TYPE_P) {
1694         if (vme_context->p_qp_cost_table)
1695             return;
1696     } else {
1697         if (vme_context->b_qp_cost_table)
1698             return;
1699     }
1700
1701     /* It is enough to allocate 32 bytes for each qp. */
1702     bo = dri_bo_alloc(i965->intel.bufmgr,
1703                       "cost_table ",
1704                       QP_MAX * 32,
1705                       64);
1706
1707     dri_bo_map(bo, 1);
1708     assert(bo->virtual);
1709     cost_table = (uint8_t *)(bo->virtual);
1710     for (qp = 0; qp < QP_MAX; qp++) {
1711         intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
1712         cost_table += 32;
1713     }
1714
1715     dri_bo_unmap(bo);
1716
1717     if (slice_type == SLICE_TYPE_I) {
1718         vme_context->i_qp_cost_table = bo;
1719     } else if (slice_type == SLICE_TYPE_P) {
1720         vme_context->p_qp_cost_table = bo;
1721     } else {
1722         vme_context->b_qp_cost_table = bo;
1723     }
1724
1725     vme_context->cost_table_size = QP_MAX * 32;
1726     return;
1727 }
1728
1729 extern void
1730 intel_h264_setup_cost_surface(VADriverContextP ctx,
1731                               struct encode_state *encode_state,
1732                               struct intel_encoder_context *encoder_context,
1733                               unsigned long binding_table_offset,
1734                               unsigned long surface_state_offset)
1735 {
1736     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1737     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1738     dri_bo *bo;
1739
1740
1741     struct i965_buffer_surface cost_table;
1742
1743     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1744
1745
1746     if (slice_type == SLICE_TYPE_I) {
1747         bo = vme_context->i_qp_cost_table;
1748     } else if (slice_type == SLICE_TYPE_P) {
1749         bo = vme_context->p_qp_cost_table;
1750     } else {
1751         bo = vme_context->b_qp_cost_table;
1752     }
1753
1754     cost_table.bo = bo;
1755     cost_table.num_blocks = QP_MAX;
1756     cost_table.pitch = 16;
1757     cost_table.size_block = 32;
1758
1759     vme_context->vme_buffer_suface_setup(ctx,
1760                                          &vme_context->gpe_context,
1761                                          &cost_table,
1762                                          binding_table_offset,
1763                                          surface_state_offset);
1764 }
1765
1766 extern void
1767 intel_h264_enc_roi_config(VADriverContextP ctx,
1768                           struct encode_state *encode_state,
1769                           struct intel_encoder_context *encoder_context)
1770 {
1771     VAEncMiscParameterBuffer* pMiscParamROI;
1772     VAEncMiscParameterBufferROI *pParamROI;
1773     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1774     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1775     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1776     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1777     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1778
1779     vme_context->roi_enabled = 0;
1780     /* Restriction: Disable ROI when multi-slice is enabled */
1781     if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
1782         return;
1783
1784     if (encode_state->misc_param[VAEncMiscParameterTypeROI] == NULL) {
1785         return;
1786     }
1787
1788     pMiscParamROI = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeROI]->buffer;
1789     pParamROI = (VAEncMiscParameterBufferROI *)pMiscParamROI->data;
1790
1791     /* check whether number of ROI is correct */
1792     /* currently one region is supported */
1793     if (pParamROI->num_roi != 1) {
1794         return;
1795     }
1796
1797     vme_context->roi_enabled = 1;
1798
1799     if ((vme_context->saved_width_mbs !=  width_in_mbs) ||
1800         (vme_context->saved_height_mbs != height_in_mbs)) {
1801         free(vme_context->qp_per_mb);
1802         vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);
1803
1804         vme_context->saved_width_mbs = width_in_mbs;
1805         vme_context->saved_height_mbs = height_in_mbs;
1806         assert(vme_context->qp_per_mb);
1807     }
1808     if (encoder_context->rate_control_mode == VA_RC_CBR) {
1809         /*
1810          * TODO: More complex Qp adjust needs to be added.
1811          * Currently it is initialized to slice_qp.
1812          */
1813         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1814         int qp;
1815         int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1816
1817         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1818         memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
1819     } else if (encoder_context->rate_control_mode == VA_RC_CQP){
1820         VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1821         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1822         int qp;
1823
1824         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1825         memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
1826     } else {
1827         /*
1828          * TODO: Disable it for non CBR-CQP.
1829          */
1830         vme_context->roi_enabled = 0;
1831     }
1832     return;
1833 }
1834
1835 /* HEVC */
1836 static int
1837 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
1838                            VAPictureHEVC *ref_list,
1839                            int num_pictures,
1840                            int dir)
1841 {
1842     int i, found = -1, min = 0x7FFFFFFF;
1843
1844     for (i = 0; i < num_pictures; i++) {
1845         int tmp;
1846
1847         if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
1848             (ref_list[i].picture_id == VA_INVALID_SURFACE))
1849             break;
1850
1851         tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
1852
1853         if (dir)
1854             tmp = -tmp;
1855
1856         if (tmp > 0 && tmp < min) {
1857             min = tmp;
1858             found = i;
1859         }
1860     }
1861
1862     return found;
1863 }
1864 void
1865 intel_hevc_vme_reference_state(VADriverContextP ctx,
1866                                struct encode_state *encode_state,
1867                                struct intel_encoder_context *encoder_context,
1868                                int list_index,
1869                                int surface_index,
1870                                void (* vme_source_surface_state)(
1871                                    VADriverContextP ctx,
1872                                    int index,
1873                                    struct object_surface *obj_surface,
1874                                    struct intel_encoder_context *encoder_context))
1875 {
1876     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1877     struct object_surface *obj_surface = NULL;
1878     struct i965_driver_data *i965 = i965_driver_data(ctx);
1879     VASurfaceID ref_surface_id;
1880     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1881     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1882     int max_num_references;
1883     VAPictureHEVC *curr_pic;
1884     VAPictureHEVC *ref_list;
1885     int ref_idx;
1886
1887     if (list_index == 0) {
1888         max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
1889         ref_list = slice_param->ref_pic_list0;
1890     } else {
1891         max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
1892         ref_list = slice_param->ref_pic_list1;
1893     }
1894
1895     if (max_num_references == 1) {
1896         if (list_index == 0) {
1897             ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
1898             vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
1899         } else {
1900             ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
1901             vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
1902         }
1903
1904         if (ref_surface_id != VA_INVALID_SURFACE)
1905             obj_surface = SURFACE(ref_surface_id);
1906
1907         if (!obj_surface ||
1908             !obj_surface->bo) {
1909             obj_surface = encode_state->reference_objects[list_index];
1910             vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
1911         }
1912
1913         ref_idx = 0;
1914     } else {
1915         curr_pic = &pic_param->decoded_curr_pic;
1916
1917         /* select the reference frame in temporal space */
1918         ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1919         ref_surface_id = ref_list[ref_idx].picture_id;
1920
1921         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1922             obj_surface = SURFACE(ref_surface_id);
1923
1924         vme_context->used_reference_objects[list_index] = obj_surface;
1925         vme_context->used_references[list_index] = &ref_list[ref_idx];
1926     }
1927
1928     if (obj_surface &&
1929         obj_surface->bo) {
1930         assert(ref_idx >= 0);
1931         vme_context->used_reference_objects[list_index] = obj_surface;
1932         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1933         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
1934                 ref_idx << 16 |
1935                 ref_idx <<  8 |
1936                 ref_idx);
1937     } else {
1938         vme_context->used_reference_objects[list_index] = NULL;
1939         vme_context->used_references[list_index] = NULL;
1940         vme_context->ref_index_in_mb[list_index] = 0;
1941     }
1942 }
1943
1944 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
1945                                      struct encode_state *encode_state,
1946                                      struct intel_encoder_context *encoder_context)
1947 {
1948     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1949     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1950     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1951     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1952     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1953     int qp, m_cost, j, mv_count;
1954     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
1955     float   lambda, m_costf;
1956
1957     /* here no SI SP slice for HEVC, do not need slice fixup */
1958     int slice_type = slice_param->slice_type;
1959
1960
1961     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1962
1963     if(encoder_context->rate_control_mode == VA_RC_CBR)
1964     {
1965         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1966         if(slice_type == HEVC_SLICE_B) {
1967             if(pSequenceParameter->ip_period == 1)
1968             {
1969                 slice_type = HEVC_SLICE_P;
1970                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
1971
1972             }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
1973                 slice_type = HEVC_SLICE_P;
1974                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
1975             }
1976         }
1977
1978     }
1979
1980     if (vme_state_message == NULL)
1981         return;
1982
1983     assert(qp <= QP_MAX);
1984     lambda = intel_lambda_qp(qp);
1985     if (slice_type == HEVC_SLICE_I) {
1986         vme_state_message[MODE_INTRA_16X16] = 0;
1987         m_cost = lambda * 4;
1988         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
1989         m_cost = lambda * 16;
1990         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
1991         m_cost = lambda * 3;
1992         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
1993     } else {
1994         m_cost = 0;
1995         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
1996         for (j = 1; j < 3; j++) {
1997             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1998             m_cost = (int)m_costf;
1999             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2000         }
2001         mv_count = 3;
2002         for (j = 4; j <= 64; j *= 2) {
2003             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2004             m_cost = (int)m_costf;
2005             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
2006             mv_count++;
2007         }
2008
2009         if (qp <= 25) {
2010             vme_state_message[MODE_INTRA_16X16] = 0x4a;
2011             vme_state_message[MODE_INTRA_8X8] = 0x4a;
2012             vme_state_message[MODE_INTRA_4X4] = 0x4a;
2013             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2014             vme_state_message[MODE_INTER_16X16] = 0x4a;
2015             vme_state_message[MODE_INTER_16X8] = 0x4a;
2016             vme_state_message[MODE_INTER_8X8] = 0x4a;
2017             vme_state_message[MODE_INTER_8X4] = 0x4a;
2018             vme_state_message[MODE_INTER_4X4] = 0x4a;
2019             vme_state_message[MODE_INTER_BWD] = 0x2a;
2020             return;
2021         }
2022         m_costf = lambda * 10;
2023         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2024         m_cost = lambda * 14;
2025         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2026         m_cost = lambda * 24;
2027         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2028         m_costf = lambda * 3.5;
2029         m_cost = m_costf;
2030         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2031         if (slice_type == HEVC_SLICE_P) {
2032             m_costf = lambda * 2.5;
2033             m_cost = m_costf;
2034             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2035             m_costf = lambda * 4;
2036             m_cost = m_costf;
2037             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2038             m_costf = lambda * 1.5;
2039             m_cost = m_costf;
2040             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2041             m_costf = lambda * 3;
2042             m_cost = m_costf;
2043             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2044             m_costf = lambda * 5;
2045             m_cost = m_costf;
2046             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2047             /* BWD is not used in P-frame */
2048             vme_state_message[MODE_INTER_BWD] = 0;
2049         } else {
2050             m_costf = lambda * 2.5;
2051             m_cost = m_costf;
2052             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2053             m_costf = lambda * 5.5;
2054             m_cost = m_costf;
2055             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2056             m_costf = lambda * 3.5;
2057             m_cost = m_costf;
2058             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2059             m_costf = lambda * 5.0;
2060             m_cost = m_costf;
2061             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2062             m_costf = lambda * 6.5;
2063             m_cost = m_costf;
2064             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2065             m_costf = lambda * 1.5;
2066             m_cost = m_costf;
2067             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
2068         }
2069     }
2070 }