[android-x86/hardware-intel-common-vaapi.git] / src / gen6_mfc_common.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "gen9_mfc.h"
45 #include "intel_media.h"
46
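/*
 * Fallback for toolchains whose libm lacks log2f(): since M_LN2 is ln(2),
 * logf(x) / M_LN2 is mathematically identical to log2f(x).
 */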
47 #ifndef HAVE_LOG2F
48 #define log2f(x) (logf(x)/(float)M_LN2)
49 #endif
50
51 int intel_avc_enc_slice_type_fixup(int slice_type)
52 {
53     if (slice_type == SLICE_TYPE_SP ||
54         slice_type == SLICE_TYPE_P)
55         slice_type = SLICE_TYPE_P;
56     else if (slice_type == SLICE_TYPE_SI ||
57              slice_type == SLICE_TYPE_I)
58         slice_type = SLICE_TYPE_I;
59     else {
60         if (slice_type != SLICE_TYPE_B)
61             WARN_ONCE("Invalid slice type for H.264 encoding!\n");
62
63         slice_type = SLICE_TYPE_B;
64     }
65
66     return slice_type;
67 }
68
69 static void
70 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state, 
71                                         struct intel_encoder_context *encoder_context)
72 {
73     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
74     int i;
75
76     for(i = 0 ; i < 3; i++) {
77         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
78         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
79         mfc_context->bit_rate_control_context[i].GrowInit = 6;
80         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
81         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
82         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
83         
84         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
85         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
86         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
87         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
88         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
89         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
90     }
91 }
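/*
 * The three contexts above are presumably indexed by slice type (P/B/I);
 * the same grow/shrink damping constants and Correct[] table are
 * programmed for each of them.
 */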
92
93 static void intel_mfc_brc_init(struct encode_state *encode_state,
94                                struct intel_encoder_context* encoder_context)
95 {
96     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
97     double bitrate, framerate;
98     double frame_per_bits = 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
99     double qp1_size = 0.1 * frame_per_bits;
100     double qp51_size = 0.001 * frame_per_bits;
101     int min_qp = MAX(1, encoder_context->brc.min_qp);
102     double bpf, factor, hrd_factor;
103     int inum = encoder_context->brc.num_iframes_in_gop,
104         pnum = encoder_context->brc.num_pframes_in_gop,
105         bnum = encoder_context->brc.num_bframes_in_gop; /* Gop structure: number of I, P, B frames in the Gop. */
106     int intra_period = encoder_context->brc.gop_size;
107     int i;
108
109     if (encoder_context->layer.num_layers > 1)
110         qp1_size = 0.15 * frame_per_bits;
111
112     mfc_context->brc.mode = encoder_context->rate_control_mode;
113
114     mfc_context->hrd.violation_noted = 0;
115
116     for (i = 0; i < encoder_context->layer.num_layers; i++) {
117         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = 26;
118         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 26;
119         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = 26;
120
121         if (i == 0) {
122             bitrate = encoder_context->brc.bits_per_second[0];
123             framerate = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
124         } else {
125             bitrate = (encoder_context->brc.bits_per_second[i] - encoder_context->brc.bits_per_second[i - 1]);
126             framerate = ((double)encoder_context->brc.framerate[i].num / (double)encoder_context->brc.framerate[i].den) -
127                 ((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
128         }
129
130         if (i == encoder_context->layer.num_layers - 1)
131             factor = 1.0;
132         else {
133             factor = ((double)encoder_context->brc.framerate[i].num / (double)encoder_context->brc.framerate[i].den) /
134                 ((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
135         }
136
137         hrd_factor = (double)bitrate / encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
138
139         mfc_context->hrd.buffer_size[i] = (unsigned int)(encoder_context->brc.hrd_buffer_size * hrd_factor);
140         mfc_context->hrd.current_buffer_fullness[i] =
141             (double)(encoder_context->brc.hrd_initial_buffer_fullness < encoder_context->brc.hrd_buffer_size) ?
142             encoder_context->brc.hrd_initial_buffer_fullness : encoder_context->brc.hrd_buffer_size / 2.;
143         mfc_context->hrd.current_buffer_fullness[i] *= hrd_factor;
144         mfc_context->hrd.target_buffer_fullness[i] = (double)encoder_context->brc.hrd_buffer_size * hrd_factor / 2.;
145         mfc_context->hrd.buffer_capacity[i] = (double)encoder_context->brc.hrd_buffer_size * hrd_factor / qp1_size;
146
147         if (encoder_context->layer.num_layers > 1) {
148             if (i == 0) {
149                 intra_period = (int)(encoder_context->brc.gop_size * factor);
150                 inum = 1;
151                 pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor);
152                 bnum = intra_period - inum - pnum;
153             } else {
154                 intra_period = (int)(encoder_context->brc.gop_size * factor) - intra_period;
155                 inum = 0;
156                 pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor) - pnum;
157                 bnum = intra_period - inum - pnum;
158             }
159         }
160
161         mfc_context->brc.gop_nums[i][SLICE_TYPE_I] = inum;
162         mfc_context->brc.gop_nums[i][SLICE_TYPE_P] = pnum;
163         mfc_context->brc.gop_nums[i][SLICE_TYPE_B] = bnum;
164
165         mfc_context->brc.target_frame_size[i][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
166                                                                     (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
167         mfc_context->brc.target_frame_size[i][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
168         mfc_context->brc.target_frame_size[i][SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
169
170         bpf = mfc_context->brc.bits_per_frame[i] = bitrate/framerate;
171
172         if (encoder_context->brc.initial_qp) {
173             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = encoder_context->brc.initial_qp;
174             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = encoder_context->brc.initial_qp;
175             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = encoder_context->brc.initial_qp;
176         } else {
177             if ((bpf > qp51_size) && (bpf < qp1_size)) {
178                 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
179             }
180             else if (bpf >= qp1_size)
181                 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 1;
182             else if (bpf <= qp51_size)
183                 mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51;
184
185             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P];
186             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I];
187         }
188
189         BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I], min_qp, 51);
190         BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P], min_qp, 51);
191         BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B], min_qp, 51);
192     }
193 }
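/*
 * Rough feel for the initial-QP estimate above (illustrative numbers, not
 * taken from any particular stream): for a 1920x1080 NV12 input,
 * frame_per_bits = 8 * 3 * 1920 * 1080 / 2 = 24883200, so
 * qp1_size = 2488320 and qp51_size = 24883.2 bits.  A budget of
 * bpf = 1000000 bits per frame then yields
 * 51 - 50 * (1000000 - 24883.2) / (2488320 - 24883.2) ~= 31
 * as the starting QP for all slice types.
 */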
194
195 int intel_mfc_update_hrd(struct encode_state *encode_state,
196                          struct intel_encoder_context *encoder_context,
197                          int frame_bits)
198 {
199     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
200     int layer_id = encoder_context->layer.curr_frame_layer_id;
201     double prev_bf = mfc_context->hrd.current_buffer_fullness[layer_id];
202
203     mfc_context->hrd.current_buffer_fullness[layer_id] -= frame_bits;
204
205     if (mfc_context->hrd.buffer_size[layer_id] > 0 && mfc_context->hrd.current_buffer_fullness[layer_id] <= 0.) {
206         mfc_context->hrd.current_buffer_fullness[layer_id] = prev_bf;
207         return BRC_UNDERFLOW;
208     }
209     
210     mfc_context->hrd.current_buffer_fullness[layer_id] += mfc_context->brc.bits_per_frame[layer_id];
211     if (mfc_context->hrd.buffer_size[layer_id] > 0 && mfc_context->hrd.current_buffer_fullness[layer_id] > mfc_context->hrd.buffer_size[layer_id]) {
212         if (mfc_context->brc.mode == VA_RC_VBR)
213             mfc_context->hrd.current_buffer_fullness[layer_id] = mfc_context->hrd.buffer_size[layer_id];
214         else {
215             mfc_context->hrd.current_buffer_fullness[layer_id] = prev_bf;
216             return BRC_OVERFLOW;
217         }
218     }
219     return BRC_NO_HRD_VIOLATION;
220 }
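/*
 * Hypothetical walk-through of the bookkeeping above: with bits_per_frame
 * = 200000 and a 1000000-bit buffer currently holding 600000 bits, a
 * 350000-bit frame leaves 600000 - 350000 + 200000 = 450000 bits and
 * returns BRC_NO_HRD_VIOLATION, whereas a 700000-bit frame would drive the
 * fullness below zero, so the previous fullness is restored and
 * BRC_UNDERFLOW is reported instead.
 */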
221
222 int intel_mfc_brc_postpack(struct encode_state *encode_state,
223                            struct intel_encoder_context *encoder_context,
224                            int frame_bits)
225 {
226     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
227     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
228     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; 
229     int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
230     int curr_frame_layer_id, next_frame_layer_id;
231     int qpi, qpp, qpb;
232     int qp; // quantizer of previously encoded slice of current type
233     int qpn; // predicted quantizer for next frame of current type in integer format
234     double qpf; // predicted quantizer for next frame of current type in float format
235     double delta_qp; // QP correction
236     int min_qp = MAX(1, encoder_context->brc.min_qp);
237     int target_frame_size, frame_size_next;
238     /* Notes:
239      *  x - how far we are from the target HRD buffer fullness
240      *  y - how far we are from the HRD buffer borders
241      */
242     double x, y;
243     double frame_size_alpha;
244
245     if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0) {
246         curr_frame_layer_id = 0;
247         next_frame_layer_id = 0;
248     } else {
249         curr_frame_layer_id = encoder_context->layer.curr_frame_layer_id;
250         next_frame_layer_id = encoder_context->layer.frame_layer_ids[encoder_context->num_frames_in_sequence % encoder_context->layer.size_frame_layer_ids];
251     }
252
253     /* first check whether the HRD constraints are respected */
254     sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
255
256     if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
257         /* nothing */
258     } else {
259         next_frame_layer_id = curr_frame_layer_id;
260     }
261
262     mfc_context->brc.bits_prev_frame[curr_frame_layer_id] = frame_bits;
263     frame_bits = mfc_context->brc.bits_prev_frame[next_frame_layer_id];
264
265     mfc_context->brc.prev_slice_type[curr_frame_layer_id] = slicetype;
266     slicetype = mfc_context->brc.prev_slice_type[next_frame_layer_id];
267
268     /* 0 means the next frame is the first frame of next layer */
269     if (frame_bits == 0)
270         return sts;
271
272     qpi = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I];
273     qpp = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P];
274     qpb = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B];
275
276     qp = mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype];
277
278     target_frame_size = mfc_context->brc.target_frame_size[next_frame_layer_id][slicetype];
279     if (mfc_context->hrd.buffer_capacity[next_frame_layer_id] < 5)
280         frame_size_alpha = 0;
281     else
282         frame_size_alpha = (double)mfc_context->brc.gop_nums[next_frame_layer_id][slicetype];
283     if (frame_size_alpha > 30) frame_size_alpha = 30;
284     frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
285         (double)(frame_size_alpha + 1.);
286
287     /* clamp frame_size_next so that it is neither negative nor too small */
288     if ((double)frame_size_next < (double)(target_frame_size * 0.25))
289         frame_size_next = (int)((double)target_frame_size * 0.25);
290
291     qpf = (double)qp * target_frame_size / frame_size_next;
292     qpn = (int)(qpf + 0.5);
293
294     if (qpn == qp) {
295         /* rounding qpf into qpn loses precision: accumulate the error and compensate once it exceeds a full QP step */
296         mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] += qpf - qpn;
297         if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] > 1.0) {
298             qpn++;
299             mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
300         } else if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] < -1.0) {
301             qpn--;
302             mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
303         }
304     }
305     /* making sure that QP is not changing too fast */
306     if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
307     else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
308     /* make sure the predicted QP does not leave the valid QP range */
309     BRC_CLIP(qpn, 1, 51);
310
311     /* calculate the QP correction as a function of the HRD buffer fullness */
312     x = mfc_context->hrd.target_buffer_fullness[next_frame_layer_id] - mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
313     if (x > 0) {
314         x /= mfc_context->hrd.target_buffer_fullness[next_frame_layer_id];
315         y = mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
316     }
317     else {
318         x /= (mfc_context->hrd.buffer_size[next_frame_layer_id] - mfc_context->hrd.target_buffer_fullness[next_frame_layer_id]);
319         y = mfc_context->hrd.buffer_size[next_frame_layer_id] - mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
320     }
321     if (y < 0.01) y = 0.01;
322     if (x > 1) x = 1;
323     else if (x < -1) x = -1;
324
325     delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
326     qpn = (int)(qpn + delta_qp + 0.5);
327
328     /* make sure the predicted QP does not leave the valid QP range */
329     BRC_CLIP(qpn, min_qp, 51);
330
331     if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
332         /* correcting QPs of slices of other types */
333         if (slicetype == SLICE_TYPE_P) {
334             if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
335                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
336             if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
337                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
338         } else if (slicetype == SLICE_TYPE_I) {
339             if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
340                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
341             if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
342                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
343         } else { // SLICE_TYPE_B
344             if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
345                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
346             if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
347                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
348         }
349         BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I], min_qp, 51);
350         BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P], min_qp, 51);
351         BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B], min_qp, 51);
352     } else if (sts == BRC_UNDERFLOW) { // underflow
353         if (qpn <= qp) qpn = qp + 1;
354         if (qpn > 51) {
355             qpn = 51;
356             sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
357         }
358     } else if (sts == BRC_OVERFLOW) {
359         if (qpn >= qp) qpn = qp - 1;
360         if (qpn < min_qp) { // overflow with minQP
361             qpn = min_qp;
362             sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
363         }
364     }
365
366     mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype] = qpn;
367
368     return sts;
369 }
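/*
 * Worked example of the core QP update above (illustrative numbers): with
 * target_frame_size = 100000 bits, an actual frame of 150000 bits and
 * frame_size_alpha = 4, frame_size_next = 100000 + (100000 - 150000) / 5
 * = 90000, so a previous QP of 30 becomes qpf = 30 * 100000 / 90000 ~= 33.3
 * and qpn = 33, before the rate-of-change clamp and the HRD-fullness
 * delta_qp correction are applied.
 */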
370
371 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
372                                        struct intel_encoder_context *encoder_context)
373 {
374     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
375     unsigned int rate_control_mode = encoder_context->rate_control_mode;
376     int target_bit_rate = encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
377     
378     // currently only CBR mode is supported.
379     if (rate_control_mode == VA_RC_CBR) {
380         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
381         mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
382         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
383         mfc_context->vui_hrd.i_frame_number = 0;
384
385         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24; 
386         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
387         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
388     }
389
390 }
391
392 void 
393 intel_mfc_hrd_context_update(struct encode_state *encode_state, 
394                              struct gen6_mfc_context *mfc_context)
395 {
396     mfc_context->vui_hrd.i_frame_number++;
397 }
398
399 int intel_mfc_interlace_check(VADriverContextP ctx,
400                               struct encode_state *encode_state,
401                               struct intel_encoder_context *encoder_context)
402 {
403     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
404     VAEncSliceParameterBufferH264 *pSliceParameter;
405     int i;
406     int mbCount = 0;
407     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
408     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
409   
410     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
411         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer; 
412         mbCount += pSliceParameter->num_macroblocks; 
413     }
414     
415     if ( mbCount == ( width_in_mbs * height_in_mbs ) )
416         return 0;
417
418     return 1;
419 }
420
421 void intel_mfc_brc_prepare(struct encode_state *encode_state,
422                            struct intel_encoder_context *encoder_context)
423 {
424     unsigned int rate_control_mode = encoder_context->rate_control_mode;
425
426     if (encoder_context->codec != CODEC_H264 &&
427         encoder_context->codec != CODEC_H264_MVC)
428         return;
429
430     if (rate_control_mode == VA_RC_CBR) {
431         /* Program bit rate control */
432         if (encoder_context->brc.need_reset) {
433             intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
434             intel_mfc_brc_init(encode_state, encoder_context);
435         }
436
437         /* Program HRD control */
438         if (encoder_context->brc.need_reset)
439             intel_mfc_hrd_context_init(encode_state, encoder_context);    
440     }
441 }
442
443 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
444                                               struct encode_state *encode_state,
445                                               struct intel_encoder_context *encoder_context,
446                                               struct intel_batchbuffer *slice_batch)
447 {
448     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
449     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
450     unsigned int rate_control_mode = encoder_context->rate_control_mode;
451     unsigned int skip_emul_byte_cnt;
452
453     if (encode_state->packed_header_data[idx]) {
454         VAEncPackedHeaderParameterBuffer *param = NULL;
455         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
456         unsigned int length_in_bits;
457
458         assert(encode_state->packed_header_param[idx]);
459         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
460         length_in_bits = param->bit_length;
461
462         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
463         mfc_context->insert_object(ctx,
464                                    encoder_context,
465                                    header_data,
466                                    ALIGN(length_in_bits, 32) >> 5,
467                                    length_in_bits & 0x1f,
468                                    skip_emul_byte_cnt,
469                                    0,
470                                    0,
471                                    !param->has_emulation_bytes,
472                                    slice_batch);
473     }
474
475     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
476
477     if (encode_state->packed_header_data[idx]) {
478         VAEncPackedHeaderParameterBuffer *param = NULL;
479         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
480         unsigned int length_in_bits;
481
482         assert(encode_state->packed_header_param[idx]);
483         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
484         length_in_bits = param->bit_length;
485
486         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
487
488         mfc_context->insert_object(ctx,
489                                    encoder_context,
490                                    header_data,
491                                    ALIGN(length_in_bits, 32) >> 5,
492                                    length_in_bits & 0x1f,
493                                    skip_emul_byte_cnt,
494                                    0,
495                                    0,
496                                    !param->has_emulation_bytes,
497                                    slice_batch);
498     }
499     
500     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
501
502     if (encode_state->packed_header_data[idx]) {
503         VAEncPackedHeaderParameterBuffer *param = NULL;
504         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
505         unsigned int length_in_bits;
506
507         assert(encode_state->packed_header_param[idx]);
508         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
509         length_in_bits = param->bit_length;
510
511         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
512         mfc_context->insert_object(ctx,
513                                    encoder_context,
514                                    header_data,
515                                    ALIGN(length_in_bits, 32) >> 5,
516                                    length_in_bits & 0x1f,
517                                    skip_emul_byte_cnt,
518                                    0,
519                                    0,
520                                    !param->has_emulation_bytes,
521                                    slice_batch);
522     } else if (rate_control_mode == VA_RC_CBR) {
523         // this is the first AU
524         struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
525
526         unsigned char *sei_data = NULL;
527     
528         int length_in_bits = build_avc_sei_buffer_timing(
529             mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
530             mfc_context->vui_hrd.i_initial_cpb_removal_delay,
531             0,
532             mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
533             mfc_context->vui_hrd.i_dpb_output_delay_length,
534             0,
535             &sei_data);
536         mfc_context->insert_object(ctx,
537                                    encoder_context,
538                                    (unsigned int *)sei_data,
539                                    ALIGN(length_in_bits, 32) >> 5,
540                                    length_in_bits & 0x1f,
541                                    5,
542                                    0,   
543                                    0,   
544                                    1,
545                                    slice_batch);  
546         free(sei_data);
547     }
548 }
549
550 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, 
551                                struct encode_state *encode_state,
552                                struct intel_encoder_context *encoder_context)
553 {
554     struct i965_driver_data *i965 = i965_driver_data(ctx);
555     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
556     struct object_surface *obj_surface; 
557     struct object_buffer *obj_buffer;
558     GenAvcSurface *gen6_avc_surface;
559     dri_bo *bo;
560     VAStatus vaStatus = VA_STATUS_SUCCESS;
561     int i, j, enable_avc_ildb = 0;
562     VAEncSliceParameterBufferH264 *slice_param;
563     struct i965_coded_buffer_segment *coded_buffer_segment;
564     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
565     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
566     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
567
568     if (IS_GEN6(i965->intel.device_info)) {
569         /* On the SNB it should be fixed to 128 for the DMV buffer */
570         width_in_mbs = 128;
571     }
572
573     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
574         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
575         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
576
577         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
578             assert((slice_param->slice_type == SLICE_TYPE_I) ||
579                    (slice_param->slice_type == SLICE_TYPE_SI) ||
580                    (slice_param->slice_type == SLICE_TYPE_P) ||
581                    (slice_param->slice_type == SLICE_TYPE_SP) ||
582                    (slice_param->slice_type == SLICE_TYPE_B));
583
584             if (slice_param->disable_deblocking_filter_idc != 1) {
585                 enable_avc_ildb = 1;
586                 break;
587             }
588
589             slice_param++;
590         }
591     }
592
593     /* Set up all the input & output objects */
594
595     /* Set up the current frame and its direct mv buffer */
596     obj_surface = encode_state->reconstructed_object;
597     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
598
599     if ( obj_surface->private_data == NULL) {
600         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
601         assert(gen6_avc_surface);
602         gen6_avc_surface->dmv_top = 
603             dri_bo_alloc(i965->intel.bufmgr,
604                          "Buffer",
605                          68 * width_in_mbs * height_in_mbs, 
606                          64);
607         gen6_avc_surface->dmv_bottom = 
608             dri_bo_alloc(i965->intel.bufmgr,
609                          "Buffer",
610                          68 * width_in_mbs * height_in_mbs, 
611                          64);
612         assert(gen6_avc_surface->dmv_top);
613         assert(gen6_avc_surface->dmv_bottom);
614         obj_surface->private_data = (void *)gen6_avc_surface;
615         obj_surface->free_private_data = (void *)gen_free_avc_surface; 
616     }
617     gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
618     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
619     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
620     dri_bo_reference(gen6_avc_surface->dmv_top);
621     dri_bo_reference(gen6_avc_surface->dmv_bottom);
622
623     if (enable_avc_ildb) {
624         mfc_context->post_deblocking_output.bo = obj_surface->bo;
625         dri_bo_reference(mfc_context->post_deblocking_output.bo);
626     } else {
627         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
628         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
629     }
630
631     mfc_context->surface_state.width = obj_surface->orig_width;
632     mfc_context->surface_state.height = obj_surface->orig_height;
633     mfc_context->surface_state.w_pitch = obj_surface->width;
634     mfc_context->surface_state.h_pitch = obj_surface->height;
635     
636     /* Setup reference frames and direct mv buffers*/
637     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
638         obj_surface = encode_state->reference_objects[i];
639         
640         if (obj_surface && obj_surface->bo) {
641             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
642             dri_bo_reference(obj_surface->bo);
643
644             /* Check DMV buffer */
645             if ( obj_surface->private_data == NULL) {
646                 
647                 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
648                 assert(gen6_avc_surface);
649                 gen6_avc_surface->dmv_top = 
650                     dri_bo_alloc(i965->intel.bufmgr,
651                                  "Buffer",
652                                  68 * width_in_mbs * height_in_mbs, 
653                                  64);
654                 gen6_avc_surface->dmv_bottom = 
655                     dri_bo_alloc(i965->intel.bufmgr,
656                                  "Buffer",
657                                  68 * width_in_mbs * height_in_mbs, 
658                                  64);
659                 assert(gen6_avc_surface->dmv_top);
660                 assert(gen6_avc_surface->dmv_bottom);
661                 obj_surface->private_data = gen6_avc_surface;
662                 obj_surface->free_private_data = gen_free_avc_surface; 
663             }
664     
665             gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
666             /* Setup DMV buffer */
667             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
668             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
669             dri_bo_reference(gen6_avc_surface->dmv_top);
670             dri_bo_reference(gen6_avc_surface->dmv_bottom);
671         } else {
672             break;
673         }
674     }
675
676     mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
677     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
678
679     obj_buffer = encode_state->coded_buf_object;
680     bo = obj_buffer->buffer_store->bo;
681     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
682     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
683     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
684     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
685     
686     dri_bo_map(bo, 1);
687     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
688     coded_buffer_segment->mapped = 0;
689     coded_buffer_segment->codec = encoder_context->codec;
690     dri_bo_unmap(bo);
691
692     return vaStatus;
693 }
694 /*
695  * The LUT uses a pair of 4-bit fields, (shift, base), so that an entry
696  * encodes value = base * 2^shift, i.e. 2^K * X = value.
697  * So it is necessary to convert a cost into the nearest LUT format.
698  * The derivation is:
699  *    2^K * X = 2^n * (1 + deltaX)
700  *    K + log2(X) = n + log2(1 + deltaX)
701  *    log2(X) = n - K + log2(1 + deltaX)
702  *    As X is in the range of [1, 15]:
703  *      4 > n - K + log2(1 + deltaX) >= 0
704  *      =>  n + log2(1 + deltaX) >= K > n - 4 + log2(1 + deltaX)
705  *    From this we can derive the corresponding K and the nearest LUT format.
706  */
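/*
 * Worked example of the conversion below: for value = 100, logvalue = 6
 * and shifts j = 3..6 are tried; j = 3 gives base = (100 + 3) >> 3 = 12,
 * i.e. 12 << 3 = 96 with an error of 4, which no larger shift improves,
 * so intel_format_lutvalue(100, max) returns (3 << 4) | 12 = 0x3c
 * (assuming max decodes to at least 96).
 */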
707 int intel_format_lutvalue(int value, int max)
708 {
709     int ret;
710     int logvalue, temp1, temp2;
711
712     if (value <= 0)
713         return 0;
714
715     logvalue = (int)(log2f((float)value));
716     if (logvalue < 4) {
717         ret = value;
718     } else {
719         int error, temp_value, base, j, temp_err;
720         error = value;
721         j = logvalue - 4 + 1;
722         ret = -1;
723         for(; j <= logvalue; j++) {
724             if (j == 0) {
725                 base = value >> j;
726             } else {
727                 base = (value + (1 << (j - 1)) - 1) >> j;
728             }
729             if (base >= 16)
730                 continue;
731
732             temp_value = base << j;
733             temp_err = abs(value - temp_value);
734             if (temp_err < error) {
735                 error = temp_err;
736                 ret = (j << 4) | base;
737                 if (temp_err == 0)
738                     break;
739             }
740         }
741     }
742     temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
743     temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
744     if (temp1 > temp2)
745         ret = max;
746     return ret;
747
748 }
749
750
751 #define         QP_MAX                  52
752 #define         VP8_QP_MAX              128
753
754
755 static float intel_lambda_qp(int qp)
756 {
757     float value, lambdaf;
758     value = qp;
759     value = value / 6 - 2;
760     if (value < 0)
761         value = 0;
762     lambdaf = roundf(powf(2, value));
763     return lambdaf;
764 }
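/*
 * For reference: the formula above gives intel_lambda_qp(24) = 4 and
 * intel_lambda_qp(36) = 16, so the mode/MV costs derived from lambda
 * roughly double for every six QP steps.
 */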
765
766 static
767 void intel_h264_calc_mbmvcost_qp(int qp,
768                                  int slice_type,
769                                  uint8_t *vme_state_message)
770 {
771     int m_cost, j, mv_count;
772     float   lambda, m_costf;
773
774     assert(qp <= QP_MAX); 
775     lambda = intel_lambda_qp(qp);
776
777     m_cost = lambda;
778     vme_state_message[MODE_CHROMA_INTRA] = 0;
779     vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
780
781     if (slice_type == SLICE_TYPE_I) {
782         vme_state_message[MODE_INTRA_16X16] = 0;
783         m_cost = lambda * 4;
784         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
785         m_cost = lambda * 16; 
786         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
787         m_cost = lambda * 3;
788         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
789     } else {
790         m_cost = 0;
791         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
792         for (j = 1; j < 3; j++) {
793             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
794             m_cost = (int)m_costf;
795             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
796         }
797         mv_count = 3;
798         for (j = 4; j <= 64; j *= 2) {
799             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
800             m_cost = (int)m_costf;
801             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
802             mv_count++;
803         }
804
805         if (qp <= 25) {
806             vme_state_message[MODE_INTRA_16X16] = 0x4a;
807             vme_state_message[MODE_INTRA_8X8] = 0x4a;
808             vme_state_message[MODE_INTRA_4X4] = 0x4a;
809             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
810             vme_state_message[MODE_INTER_16X16] = 0x4a;
811             vme_state_message[MODE_INTER_16X8] = 0x4a;
812             vme_state_message[MODE_INTER_8X8] = 0x4a;
813             vme_state_message[MODE_INTER_8X4] = 0x4a;
814             vme_state_message[MODE_INTER_4X4] = 0x4a;
815             vme_state_message[MODE_INTER_BWD] = 0x2a;
816             return;
817         }
818         m_cost = lambda * 10;
819         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
820         m_cost = lambda * 14;
821         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
822         m_cost = lambda * 24; 
823         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
824         m_costf = lambda * 3.5;
825         m_cost = m_costf;
826         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
827         if (slice_type == SLICE_TYPE_P) {
828             m_costf = lambda * 2.5;
829             m_cost = m_costf;
830             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
831             m_costf = lambda * 4;
832             m_cost = m_costf;
833             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
834             m_costf = lambda * 1.5;
835             m_cost = m_costf;
836             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
837             m_costf = lambda * 3;
838             m_cost = m_costf;
839             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
840             m_costf = lambda * 5;
841             m_cost = m_costf;
842             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
843             /* BWD is not used in P-frame */
844             vme_state_message[MODE_INTER_BWD] = 0;
845         } else {
846             m_costf = lambda * 2.5;
847             m_cost = m_costf;
848             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
849             m_costf = lambda * 5.5;
850             m_cost = m_costf;
851             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
852             m_costf = lambda * 3.5;
853             m_cost = m_costf;
854             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
855             m_costf = lambda * 5.0;
856             m_cost = m_costf;
857             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
858             m_costf = lambda * 6.5;
859             m_cost = m_costf;
860             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
861             m_costf = lambda * 1.5;
862             m_cost = m_costf;
863             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
864         }
865     }
866     return;
867 }
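/*
 * Example of the resulting packing (numbers derived from the helpers
 * above): for a P/B slice at QP 36, lambda = 16, so MODE_INTRA_4X4 uses
 * m_cost = 16 * 24 = 384, which intel_format_lutvalue() encodes as
 * (5 << 4) | 12 = 0x5c, i.e. 12 << 5 = 384.
 */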
868
869 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
870                                 struct encode_state *encode_state,
871                                 struct intel_encoder_context *encoder_context)
872 {
873     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
874     struct gen6_vme_context *vme_context = encoder_context->vme_context;
875     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
876     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
877     int qp;
878     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
879
880     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
881
882     if (encoder_context->rate_control_mode == VA_RC_CQP)
883         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
884     else
885         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
886
887     if (vme_state_message == NULL)
888         return;
889
890     intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
891 }
892
893 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
894                                 struct encode_state *encode_state,
895                                 struct intel_encoder_context *encoder_context)
896 {
897     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
898     struct gen6_vme_context *vme_context = encoder_context->vme_context;
899     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
900     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
901     int qp, m_cost, j, mv_count;
902     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
903     float   lambda, m_costf;
904
905     int is_key_frame = !pic_param->pic_flags.bits.frame_type;
906     int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
907   
908     if (vme_state_message == NULL)
909         return;
910  
911     if (encoder_context->rate_control_mode == VA_RC_CQP)
912         qp = q_matrix->quantization_index[0];
913     else
914         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
915
916     lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
917
918     m_cost = lambda;
919     vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
920
921     if (is_key_frame) {
922         vme_state_message[MODE_INTRA_16X16] = 0;
923         m_cost = lambda * 16; 
924         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
925         m_cost = lambda * 3;
926         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
927     } else {
928         m_cost = 0;
929         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
930         for (j = 1; j < 3; j++) {
931             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
932             m_cost = (int)m_costf;
933             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
934         }
935         mv_count = 3;
936         for (j = 4; j <= 64; j *= 2) {
937             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
938             m_cost = (int)m_costf;
939             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
940             mv_count++;
941         }
942
943         if (qp < 92 ) {
944             vme_state_message[MODE_INTRA_16X16] = 0x4a;
945             vme_state_message[MODE_INTRA_4X4] = 0x4a;
946             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
947             vme_state_message[MODE_INTER_16X16] = 0x4a;
948             vme_state_message[MODE_INTER_16X8] = 0x4a;
949             vme_state_message[MODE_INTER_8X8] = 0x4a;
950             vme_state_message[MODE_INTER_4X4] = 0x4a;
951             vme_state_message[MODE_INTER_BWD] = 0;
952             return;
953         }
954         m_cost = lambda * 10;
955         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
956         m_cost = lambda * 24; 
957         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
958             
959         m_costf = lambda * 3.5;
960         m_cost = m_costf;
961         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
962
963         m_costf = lambda * 2.5;
964         m_cost = m_costf;
965         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
966         m_costf = lambda * 4;
967         m_cost = m_costf;
968         vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
969         m_costf = lambda * 1.5;
970         m_cost = m_costf;
971         vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
972         m_costf = lambda * 5;
973         m_cost = m_costf;
974         vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
975         /* BWD is not used in P-frame */
976         vme_state_message[MODE_INTER_BWD] = 0;
977     }
978 }
979
980 #define         MB_SCOREBOARD_A         (1 << 0)
981 #define         MB_SCOREBOARD_B         (1 << 1)
982 #define         MB_SCOREBOARD_C         (1 << 2)
983 void 
984 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
985 {
986     vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
987     vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
988     vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
989                                                            MB_SCOREBOARD_B |
990                                                            MB_SCOREBOARD_C);
991
992     /* In VME prediction the current mb depends on the neighbour 
993      * A/B/C macroblock. So the left/up/up-right dependency should
994      * be considered.
995      */
996     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
997     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
998     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
999     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
1000     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
1001     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
1002
1003     vme_context->gpe_context.vfe_desc7.dword = 0;
1004     return;
1005 }
1006
1007 /* return 0 if the mb at (x_index, y_index) lies inside the slice, -1 if it is out of bounds */
1008 static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
1009 {
1010     int mb_index;
1011     if (x_index < 0 || x_index >= mb_width)
1012         return -1;
1013     if (y_index < 0 || y_index >= mb_height)
1014         return -1;
1015
1016     mb_index = y_index * mb_width + x_index;
1017     if (mb_index < first_mb || mb_index > (first_mb + num_mb))
1018         return -1;
1019     return 0;
1020 }
1021
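/*
 * The walker below emits one MEDIA_OBJECT per macroblock along diagonals
 * (stepping x_inner -= 2, y_inner += 1), so that any left (A), top (B) or
 * top-right (C) neighbour belonging to the same slice is queued on an
 * earlier diagonal; the stalling scoreboard programmed in
 * gen7_vme_scoreboard_init() then enforces the actual execution order
 * between hardware threads.
 */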
1022 void
1023 gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
1024                                      struct encode_state *encode_state,
1025                                      int mb_width, int mb_height,
1026                                      int kernel,
1027                                      int transform_8x8_mode_flag,
1028                                      struct intel_encoder_context *encoder_context)
1029 {
1030     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1031     int mb_row;
1032     int s;
1033     unsigned int *command_ptr;
1034     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1035     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1036     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1037     int qp,qp_mb,qp_index;
1038     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1039
1040     if (encoder_context->rate_control_mode == VA_RC_CQP)
1041         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1042     else
1043         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1044
1045 #define         USE_SCOREBOARD          (1 << 21)
1046  
1047     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1048     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1049
1050     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1051         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
1052         int first_mb = pSliceParameter->macroblock_address;
1053         int num_mb = pSliceParameter->num_macroblocks;
1054         unsigned int mb_intra_ub, score_dep;
1055         int x_outer, y_outer, x_inner, y_inner;
1056         int xtemp_outer = 0;
1057
1058         x_outer = first_mb % mb_width;
1059         y_outer = first_mb / mb_width;
1060         mb_row = y_outer;
1061
1062         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1063             x_inner = x_outer;
1064             y_inner = y_outer;
1065             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1066                 mb_intra_ub = 0;
1067                 score_dep = 0;
1068                 if (x_inner != 0) {
1069                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1070                     score_dep |= MB_SCOREBOARD_A; 
1071                 }
1072                 if (y_inner != mb_row) {
1073                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1074                     score_dep |= MB_SCOREBOARD_B;
1075                     if (x_inner != 0)
1076                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1077                     if (x_inner != (mb_width -1)) {
1078                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1079                         score_dep |= MB_SCOREBOARD_C;
1080                     }
1081                 }
1082
1083                 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1084                 *command_ptr++ = kernel;
1085                 *command_ptr++ = USE_SCOREBOARD;
1086                 /* Indirect data */
1087                 *command_ptr++ = 0;
1088                 /* the (X, Y) term of scoreboard */
1089                 *command_ptr++ = ((y_inner << 16) | x_inner);
1090                 *command_ptr++ = score_dep;
1091                 /*inline data */
1092                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1093                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1094                 /* QP occupies one byte */
1095                 if (vme_context->roi_enabled) {
1096                     qp_index = y_inner * mb_width + x_inner;
1097                     qp_mb = *(vme_context->qp_per_mb + qp_index);
1098                 } else
1099                     qp_mb = qp;
1100                 *command_ptr++ = qp_mb;
1101                 x_inner -= 2;
1102                 y_inner += 1;
1103             }
1104             x_outer += 1;
1105         }
1106
1107         xtemp_outer = mb_width - 2;
1108         if (xtemp_outer < 0)
1109             xtemp_outer = 0;
1110         x_outer = xtemp_outer;
1111         y_outer = first_mb / mb_width;
1112         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
1113             y_inner = y_outer;
1114             x_inner = x_outer;
1115             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1116                 mb_intra_ub = 0;
1117                 score_dep = 0;
1118                 if (x_inner != 0) {
1119                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1120                     score_dep |= MB_SCOREBOARD_A; 
1121                 }
1122                 if (y_inner != mb_row) {
1123                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1124                     score_dep |= MB_SCOREBOARD_B;
1125                     if (x_inner != 0)
1126                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1127
1128                     if (x_inner != (mb_width -1)) {
1129                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1130                         score_dep |= MB_SCOREBOARD_C;
1131                     }
1132                 }
1133
1134                 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1135                 *command_ptr++ = kernel;
1136                 *command_ptr++ = USE_SCOREBOARD;
1137                 /* Indirect data */
1138                 *command_ptr++ = 0;
1139                 /* the (X, Y) term of scoreboard */
1140                 *command_ptr++ = ((y_inner << 16) | x_inner);
1141                 *command_ptr++ = score_dep;
1142                 /*inline data */
1143                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1144                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1145                 /* qp occupies one byte */
1146                 if (vme_context->roi_enabled) {
1147                     qp_index = y_inner * mb_width + x_inner;
1148                     qp_mb = *(vme_context->qp_per_mb + qp_index);
1149                 } else
1150                     qp_mb = qp;
1151                 *command_ptr++ = qp_mb;
1152
1153                 x_inner -= 2;
1154                 y_inner += 1;
1155             }
1156             x_outer++;
1157             if (x_outer >= mb_width) {
1158                 y_outer += 1;
1159                 x_outer = xtemp_outer;
1160             }           
1161         }
1162     }
1163
1164     *command_ptr++ = 0;
1165     *command_ptr++ = MI_BATCH_BUFFER_END;
1166
1167     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1168 }
1169
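/*
 * The helper below packs one byte of an MFX_AVC_REF_IDX_STATE entry:
 * bit 6 holds the long-term flag, bit 5 is set unless the picture is
 * referenced as a single (top or bottom) field, bits 4:1 carry the frame
 * store index and bit 0 marks a bottom-field-only reference.
 */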
1170 static uint8_t
1171 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1172 {
1173     unsigned int is_long_term =
1174         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1175     unsigned int is_top_field =
1176         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1177     unsigned int is_bottom_field =
1178         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1179
1180     return ((is_long_term                         << 6) |
1181             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1182             (frame_store_id                       << 1) |
1183             ((is_top_field ^ 1) & is_bottom_field));
1184 }
1185
1186 void
1187 intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
1188                             struct encode_state *encode_state,
1189                             struct intel_encoder_context *encoder_context)
1190 {
1191     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1192     struct intel_batchbuffer *batch = encoder_context->base.batch;
1193     int slice_type;
1194     struct object_surface *obj_surface;
1195     unsigned int fref_entry, bref_entry;
1196     int frame_index, i;
1197     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1198
1199     fref_entry = 0x80808080;
1200     bref_entry = 0x80808080;
1201     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1202
1203     if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
1204         int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
1205
1206         if (ref_idx_l0 > 3) {
1207             WARN_ONCE("ref_idx_l0 is out of range\n");
1208             ref_idx_l0 = 0;
1209         }
1210
1211         obj_surface = vme_context->used_reference_objects[0];
1212         frame_index = -1;
1213         for (i = 0; i < 16; i++) {
1214             if (obj_surface &&
1215                 obj_surface == encode_state->reference_objects[i]) {
1216                 frame_index = i;
1217                 break;
1218             }
1219         }
1220         if (frame_index == -1) {
1221             WARN_ONCE("RefPicList0 is not found in DPB!\n");
1222         } else {
1223             int ref_idx_l0_shift = ref_idx_l0 * 8;
1224             fref_entry &= ~(0xFF << ref_idx_l0_shift);
1225             fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
1226         }
1227     }
1228
1229     if (slice_type == SLICE_TYPE_B) {
1230         int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
1231
1232         if (ref_idx_l1 > 3) {
1233             WARN_ONCE("ref_idx_l1 is out of range\n");
1234             ref_idx_l1 = 0;
1235         }
1236
1237         obj_surface = vme_context->used_reference_objects[1];
1238         frame_index = -1;
1239         for (i = 0; i < 16; i++) {
1240             if (obj_surface &&
1241                 obj_surface == encode_state->reference_objects[i]) {
1242                 frame_index = i;
1243                 break;
1244             }
1245         }
1246         if (frame_index == -1) {
1247             WARN_ONCE("RefPicList1 is not found in DPB!\n");
1248         } else {
1249             int ref_idx_l1_shift = ref_idx_l1 * 8;
1250             bref_entry &= ~(0xFF << ref_idx_l1_shift);
1251             bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
1252         }
1253     }
1254
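    /*
     * One MFX_AVC_REF_IDX_STATE command is emitted per reference list.  Only
     * a single entry per list is programmed here (the VME stage uses one
     * reference per list); the remaining bytes are left at 0x80, i.e. unused
     * entries.
     */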
1255     BEGIN_BCS_BATCH(batch, 10);
1256     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1257     OUT_BCS_BATCH(batch, 0);                  //Select L0
1258     OUT_BCS_BATCH(batch, fref_entry);         //Only 1 reference
1259     for(i = 0; i < 7; i++) {
1260         OUT_BCS_BATCH(batch, 0x80808080);
1261     }
1262     ADVANCE_BCS_BATCH(batch);
1263
1264     BEGIN_BCS_BATCH(batch, 10);
1265     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1266     OUT_BCS_BATCH(batch, 1);                  //Select L1
1267     OUT_BCS_BATCH(batch, bref_entry);         //Only 1 reference
1268     for(i = 0; i < 7; i++) {
1269         OUT_BCS_BATCH(batch, 0x80808080);
1270     }
1271     ADVANCE_BCS_BATCH(batch);
1272 }
1273
1274
1275 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1276                                  struct encode_state *encode_state,
1277                                  struct intel_encoder_context *encoder_context)
1278 {
1279     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1280     uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1281     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1282     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1283     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1284     uint32_t mv_x, mv_y;
1285     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1286     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1287     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1288
1289     if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1290         mv_x = 512;
1291         mv_y = 64;
1292     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1293         mv_x = 1024;
1294         mv_y = 128;
1295     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1296         mv_x = 2048;
1297         mv_y = 128;
1298     } else {
1299         WARN_ONCE("Invalid MPEG-2 level setting!\n");
1300         mv_x = 512;
1301         mv_y = 64;
1302     }
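    /*
     * mv_x/mv_y are the maximum motion-vector search ranges chosen per MPEG-2
     * level; they are packed into the MPEG2_MV_RANGE message dword below as
     * ((mv_y << 16) | mv_x).
     */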
1303
1304     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1305     if (pic_param->picture_type != VAEncPictureTypeIntra) {
1306         int qp, m_cost, j, mv_count;
1307         float   lambda, m_costf;
1308         slice_param = (VAEncSliceParameterBufferMPEG2 *)
1309             encode_state->slice_params_ext[0]->buffer;
1310         qp = slice_param->quantiser_scale_code;
1311         lambda = intel_lambda_qp(qp);
1312         /* Intra prediction is not used, so the intra mode costs are zero */
1313         vme_state_message[MODE_INTRA_8X8] = 0;
1314         vme_state_message[MODE_INTRA_4X4] = 0;
1315         vme_state_message[MODE_INTER_MV0] = 0;
1316         for (j = 1; j < 3; j++) {
1317             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1318             m_cost = (int)m_costf;
1319             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1320         }
1321         mv_count = 3;
1322         for (j = 4; j <= 64; j *= 2) {
1323             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1324             m_cost = (int)m_costf;
1325             vme_state_message[MODE_INTER_MV0 + mv_count] =
1326                 intel_format_lutvalue(m_cost, 0x6f);
1327             mv_count++;
1328         }
1329         m_cost = lambda;
1330         /* Only the 16x16 search is performed, so the mode costs for the
1331          * other partitions (e.g. 16x8/8x8) can be ignored.
1332          */
1333         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1334         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1335
1336         vme_state_message[MODE_INTER_16X8] = 0;
1337         vme_state_message[MODE_INTER_8X8] = 0;
1338         vme_state_message[MODE_INTER_8X4] = 0;
1339         vme_state_message[MODE_INTER_4X4] = 0;
1340         vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
1341
1342     }
1343     vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1344
1345     vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
1346         width_in_mbs;
1347 }
1348
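/*
 * Emit one MEDIA_OBJECT per macroblock for the MPEG-2 VME kernel.  The two
 * loops below walk the picture in a wavefront order (each inner step moves
 * two MBs to the left and one MB down), so the scoreboard dependencies
 * declared per MB (A/B/C) are already resolved when the MB is dispatched.
 */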
1349 void
1350 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
1351                                            struct encode_state *encode_state,
1352                                            int mb_width, int mb_height,
1353                                            int kernel,
1354                                            struct intel_encoder_context *encoder_context)
1355 {
1356     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1357     unsigned int *command_ptr;
1358
1359 #define         MPEG2_SCOREBOARD                (1 << 21)
1360
1361     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1362     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1363
1364     {
1365         unsigned int mb_intra_ub, score_dep;
1366         int x_outer, y_outer, x_inner, y_inner;
1367         int xtemp_outer = 0;
1368         int first_mb = 0;
1369         int num_mb = mb_width * mb_height;
1370
1371         x_outer = 0;
1372         y_outer = 0;
1373
1374
1375         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1376             x_inner = x_outer;
1377             y_inner = y_outer;
1378             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1379                 mb_intra_ub = 0;
1380                 score_dep = 0;
1381                 if (x_inner != 0) {
1382                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1383                     score_dep |= MB_SCOREBOARD_A; 
1384                 }
1385                 if (y_inner != 0) {
1386                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1387                     score_dep |= MB_SCOREBOARD_B;
1388
1389                     if (x_inner != 0)
1390                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1391
1392                     if (x_inner != (mb_width -1)) {
1393                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1394                         score_dep |= MB_SCOREBOARD_C;
1395                     }
1396                 }
1397
1398                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1399                 *command_ptr++ = kernel;
1400                 *command_ptr++ = MPEG2_SCOREBOARD;
1401                 /* Indirect data */
1402                 *command_ptr++ = 0;
1403                 /* the (X, Y) coordinates for the scoreboard */
1404                 *command_ptr++ = ((y_inner << 16) | x_inner);
1405                 *command_ptr++ = score_dep;
1406                 /* inline data */
1407                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1408                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1409                 x_inner -= 2;
1410                 y_inner += 1;
1411             }
1412             x_outer += 1;
1413         }
1414
1415         xtemp_outer = mb_width - 2;
1416         if (xtemp_outer < 0)
1417             xtemp_outer = 0;
1418         x_outer = xtemp_outer;
1419         y_outer = 0;
1420         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
1421             y_inner = y_outer;
1422             x_inner = x_outer;
1423             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1424                 mb_intra_ub = 0;
1425                 score_dep = 0;
1426                 if (x_inner != 0) {
1427                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1428                     score_dep |= MB_SCOREBOARD_A; 
1429                 }
1430                 if (y_inner != 0) {
1431                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1432                     score_dep |= MB_SCOREBOARD_B;
1433
1434                     if (x_inner != 0)
1435                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1436
1437                     if (x_inner != (mb_width -1)) {
1438                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1439                         score_dep |= MB_SCOREBOARD_C;
1440                     }
1441                 }
1442
1443                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1444                 *command_ptr++ = kernel;
1445                 *command_ptr++ = MPEG2_SCOREBOARD;
1446                 /* Indirect data */
1447                 *command_ptr++ = 0;
1448                 /* the (X, Y) term of scoreboard */
1449                 *command_ptr++ = ((y_inner << 16) | x_inner);
1450                 *command_ptr++ = score_dep;
1451                 /*inline data */
1452                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1453                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1454
1455                 x_inner -= 2;
1456                 y_inner += 1;
1457             }
1458             x_outer++;
1459             if (x_outer >= mb_width) {
1460                 y_outer += 1;
1461                 x_outer = xtemp_outer;
1462             }           
1463         }
1464     }
1465
1466     *command_ptr++ = 0;
1467     *command_ptr++ = MI_BATCH_BUFFER_END;
1468
1469     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1470     return;
1471 }
1472
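/*
 * Return the index of the entry in ref_list whose TopFieldOrderCnt is
 * temporally closest to the current picture: past references for dir == 0
 * (list 0), future references for dir == 1 (list 1).  Returns -1 when no
 * suitable entry is found.
 */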
1473 static int
1474 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1475                           VAPictureH264 *ref_list,
1476                           int num_pictures,
1477                           int dir)
1478 {
1479     int i, found = -1, min = 0x7FFFFFFF;
1480
1481     for (i = 0; i < num_pictures; i++) {
1482         int tmp;
1483
1484         if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1485             (ref_list[i].picture_id == VA_INVALID_SURFACE))
1486             break;
1487
1488         tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1489
1490         if (dir)
1491             tmp = -tmp;
1492
1493         if (tmp > 0 && tmp < min) {
1494             min = tmp;
1495             found = i;
1496         }
1497     }
1498
1499     return found;
1500 }
1501
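/*
 * Select the single reference surface the VME kernel will use for the given
 * list.  With only one active reference it is taken directly from RefPicList;
 * otherwise the temporally nearest reference is picked with
 * avc_temporal_find_surface().  The chosen index is replicated into all four
 * bytes of ref_index_in_mb so it can be copied per MB as-is.
 */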
1502 void
1503 intel_avc_vme_reference_state(VADriverContextP ctx,
1504                               struct encode_state *encode_state,
1505                               struct intel_encoder_context *encoder_context,
1506                               int list_index,
1507                               int surface_index,
1508                               void (* vme_source_surface_state)(
1509                                   VADriverContextP ctx,
1510                                   int index,
1511                                   struct object_surface *obj_surface,
1512                                   struct intel_encoder_context *encoder_context))
1513 {
1514     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1515     struct object_surface *obj_surface = NULL;
1516     struct i965_driver_data *i965 = i965_driver_data(ctx);
1517     VASurfaceID ref_surface_id;
1518     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1519     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1520     int max_num_references;
1521     VAPictureH264 *curr_pic;
1522     VAPictureH264 *ref_list;
1523     int ref_idx;
1524
1525     if (list_index == 0) {
1526         max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1527         ref_list = slice_param->RefPicList0;
1528     } else {
1529         max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1530         ref_list = slice_param->RefPicList1;
1531     }
1532
1533     if (max_num_references == 1) {
1534         if (list_index == 0) {
1535             ref_surface_id = slice_param->RefPicList0[0].picture_id;
1536             vme_context->used_references[0] = &slice_param->RefPicList0[0];
1537         } else {
1538             ref_surface_id = slice_param->RefPicList1[0].picture_id;
1539             vme_context->used_references[1] = &slice_param->RefPicList1[0];
1540         }
1541
1542         if (ref_surface_id != VA_INVALID_SURFACE)
1543             obj_surface = SURFACE(ref_surface_id);
1544
1545         if (!obj_surface ||
1546             !obj_surface->bo) {
1547             obj_surface = encode_state->reference_objects[list_index];
1548             vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1549         }
1550
1551         ref_idx = 0;
1552     } else {
1553         curr_pic = &pic_param->CurrPic;
1554
1555         /* select the reference frame in temporal space */
1556         ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1557         ref_surface_id = ref_list[ref_idx].picture_id;
1558
1559         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1560             obj_surface = SURFACE(ref_surface_id);
1561
1562         vme_context->used_reference_objects[list_index] = obj_surface;
1563         vme_context->used_references[list_index] = &ref_list[ref_idx];
1564     }
1565
1566     if (obj_surface &&
1567         obj_surface->bo) {
1568         assert(ref_idx >= 0);
1569         vme_context->used_reference_objects[list_index] = obj_surface;
1570         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1571         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
1572                                                     ref_idx << 16 |
1573                                                     ref_idx <<  8 |
1574                                                     ref_idx);
1575     } else {
1576         vme_context->used_reference_objects[list_index] = NULL;
1577         vme_context->used_references[list_index] = NULL;
1578         vme_context->ref_index_in_mb[list_index] = 0;
1579     }
1580 }
1581
1582 void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
1583                                         struct encode_state *encode_state,
1584                                         struct intel_encoder_context *encoder_context,
1585                                         int slice_index,
1586                                         struct intel_batchbuffer *slice_batch)
1587 {
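    /*
     * The packed raw data (e.g. SEI) attached to this slice is inserted
     * first; the packed slice header, if any, is inserted last.  When the
     * application does not provide a packed slice header, one is generated
     * with build_avc_slice_header().
     */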
1588     int count, i, start_index;
1589     unsigned int length_in_bits;
1590     VAEncPackedHeaderParameterBuffer *param = NULL;
1591     unsigned int *header_data = NULL;
1592     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1593     int slice_header_index;
1594
1595     if (encode_state->slice_header_index[slice_index] == 0)
1596         slice_header_index = -1;
1597     else
1598         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1599
1600     count = encode_state->slice_rawdata_count[slice_index];
1601     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1602
1603     for (i = 0; i < count; i++) {
1604         unsigned int skip_emul_byte_cnt;
1605
1606         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1607
1608         param = (VAEncPackedHeaderParameterBuffer *)
1609                     (encode_state->packed_header_params_ext[start_index + i]->buffer);
1610
1611         /* skip the packed slice header data here as it is inserted last */
1612         if (param->type == VAEncPackedHeaderSlice)
1613             continue;
1614
1615         length_in_bits = param->bit_length;
1616
1617         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1618
1619         /* as the slice header is still required, the last header flag is set to
1620          * zero.
1621          */
1622         mfc_context->insert_object(ctx,
1623                                    encoder_context,
1624                                    header_data,
1625                                    ALIGN(length_in_bits, 32) >> 5,
1626                                    length_in_bits & 0x1f,
1627                                    skip_emul_byte_cnt,
1628                                    0,
1629                                    0,
1630                                    !param->has_emulation_bytes,
1631                                    slice_batch);
1632     }
1633
1634     if (slice_header_index == -1) {
1635         unsigned char *slice_header = NULL;
1636         int slice_header_length_in_bits = 0;
1637         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1638         VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1639         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1640
1641         /* No packed slice header is passed, so the driver needs to generate
1642          * one (the normal H.264 case). */
1643         slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
1644                                                              pPicParameter,
1645                                                              pSliceParameter,
1646                                                              &slice_header);
1647         mfc_context->insert_object(ctx, encoder_context,
1648                                    (unsigned int *)slice_header,
1649                                    ALIGN(slice_header_length_in_bits, 32) >> 5,
1650                                    slice_header_length_in_bits & 0x1f,
1651                                    5,  /* first 5 bytes are start code + nal unit type */
1652                                    1, 0, 1, slice_batch);
1653
1654         free(slice_header);
1655     } else {
1656         unsigned int skip_emul_byte_cnt;
1657
1658         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1659
1660         param = (VAEncPackedHeaderParameterBuffer *)
1661                     (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1662         length_in_bits = param->bit_length;
1663
1664         /* as the slice header is the last header data for one slice,
1665          * the last header flag is set to one.
1666          */
1667         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1668
1669         mfc_context->insert_object(ctx,
1670                                    encoder_context,
1671                                    header_data,
1672                                    ALIGN(length_in_bits, 32) >> 5,
1673                                    length_in_bits & 0x1f,
1674                                    skip_emul_byte_cnt,
1675                                    1,
1676                                    0,
1677                                    !param->has_emulation_bytes,
1678                                    slice_batch);
1679     }
1680
1681     return;
1682 }
1683
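/*
 * Build the per-QP MB/MV cost table for the current slice type and cache it
 * in a bo: one 32-byte entry for each QP in [0, QP_MAX).  The table is only
 * generated once per slice type and reused for subsequent frames.
 */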
1684 void
1685 intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
1686                                 struct encode_state *encode_state,
1687                                 struct intel_encoder_context *encoder_context)
1688 {
1689     struct i965_driver_data *i965 = i965_driver_data(ctx);
1690     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1691     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1692     int qp;
1693     dri_bo *bo;
1694     uint8_t *cost_table;
1695
1696     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1697
1698
1699     if (slice_type == SLICE_TYPE_I) {
1700         if (vme_context->i_qp_cost_table)
1701             return;
1702     } else if (slice_type == SLICE_TYPE_P) {
1703         if (vme_context->p_qp_cost_table)
1704             return;
1705     } else {
1706         if (vme_context->b_qp_cost_table)
1707             return;
1708     }
1709
1710     /* It is enough to allocate 32 bytes for each qp. */
1711     bo = dri_bo_alloc(i965->intel.bufmgr,
1712                       "cost_table ",
1713                       QP_MAX * 32,
1714                       64);
1715
1716     dri_bo_map(bo, 1);
1717     assert(bo->virtual);
1718     cost_table = (uint8_t *)(bo->virtual);
1719     for (qp = 0; qp < QP_MAX; qp++) {
1720         intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
1721         cost_table += 32;
1722     }
1723
1724     dri_bo_unmap(bo);
1725
1726     if (slice_type == SLICE_TYPE_I) {
1727         vme_context->i_qp_cost_table = bo;
1728     } else if (slice_type == SLICE_TYPE_P) {
1729         vme_context->p_qp_cost_table = bo;
1730     } else {
1731         vme_context->b_qp_cost_table = bo;
1732     }
1733
1734     vme_context->cost_table_size = QP_MAX * 32;
1735     return;
1736 }
1737
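/*
 * Bind the cached cost table for the current slice type to the VME kernel as
 * a buffer surface (QP_MAX blocks of 32 bytes, 16-byte pitch) at the given
 * binding-table and surface-state offsets.
 */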
1738 extern void
1739 intel_h264_setup_cost_surface(VADriverContextP ctx,
1740                               struct encode_state *encode_state,
1741                               struct intel_encoder_context *encoder_context,
1742                               unsigned long binding_table_offset,
1743                               unsigned long surface_state_offset)
1744 {
1745     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1746     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1747     dri_bo *bo;
1748
1749
1750     struct i965_buffer_surface cost_table;
1751
1752     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1753
1754
1755     if (slice_type == SLICE_TYPE_I) {
1756         bo = vme_context->i_qp_cost_table;
1757     } else if (slice_type == SLICE_TYPE_P) {
1758         bo = vme_context->p_qp_cost_table;
1759     } else {
1760         bo = vme_context->b_qp_cost_table;
1761     }
1762
1763     cost_table.bo = bo;
1764     cost_table.num_blocks = QP_MAX;
1765     cost_table.pitch = 16;
1766     cost_table.size_block = 32;
1767
1768     vme_context->vme_buffer_suface_setup(ctx,
1769                                          &vme_context->gpe_context,
1770                                          &cost_table,
1771                                          binding_table_offset,
1772                                          surface_state_offset);
1773 }
1774
1775 /*
1776  * The conversion between qp and qstep comes from the scaling process of the
1777  * luma transform coefficients in the H.264 spec: qstep = 2^(QPy / 6 - 6).
1778  * To avoid a too-small qstep it is multiplied by 16, giving the
1779  * qstep = 2^(QPy / 6 - 2) computed below.
1780  */
1781 static float intel_h264_qp_qstep(int qp)
1782 {
1783     float value, qstep;
1784     value = qp;
1785     value = value / 6 - 2;
1786     qstep = powf(2, value);
1787     return qstep;
1788 }
1789
1790 static int intel_h264_qstep_qp(float qstep)
1791 {
1792     float qp;
1793
1794     qp = 12.0f + 6.0f * log2f(qstep);
1795
1796     return floorf(qp);
1797 }
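/*
 * Illustrative example of the two helpers above: qp = 24 gives
 *   qstep = 2^(24/6 - 2) = 4.0,
 * and intel_h264_qstep_qp(4.0) = floor(12 + 6 * log2(4.0)) = 24, so they act
 * as consistent inverses on the 16x-scaled qstep.
 */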
1798
1799 /*
1800  * Currently it is based on the following assumption:
1801  *   SUM(roi_area / roi_qstep) + nonroi_area / nonroi_qstep =
1802  *                                   total_area / baseqp_qstep
1803  *
1804  * where qstep is the linearized quantizer step of the H.264 quantizer.
1805  */
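/*
 * Solving that assumption for the non-ROI quantizer step gives
 *
 *   nonroi_qstep = nonroi_area /
 *                  (total_area / baseqp_qstep - SUM(roi_area / roi_qstep))
 *
 * which is what intel_h264_enc_roi_cbr() computes below ("temp" is the
 * denominator).  If the denominator goes negative, the non-ROI QP is simply
 * pinned to 51.
 */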
1806 typedef struct {
1807     int row_start_in_mb;
1808     int row_end_in_mb;
1809     int col_start_in_mb;
1810     int col_end_in_mb;
1811
1812     int width_mbs;
1813     int height_mbs;
1814
1815     int roi_qp;
1816 } ROIRegionParam;
1817
1818 static VAStatus
1819 intel_h264_enc_roi_cbr(VADriverContextP ctx,
1820                        int base_qp,
1821                        struct encode_state *encode_state,
1822                        struct intel_encoder_context *encoder_context)
1823 {
1824     int nonroi_qp;
1825     int min_qp = MAX(1, encoder_context->brc.min_qp);
1826     bool quickfill = false;
1827
1828     ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS];
1829     int num_roi = 0;
1830     int i,j;
1831
1832     float temp;
1833     float qstep_nonroi, qstep_base;
1834     float roi_area, total_area, nonroi_area;
1835     float sum_roi;
1836
1837     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1838     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1839     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1840     int mbs_in_picture = width_in_mbs * height_in_mbs;
1841
1842     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1843     VAStatus vaStatus = VA_STATUS_SUCCESS;
1844
1845     /* Currently roi_value_is_qp_delta is the only supported priority mode.
1846      *
1847      * The qp_delta set by the user is added to base_qp, and the resulting
1848      * ROI QP is then clamped to the range [min_qp, 51].
1849      */
1850     ASSERT_RET(encoder_context->brc.roi_value_is_qp_delta, VA_STATUS_ERROR_INVALID_PARAMETER);
1851
1852     num_roi = encoder_context->brc.num_roi;
1853
1854     /* Based on H.264 testing experience, the quality is already quite good
1855      * when base_qp is 12 or lower.
1856      * In that case there is no need to adjust the QP for the ROI regions.
1857      */
1858     if (base_qp <= 12) {
1859         nonroi_qp = base_qp;
1860         quickfill = 1;
1861         goto qp_fill;
1862     }
1863
1864     sum_roi = 0.0f;
1865     roi_area = 0;
1866     for (i = 0; i < num_roi; i++) {
1867         int row_start, row_end, col_start, col_end;
1868         int roi_width_mbs, roi_height_mbs;
1869         int mbs_in_roi;
1870         int roi_qp;
1871         float qstep_roi;
1872
1873         col_start = encoder_context->brc.roi[i].left;
1874         col_end = encoder_context->brc.roi[i].right;
1875         row_start = encoder_context->brc.roi[i].top;
1876         row_end = encoder_context->brc.roi[i].bottom;
1877
1878         col_start = col_start / 16;
1879         col_end = (col_end + 15) / 16;
1880         row_start = row_start / 16;
1881         row_end = (row_end + 15) / 16;
1882
1883         roi_width_mbs = col_end - col_start;
1884         roi_height_mbs = row_end - row_start;
1885         mbs_in_roi = roi_width_mbs * roi_height_mbs;
1886
1887         param_regions[i].row_start_in_mb = row_start;
1888         param_regions[i].row_end_in_mb = row_end;
1889         param_regions[i].col_start_in_mb = col_start;
1890         param_regions[i].col_end_in_mb = col_end;
1891         param_regions[i].width_mbs = roi_width_mbs;
1892         param_regions[i].height_mbs = roi_height_mbs;
1893
1894         roi_qp = base_qp + encoder_context->brc.roi[i].value;
1895         BRC_CLIP(roi_qp, min_qp, 51);
1896
1897         param_regions[i].roi_qp = roi_qp;
1898         qstep_roi = intel_h264_qp_qstep(roi_qp);
1899
1900         roi_area += mbs_in_roi;
1901         sum_roi += mbs_in_roi / qstep_roi;
1902     }
1903
1904     total_area = mbs_in_picture;
1905     nonroi_area = total_area - roi_area;
1906
1907     qstep_base = intel_h264_qp_qstep(base_qp);
1908     temp = (total_area / qstep_base - sum_roi);
1909
1910     if (temp < 0) {
1911         nonroi_qp = 51;
1912     } else {
1913         qstep_nonroi = nonroi_area / temp;
1914         nonroi_qp = intel_h264_qstep_qp(qstep_nonroi);
1915     }
1916
1917     BRC_CLIP(nonroi_qp, min_qp, 51);
1918
1919 qp_fill:
1920     memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture);
1921     if (!quickfill) {
1922         char *qp_ptr;
1923
1924         for (i = 0; i < num_roi; i++) {
1925             for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) {
1926                 qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb;
1927                 memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs);
1928             }
1929         }
1930     }
1931     return vaStatus;
1932 }
1933
1934 extern void
1935 intel_h264_enc_roi_config(VADriverContextP ctx,
1936                           struct encode_state *encode_state,
1937                           struct intel_encoder_context *encoder_context)
1938 {
1939     char *qp_ptr;
1940     int i, j;
1941     struct i965_driver_data *i965 = i965_driver_data(ctx);
1942     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1943     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1944     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1945     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1946     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1947
1948     int row_start, row_end, col_start, col_end;
1949     int num_roi = 0;
1950
1951     vme_context->roi_enabled = 0;
1952     /* Restriction: Disable ROI when multi-slice is enabled */
1953     if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
1954         return;
1955
1956     vme_context->roi_enabled = !!encoder_context->brc.num_roi;
1957
1958     if (!vme_context->roi_enabled)
1959         return;
1960
1961     if ((vme_context->saved_width_mbs !=  width_in_mbs) ||
1962         (vme_context->saved_height_mbs != height_in_mbs)) {
1963         free(vme_context->qp_per_mb);
1964         vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);
1965
1966         vme_context->saved_width_mbs = width_in_mbs;
1967         vme_context->saved_height_mbs = height_in_mbs;
1968         assert(vme_context->qp_per_mb);
1969     }
1970     if (encoder_context->rate_control_mode == VA_RC_CBR) {
1971         /*
1972          * TODO: a more sophisticated QP adjustment needs to be added.
1973          * Currently the per-MB QP map is initialized from the BRC slice QP.
1974          */
1975         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1976         int qp;
1977         int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1978
1979         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1980         intel_h264_enc_roi_cbr(ctx, qp, encode_state, encoder_context);
1981
1982     } else if (encoder_context->rate_control_mode == VA_RC_CQP) {
1983         VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1984         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1985         int qp;
1986         int min_qp = MAX(1, encoder_context->brc.min_qp);
1987
1988         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1989         memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
1990
1991         num_roi = encoder_context->brc.num_roi;
1992         for (i = 0; i < num_roi; i++) {
1993             int qp_delta, qp_clip;
1994
1995             col_start = encoder_context->brc.roi[i].left;
1996             col_end = encoder_context->brc.roi[i].right;
1997             row_start = encoder_context->brc.roi[i].top;
1998             row_end = encoder_context->brc.roi[i].bottom;
1999
2000             col_start = col_start / 16;
2001             col_end = (col_end + 15) / 16;
2002             row_start = row_start / 16;
2003             row_end = (row_end + 15) / 16;
2004
2005             qp_delta = encoder_context->brc.roi[i].value;
2006             qp_clip = qp + qp_delta;
2007
2008             BRC_CLIP(qp_clip, min_qp, 51);
2009
2010             for (j = row_start; j < row_end; j++) {
2011                 qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + col_start;
2012                 memset(qp_ptr, qp_clip, (col_end - col_start));
2013             }
2014         }
2015     } else {
2016         /*
2017          * ROI is not supported for rate-control modes other than CBR/CQP,
2018          * so it is disabled here.
2019          */
2019         vme_context->roi_enabled = 0;
2020     }
2021
2022     if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info))
2023         encoder_context->soft_batch_force = 1;
2024
2025     return;
2026 }
2027
2028 /* HEVC */
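/*
 * Same temporal-distance reference selection as avc_temporal_find_surface()
 * above, but keyed on the HEVC pic_order_cnt instead of TopFieldOrderCnt.
 */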
2029 static int
2030 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
2031                            VAPictureHEVC *ref_list,
2032                            int num_pictures,
2033                            int dir)
2034 {
2035     int i, found = -1, min = 0x7FFFFFFF;
2036
2037     for (i = 0; i < num_pictures; i++) {
2038         int tmp;
2039
2040         if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
2041             (ref_list[i].picture_id == VA_INVALID_SURFACE))
2042             break;
2043
2044         tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
2045
2046         if (dir)
2047             tmp = -tmp;
2048
2049         if (tmp > 0 && tmp < min) {
2050             min = tmp;
2051             found = i;
2052         }
2053     }
2054
2055     return found;
2056 }
2057 void
2058 intel_hevc_vme_reference_state(VADriverContextP ctx,
2059                                struct encode_state *encode_state,
2060                                struct intel_encoder_context *encoder_context,
2061                                int list_index,
2062                                int surface_index,
2063                                void (* vme_source_surface_state)(
2064                                    VADriverContextP ctx,
2065                                    int index,
2066                                    struct object_surface *obj_surface,
2067                                    struct intel_encoder_context *encoder_context))
2068 {
2069     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2070     struct object_surface *obj_surface = NULL;
2071     struct i965_driver_data *i965 = i965_driver_data(ctx);
2072     VASurfaceID ref_surface_id;
2073     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2074     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2075     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2076     int max_num_references;
2077     VAPictureHEVC *curr_pic;
2078     VAPictureHEVC *ref_list;
2079     int ref_idx;
2080     unsigned int is_hevc10 = 0;
2081     GenHevcSurface *hevc_encoder_surface = NULL;
2082
2083     if ((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0) ||
2084         (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
2085         is_hevc10 = 1;
2086
2087     if (list_index == 0) {
2088         max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
2089         ref_list = slice_param->ref_pic_list0;
2090     } else {
2091         max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
2092         ref_list = slice_param->ref_pic_list1;
2093     }
2094
2095     if (max_num_references == 1) {
2096         if (list_index == 0) {
2097             ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
2098             vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
2099         } else {
2100             ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
2101             vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
2102         }
2103
2104         if (ref_surface_id != VA_INVALID_SURFACE)
2105             obj_surface = SURFACE(ref_surface_id);
2106
2107         if (!obj_surface ||
2108             !obj_surface->bo) {
2109             obj_surface = encode_state->reference_objects[list_index];
2110             vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
2111         }
2112
2113         ref_idx = 0;
2114     } else {
2115         curr_pic = &pic_param->decoded_curr_pic;
2116
2117         /* select the reference frame in temporal space */
2118         ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
2119         ref_surface_id = ref_list[ref_idx].picture_id;
2120
2121         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
2122             obj_surface = SURFACE(ref_surface_id);
2123
2124         vme_context->used_reference_objects[list_index] = obj_surface;
2125         vme_context->used_references[list_index] = &ref_list[ref_idx];
2126     }
2127
2128     if (obj_surface &&
2129         obj_surface->bo) {
2130         assert(ref_idx >= 0);
2131         vme_context->used_reference_objects[list_index] = obj_surface;
2132
2133         if (is_hevc10) {
2134             hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2135             assert(hevc_encoder_surface);
2136             obj_surface = hevc_encoder_surface->nv12_surface_obj;
2137         }
2138         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
2139         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
2140                 ref_idx << 16 |
2141                 ref_idx <<  8 |
2142                 ref_idx);
2143     } else {
2144         vme_context->used_reference_objects[list_index] = NULL;
2145         vme_context->used_references[list_index] = NULL;
2146         vme_context->ref_index_in_mb[list_index] = 0;
2147     }
2148 }
2149
2150 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
2151                                      struct encode_state *encode_state,
2152                                      struct intel_encoder_context *encoder_context)
2153 {
2154     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2155     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2156     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2157     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2158     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2159     int qp, m_cost, j, mv_count;
2160     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
2161     float   lambda, m_costf;
2162
2163     /* HEVC has no SI/SP slices, so no slice-type fixup is needed here */
2164     int slice_type = slice_param->slice_type;
2165
2166
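    /*
     * The mode and MV costs written below are scaled by lambda and packed
     * with intel_format_lutvalue() into the 8-bit LUT format used by the VME
     * hardware; the second argument (0x6f / 0x8f) appears to act as an upper
     * clamp on the packed value.
     */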
2167     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2168
2169     if (encoder_context->rate_control_mode == VA_RC_CBR) {
2170         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
2171         if (slice_type == HEVC_SLICE_B) {
2172             if (pSequenceParameter->ip_period == 1) {
2173                 slice_type = HEVC_SLICE_P;
2174                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2175             } else if (mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1) {
2176                 slice_type = HEVC_SLICE_P;
2177                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2178             }
2179         }
2180     }
2185
2186     if (vme_state_message == NULL)
2187         return;
2188
2189     assert(qp <= QP_MAX);
2190     lambda = intel_lambda_qp(qp);
2191     if (slice_type == HEVC_SLICE_I) {
2192         vme_state_message[MODE_INTRA_16X16] = 0;
2193         m_cost = lambda * 4;
2194         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2195         m_cost = lambda * 16;
2196         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2197         m_cost = lambda * 3;
2198         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2199     } else {
2200         m_cost = 0;
2201         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
2202         for (j = 1; j < 3; j++) {
2203             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2204             m_cost = (int)m_costf;
2205             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2206         }
2207         mv_count = 3;
2208         for (j = 4; j <= 64; j *= 2) {
2209             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2210             m_cost = (int)m_costf;
2211             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
2212             mv_count++;
2213         }
2214
2215         if (qp <= 25) {
2216             vme_state_message[MODE_INTRA_16X16] = 0x4a;
2217             vme_state_message[MODE_INTRA_8X8] = 0x4a;
2218             vme_state_message[MODE_INTRA_4X4] = 0x4a;
2219             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2220             vme_state_message[MODE_INTER_16X16] = 0x4a;
2221             vme_state_message[MODE_INTER_16X8] = 0x4a;
2222             vme_state_message[MODE_INTER_8X8] = 0x4a;
2223             vme_state_message[MODE_INTER_8X4] = 0x4a;
2224             vme_state_message[MODE_INTER_4X4] = 0x4a;
2225             vme_state_message[MODE_INTER_BWD] = 0x2a;
2226             return;
2227         }
2228         m_cost = lambda * 10;
2229         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2230         m_cost = lambda * 14;
2231         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2232         m_cost = lambda * 24;
2233         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2234         m_costf = lambda * 3.5;
2235         m_cost = m_costf;
2236         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2237         if (slice_type == HEVC_SLICE_P) {
2238             m_costf = lambda * 2.5;
2239             m_cost = m_costf;
2240             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2241             m_costf = lambda * 4;
2242             m_cost = m_costf;
2243             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2244             m_costf = lambda * 1.5;
2245             m_cost = m_costf;
2246             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2247             m_costf = lambda * 3;
2248             m_cost = m_costf;
2249             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2250             m_costf = lambda * 5;
2251             m_cost = m_costf;
2252             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2253             /* BWD is not used in P-frame */
2254             vme_state_message[MODE_INTER_BWD] = 0;
2255         } else {
2256             m_costf = lambda * 2.5;
2257             m_cost = m_costf;
2258             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2259             m_costf = lambda * 5.5;
2260             m_cost = m_costf;
2261             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2262             m_costf = lambda * 3.5;
2263             m_cost = m_costf;
2264             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2265             m_costf = lambda * 5.0;
2266             m_cost = m_costf;
2267             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2268             m_costf = lambda * 6.5;
2269             m_cost = m_costf;
2270             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2271             m_costf = lambda * 1.5;
2272             m_cost = m_costf;
2273             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
2274         }
2275     }
2276 }