OSDN Git Service

svct: Save the current slice type per layer
[android-x86/hardware-intel-common-vaapi.git] / src / gen6_mfc_common.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "gen9_mfc.h"
45 #include "intel_media.h"
46
47 #ifndef HAVE_LOG2F
48 #define log2f(x) (logf(x)/(float)M_LN2)
49 #endif
50
51 int intel_avc_enc_slice_type_fixup(int slice_type)
52 {
53     if (slice_type == SLICE_TYPE_SP ||
54         slice_type == SLICE_TYPE_P)
55         slice_type = SLICE_TYPE_P;
56     else if (slice_type == SLICE_TYPE_SI ||
57              slice_type == SLICE_TYPE_I)
58         slice_type = SLICE_TYPE_I;
59     else {
60         if (slice_type != SLICE_TYPE_B)
61             WARN_ONCE("Invalid slice type for H.264 encoding!\n");
62
63         slice_type = SLICE_TYPE_B;
64     }
65
66     return slice_type;
67 }
68
69 static void
70 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state, 
71                                         struct intel_encoder_context *encoder_context)
72 {
73     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
74     int i;
75
76     for(i = 0 ; i < 3; i++) {
77         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
78         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
79         mfc_context->bit_rate_control_context[i].GrowInit = 6;
80         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
81         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
82         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
83         
84         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
85         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
86         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
87         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
88         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
89         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
90     }
91 }
92
93 static void intel_mfc_brc_init(struct encode_state *encode_state,
94                                struct intel_encoder_context* encoder_context)
95 {
96     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
97     double bitrate, framerate;
98     double qp1_size = 0.1 * 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
99     double qp51_size = 0.001 * 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
100     double bpf, factor;
101     int inum = encoder_context->brc.num_iframes_in_gop,
102         pnum = encoder_context->brc.num_pframes_in_gop,
103         bnum = encoder_context->brc.num_bframes_in_gop; /* Gop structure: number of I, P, B frames in the Gop. */
104     int intra_period = encoder_context->brc.gop_size;
105     int i;
106
107     mfc_context->brc.mode = encoder_context->rate_control_mode;
108
109     mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
110     mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
111     mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
112
113     mfc_context->hrd.buffer_size = encoder_context->brc.hrd_buffer_size;
114     mfc_context->hrd.current_buffer_fullness =
115         (double)(encoder_context->brc.hrd_initial_buffer_fullness < mfc_context->hrd.buffer_size) ?
116         encoder_context->brc.hrd_initial_buffer_fullness : mfc_context->hrd.buffer_size / 2.;
117     mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
118     mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
119     mfc_context->hrd.violation_noted = 0;
120
121     for (i = 0; i < encoder_context->layer.num_layers; i++) {
122         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = 26;
123         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 26;
124         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = 26;
125
126         if (i == 0) {
127             bitrate = encoder_context->brc.bits_per_second[0];
128             framerate = (double)encoder_context->brc.framerate_per_100s[0] / 100.0;
129         } else {
130             bitrate = (encoder_context->brc.bits_per_second[i] - encoder_context->brc.bits_per_second[i - 1]);
131             framerate = (double)(encoder_context->brc.framerate_per_100s[i] - encoder_context->brc.framerate_per_100s[i - 1]) / 100.0;
132         }
133
134         if (i == encoder_context->layer.num_layers - 1)
135             factor = 1.0;
136         else
137             factor = (double)encoder_context->brc.framerate_per_100s[i] / encoder_context->brc.framerate_per_100s[i + 1];
138
139         mfc_context->brc.target_frame_size[i][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period * factor)/framerate) /
140                                                                     (double)(inum + BRC_PWEIGHT * pnum * factor + BRC_BWEIGHT * bnum * factor));
141         mfc_context->brc.target_frame_size[i][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
142         mfc_context->brc.target_frame_size[i][SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
143
144         bpf = mfc_context->brc.bits_per_frame[i] = bitrate/framerate;
145
146         if ((bpf > qp51_size) && (bpf < qp1_size)) {
147             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
148         }
149         else if (bpf >= qp1_size)
150             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 1;
151         else if (bpf <= qp51_size)
152             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51;
153
154         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P];
155         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I];
156
157         BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I], 1, 51);
158         BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P], 1, 51);
159         BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B], 1, 51);
160     }
161 }
162
163 int intel_mfc_update_hrd(struct encode_state *encode_state,
164                          struct intel_encoder_context *encoder_context,
165                          int frame_bits)
166 {
167     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
168     double prev_bf = mfc_context->hrd.current_buffer_fullness;
169
170     mfc_context->hrd.current_buffer_fullness -= frame_bits;
171
172     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
173         mfc_context->hrd.current_buffer_fullness = prev_bf;
174         return BRC_UNDERFLOW;
175     }
176     
177     mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame[encoder_context->layer.curr_frame_layer_id];
178     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
179         if (mfc_context->brc.mode == VA_RC_VBR)
180             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
181         else {
182             mfc_context->hrd.current_buffer_fullness = prev_bf;
183             return BRC_OVERFLOW;
184         }
185     }
186     return BRC_NO_HRD_VIOLATION;
187 }
188
/*
 * Post-encode BRC step: given the actual coded size of the frame just
 * packed (frame_bits), check HRD compliance and predict the QP for the
 * next frame of each slice type (per temporal layer).  Returns a
 * gen6_brc_status code; a violation status tells the caller the frame
 * should be re-encoded with the adjusted QP.
 */
int intel_mfc_brc_postpack(struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context,
                           int frame_bits)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; 
    int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
    int curr_frame_layer_id, next_frame_layer_id;
    int qpi, qpp, qpb;
    int qp; // quantizer of previously encoded slice of current type
    int qpn; // predicted quantizer for next frame of current type in integer format
    double qpf; // predicted quantizer for next frame of current type in float format
    double delta_qp; // QP correction
    int target_frame_size, frame_size_next;
    /* Notes:
     *  x - how far we are from HRD buffer borders
     *  y - how far we are from target HRD buffer fullness
     */
    double x, y;
    double frame_size_alpha, factor;

    /* Without temporal layering everything belongs to layer 0; otherwise
     * look up which layer the next frame in the sequence will belong to. */
    if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0) {
        curr_frame_layer_id = 0;
        next_frame_layer_id = 0;
    } else {
        curr_frame_layer_id = encoder_context->layer.curr_frame_layer_id;
        next_frame_layer_id = encoder_context->layer.frame_layer_ids[encoder_context->num_frames_in_sequence % encoder_context->layer.size_frame_layer_ids];
    }

    /* check for HRD compliance first */
    sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);

    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* nothing */
    } else {
        /* On violation the frame will be re-encoded, so the "next" frame
         * stays on the current layer. */
        next_frame_layer_id = curr_frame_layer_id;
    }

    /* Remember this frame's size for its layer, then switch the working
     * values (frame_bits, slicetype) to the layer of the next frame. */
    mfc_context->brc.bits_prev_frame[curr_frame_layer_id] = frame_bits;
    frame_bits = mfc_context->brc.bits_prev_frame[next_frame_layer_id];

    mfc_context->brc.prev_slice_type[curr_frame_layer_id] = slicetype;
    slicetype = mfc_context->brc.prev_slice_type[next_frame_layer_id];

    /* Weight the GOP length by the next layer's share of the top layer's
     * framerate. */
    if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0)
        factor = 1.0;
    else
        factor = (double)encoder_context->brc.framerate_per_100s[next_frame_layer_id] / encoder_context->brc.framerate_per_100s[encoder_context->layer.num_layers - 1];

    /* 0 means the next frame is the first frame of next layer */
    if (frame_bits == 0)
        return sts;

    qpi = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I];
    qpp = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P];
    qpb = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B];

    qp = mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype];

    /* Predict the next frame size from how far this one missed the
     * target; alpha damps the correction by the GOP composition. */
    target_frame_size = mfc_context->brc.target_frame_size[next_frame_layer_id][slicetype];
    if (mfc_context->hrd.buffer_capacity < 5)
        frame_size_alpha = 0;
    else
        frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype] * factor;
    if (frame_size_alpha > 30) frame_size_alpha = 30;
    frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
        (double)(frame_size_alpha + 1.);

    /* frame_size_next: avoiding negative number and too small value */
    if ((double)frame_size_next < (double)(target_frame_size * 0.25))
        frame_size_next = (int)((double)target_frame_size * 0.25);

    /* Scale QP proportionally to how oversized the predicted frame is. */
    qpf = (double)qp * target_frame_size / frame_size_next;
    qpn = (int)(qpf + 0.5);

    if (qpn == qp) {
        /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
        mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] += qpf - qpn;
        if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] > 1.0) {
            qpn++;
            mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
        } else if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] < -1.0) {
            qpn--;
            mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
        }
    }
    /* making sure that QP is not changing too fast */
    if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
    else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, 1, 51);

    /* calculating QP delta as some function*/
    x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
    if (x > 0) {
        x /= mfc_context->hrd.target_buffer_fullness;
        y = mfc_context->hrd.current_buffer_fullness;
    }
    else {
        x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
        y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
    }
    if (y < 0.01) y = 0.01;
    if (x > 1) x = 1;
    else if (x < -1) x = -1;

    /* Smooth correction: strongest when the buffer is near its borders
     * (small y) and when fullness deviates most from the target (|x|=1). */
    delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
    qpn = (int)(qpn + delta_qp + 0.5);

    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, 1, 51);

    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* correcting QPs of slices of other types */
        if (slicetype == SLICE_TYPE_P) {
            if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
            if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
        } else if (slicetype == SLICE_TYPE_I) {
            if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
            if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
        } else { // SLICE_TYPE_B
            if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
            if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
                mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
        }
        BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I], 1, 51);
        BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P], 1, 51);
        BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B], 1, 51);
    } else if (sts == BRC_UNDERFLOW) { // underflow
        /* Frame too big: force at least one QP step up. */
        if (qpn <= qp) qpn = qp + 1;
        if (qpn > 51) {
            qpn = 51;
            sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
        }
    } else if (sts == BRC_OVERFLOW) {
        /* Frame too small: force at least one QP step down. */
        if (qpn >= qp) qpn = qp - 1;
        if (qpn < 1) { // < 0 (?) overflow with minQP
            qpn = 1;
            sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
        }
    }

    mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype] = qpn;

    return sts;
}
341
342 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
343                                        struct intel_encoder_context *encoder_context)
344 {
345     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
346     unsigned int rate_control_mode = encoder_context->rate_control_mode;
347     int target_bit_rate = encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
348     
349     // current we only support CBR mode.
350     if (rate_control_mode == VA_RC_CBR) {
351         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
352         mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
353         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
354         mfc_context->vui_hrd.i_frame_number = 0;
355
356         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24; 
357         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
358         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
359     }
360
361 }
362
363 void 
364 intel_mfc_hrd_context_update(struct encode_state *encode_state, 
365                              struct gen6_mfc_context *mfc_context)
366 {
367     mfc_context->vui_hrd.i_frame_number++;
368 }
369
370 int intel_mfc_interlace_check(VADriverContextP ctx,
371                               struct encode_state *encode_state,
372                               struct intel_encoder_context *encoder_context)
373 {
374     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
375     VAEncSliceParameterBufferH264 *pSliceParameter;
376     int i;
377     int mbCount = 0;
378     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
379     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
380   
381     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
382         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer; 
383         mbCount += pSliceParameter->num_macroblocks; 
384     }
385     
386     if ( mbCount == ( width_in_mbs * height_in_mbs ) )
387         return 0;
388
389     return 1;
390 }
391
392 void intel_mfc_brc_prepare(struct encode_state *encode_state,
393                            struct intel_encoder_context *encoder_context)
394 {
395     unsigned int rate_control_mode = encoder_context->rate_control_mode;
396
397     if (encoder_context->codec != CODEC_H264 &&
398         encoder_context->codec != CODEC_H264_MVC)
399         return;
400
401     if (rate_control_mode == VA_RC_CBR) {
402         /*Programing bit rate control */
403         if (encoder_context->brc.need_reset) {
404             intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
405             intel_mfc_brc_init(encode_state, encoder_context);
406         }
407
408         /*Programing HRD control */
409         if (encoder_context->brc.need_reset)
410             intel_mfc_hrd_context_init(encode_state, encoder_context);    
411     }
412 }
413
414 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
415                                               struct encode_state *encode_state,
416                                               struct intel_encoder_context *encoder_context,
417                                               struct intel_batchbuffer *slice_batch)
418 {
419     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
420     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
421     unsigned int rate_control_mode = encoder_context->rate_control_mode;
422     unsigned int skip_emul_byte_cnt;
423
424     if (encode_state->packed_header_data[idx]) {
425         VAEncPackedHeaderParameterBuffer *param = NULL;
426         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
427         unsigned int length_in_bits;
428
429         assert(encode_state->packed_header_param[idx]);
430         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
431         length_in_bits = param->bit_length;
432
433         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
434         mfc_context->insert_object(ctx,
435                                    encoder_context,
436                                    header_data,
437                                    ALIGN(length_in_bits, 32) >> 5,
438                                    length_in_bits & 0x1f,
439                                    skip_emul_byte_cnt,
440                                    0,
441                                    0,
442                                    !param->has_emulation_bytes,
443                                    slice_batch);
444     }
445
446     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
447
448     if (encode_state->packed_header_data[idx]) {
449         VAEncPackedHeaderParameterBuffer *param = NULL;
450         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
451         unsigned int length_in_bits;
452
453         assert(encode_state->packed_header_param[idx]);
454         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
455         length_in_bits = param->bit_length;
456
457         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
458
459         mfc_context->insert_object(ctx,
460                                    encoder_context,
461                                    header_data,
462                                    ALIGN(length_in_bits, 32) >> 5,
463                                    length_in_bits & 0x1f,
464                                    skip_emul_byte_cnt,
465                                    0,
466                                    0,
467                                    !param->has_emulation_bytes,
468                                    slice_batch);
469     }
470     
471     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
472
473     if (encode_state->packed_header_data[idx]) {
474         VAEncPackedHeaderParameterBuffer *param = NULL;
475         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
476         unsigned int length_in_bits;
477
478         assert(encode_state->packed_header_param[idx]);
479         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
480         length_in_bits = param->bit_length;
481
482         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
483         mfc_context->insert_object(ctx,
484                                    encoder_context,
485                                    header_data,
486                                    ALIGN(length_in_bits, 32) >> 5,
487                                    length_in_bits & 0x1f,
488                                    skip_emul_byte_cnt,
489                                    0,
490                                    0,
491                                    !param->has_emulation_bytes,
492                                    slice_batch);
493     } else if (rate_control_mode == VA_RC_CBR) {
494         // this is frist AU
495         struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
496
497         unsigned char *sei_data = NULL;
498     
499         int length_in_bits = build_avc_sei_buffer_timing(
500             mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
501             mfc_context->vui_hrd.i_initial_cpb_removal_delay,
502             0,
503             mfc_context->vui_hrd.i_cpb_removal_delay_length,                                                       mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
504             mfc_context->vui_hrd.i_dpb_output_delay_length,
505             0,
506             &sei_data);
507         mfc_context->insert_object(ctx,
508                                    encoder_context,
509                                    (unsigned int *)sei_data,
510                                    ALIGN(length_in_bits, 32) >> 5,
511                                    length_in_bits & 0x1f,
512                                    5,
513                                    0,   
514                                    0,   
515                                    1,
516                                    slice_batch);  
517         free(sei_data);
518     }
519 }
520
521 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, 
522                                struct encode_state *encode_state,
523                                struct intel_encoder_context *encoder_context)
524 {
525     struct i965_driver_data *i965 = i965_driver_data(ctx);
526     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
527     struct object_surface *obj_surface; 
528     struct object_buffer *obj_buffer;
529     GenAvcSurface *gen6_avc_surface;
530     dri_bo *bo;
531     VAStatus vaStatus = VA_STATUS_SUCCESS;
532     int i, j, enable_avc_ildb = 0;
533     VAEncSliceParameterBufferH264 *slice_param;
534     struct i965_coded_buffer_segment *coded_buffer_segment;
535     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
536     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
537     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
538
539     if (IS_GEN6(i965->intel.device_info)) {
540         /* On the SNB it should be fixed to 128 for the DMV buffer */
541         width_in_mbs = 128;
542     }
543
544     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
545         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
546         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
547
548         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
549             assert((slice_param->slice_type == SLICE_TYPE_I) ||
550                    (slice_param->slice_type == SLICE_TYPE_SI) ||
551                    (slice_param->slice_type == SLICE_TYPE_P) ||
552                    (slice_param->slice_type == SLICE_TYPE_SP) ||
553                    (slice_param->slice_type == SLICE_TYPE_B));
554
555             if (slice_param->disable_deblocking_filter_idc != 1) {
556                 enable_avc_ildb = 1;
557                 break;
558             }
559
560             slice_param++;
561         }
562     }
563
564     /*Setup all the input&output object*/
565
566     /* Setup current frame and current direct mv buffer*/
567     obj_surface = encode_state->reconstructed_object;
568     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
569
570     if ( obj_surface->private_data == NULL) {
571         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
572         assert(gen6_avc_surface);
573         gen6_avc_surface->dmv_top = 
574             dri_bo_alloc(i965->intel.bufmgr,
575                          "Buffer",
576                          68 * width_in_mbs * height_in_mbs, 
577                          64);
578         gen6_avc_surface->dmv_bottom = 
579             dri_bo_alloc(i965->intel.bufmgr,
580                          "Buffer",
581                          68 * width_in_mbs * height_in_mbs, 
582                          64);
583         assert(gen6_avc_surface->dmv_top);
584         assert(gen6_avc_surface->dmv_bottom);
585         obj_surface->private_data = (void *)gen6_avc_surface;
586         obj_surface->free_private_data = (void *)gen_free_avc_surface; 
587     }
588     gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
589     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
590     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
591     dri_bo_reference(gen6_avc_surface->dmv_top);
592     dri_bo_reference(gen6_avc_surface->dmv_bottom);
593
594     if (enable_avc_ildb) {
595         mfc_context->post_deblocking_output.bo = obj_surface->bo;
596         dri_bo_reference(mfc_context->post_deblocking_output.bo);
597     } else {
598         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
599         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
600     }
601
602     mfc_context->surface_state.width = obj_surface->orig_width;
603     mfc_context->surface_state.height = obj_surface->orig_height;
604     mfc_context->surface_state.w_pitch = obj_surface->width;
605     mfc_context->surface_state.h_pitch = obj_surface->height;
606     
607     /* Setup reference frames and direct mv buffers*/
608     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
609         obj_surface = encode_state->reference_objects[i];
610         
611         if (obj_surface && obj_surface->bo) {
612             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
613             dri_bo_reference(obj_surface->bo);
614
615             /* Check DMV buffer */
616             if ( obj_surface->private_data == NULL) {
617                 
618                 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
619                 assert(gen6_avc_surface);
620                 gen6_avc_surface->dmv_top = 
621                     dri_bo_alloc(i965->intel.bufmgr,
622                                  "Buffer",
623                                  68 * width_in_mbs * height_in_mbs, 
624                                  64);
625                 gen6_avc_surface->dmv_bottom = 
626                     dri_bo_alloc(i965->intel.bufmgr,
627                                  "Buffer",
628                                  68 * width_in_mbs * height_in_mbs, 
629                                  64);
630                 assert(gen6_avc_surface->dmv_top);
631                 assert(gen6_avc_surface->dmv_bottom);
632                 obj_surface->private_data = gen6_avc_surface;
633                 obj_surface->free_private_data = gen_free_avc_surface; 
634             }
635     
636             gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
637             /* Setup DMV buffer */
638             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
639             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
640             dri_bo_reference(gen6_avc_surface->dmv_top);
641             dri_bo_reference(gen6_avc_surface->dmv_bottom);
642         } else {
643             break;
644         }
645     }
646
647     mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
648     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
649
650     obj_buffer = encode_state->coded_buf_object;
651     bo = obj_buffer->buffer_store->bo;
652     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
653     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
654     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
655     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
656     
657     dri_bo_map(bo, 1);
658     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
659     coded_buffer_segment->mapped = 0;
660     coded_buffer_segment->codec = encoder_context->codec;
661     dri_bo_unmap(bo);
662
663     return vaStatus;
664 }
665 /*
666  * The LUT uses the pair of 4-bit units: (shift, base) structure.
667  * 2^K * X = value . 
668  * So it is necessary to convert one cost into the nearest LUT format.
669  * The derivation is:
670  * 2^K *x = 2^n * (1 + deltaX)
671  *    k + log2(x) = n + log2(1 + deltaX)
672  *    log2(x) = n - k + log2(1 + deltaX)
673  *    As X is in the range of [1, 15]
674  *      4 > n - k + log2(1 + deltaX) >= 0 
675  *      =>    n + log2(1 + deltaX)  >= k > n - 4  + log2(1 + deltaX)
676  *    Then we can derive the corresponding K and get the nearest LUT format.
677  */
678 int intel_format_lutvalue(int value, int max)
679 {
680     int ret;
681     int logvalue, temp1, temp2;
682
683     if (value <= 0)
684         return 0;
685
686     logvalue = (int)(log2f((float)value));
687     if (logvalue < 4) {
688         ret = value;
689     } else {
690         int error, temp_value, base, j, temp_err;
691         error = value;
692         j = logvalue - 4 + 1;
693         ret = -1;
694         for(; j <= logvalue; j++) {
695             if (j == 0) {
696                 base = value >> j;
697             } else {
698                 base = (value + (1 << (j - 1)) - 1) >> j;
699             }
700             if (base >= 16)
701                 continue;
702
703             temp_value = base << j;
704             temp_err = abs(value - temp_value);
705             if (temp_err < error) {
706                 error = temp_err;
707                 ret = (j << 4) | base;
708                 if (temp_err == 0)
709                     break;
710             }
711         }
712     }
713     temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
714     temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
715     if (temp1 > temp2)
716         ret = max;
717     return ret;
718
719 }
720
721
#define         QP_MAX                  52      /* H.264 QP upper bound (asserts and VP8->H.264 QP rescaling) */
#define         VP8_QP_MAX              128     /* VP8 quantization index upper bound */
724
725
726 static float intel_lambda_qp(int qp)
727 {
728     float value, lambdaf;
729     value = qp;
730     value = value / 6 - 2;
731     if (value < 0)
732         value = 0;
733     lambdaf = roundf(powf(2, value));
734     return lambdaf;
735 }
736
737 static
738 void intel_h264_calc_mbmvcost_qp(int qp,
739                                  int slice_type,
740                                  uint8_t *vme_state_message)
741 {
742     int m_cost, j, mv_count;
743     float   lambda, m_costf;
744
745     assert(qp <= QP_MAX); 
746     lambda = intel_lambda_qp(qp);
747
748     m_cost = lambda;
749     vme_state_message[MODE_CHROMA_INTRA] = 0;
750     vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
751
752     if (slice_type == SLICE_TYPE_I) {
753         vme_state_message[MODE_INTRA_16X16] = 0;
754         m_cost = lambda * 4;
755         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
756         m_cost = lambda * 16; 
757         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
758         m_cost = lambda * 3;
759         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
760     } else {
761         m_cost = 0;
762         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
763         for (j = 1; j < 3; j++) {
764             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
765             m_cost = (int)m_costf;
766             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
767         }
768         mv_count = 3;
769         for (j = 4; j <= 64; j *= 2) {
770             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
771             m_cost = (int)m_costf;
772             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
773             mv_count++;
774         }
775
776         if (qp <= 25) {
777             vme_state_message[MODE_INTRA_16X16] = 0x4a;
778             vme_state_message[MODE_INTRA_8X8] = 0x4a;
779             vme_state_message[MODE_INTRA_4X4] = 0x4a;
780             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
781             vme_state_message[MODE_INTER_16X16] = 0x4a;
782             vme_state_message[MODE_INTER_16X8] = 0x4a;
783             vme_state_message[MODE_INTER_8X8] = 0x4a;
784             vme_state_message[MODE_INTER_8X4] = 0x4a;
785             vme_state_message[MODE_INTER_4X4] = 0x4a;
786             vme_state_message[MODE_INTER_BWD] = 0x2a;
787             return;
788         }
789         m_costf = lambda * 10;
790         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
791         m_cost = lambda * 14;
792         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
793         m_cost = lambda * 24; 
794         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
795         m_costf = lambda * 3.5;
796         m_cost = m_costf;
797         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
798         if (slice_type == SLICE_TYPE_P) {
799             m_costf = lambda * 2.5;
800             m_cost = m_costf;
801             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
802             m_costf = lambda * 4;
803             m_cost = m_costf;
804             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
805             m_costf = lambda * 1.5;
806             m_cost = m_costf;
807             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
808             m_costf = lambda * 3;
809             m_cost = m_costf;
810             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
811             m_costf = lambda * 5;
812             m_cost = m_costf;
813             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
814             /* BWD is not used in P-frame */
815             vme_state_message[MODE_INTER_BWD] = 0;
816         } else {
817             m_costf = lambda * 2.5;
818             m_cost = m_costf;
819             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
820             m_costf = lambda * 5.5;
821             m_cost = m_costf;
822             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
823             m_costf = lambda * 3.5;
824             m_cost = m_costf;
825             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
826             m_costf = lambda * 5.0;
827             m_cost = m_costf;
828             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
829             m_costf = lambda * 6.5;
830             m_cost = m_costf;
831             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
832             m_costf = lambda * 1.5;
833             m_cost = m_costf;
834             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
835         }
836     }
837     return;
838 }
839
840 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
841                                 struct encode_state *encode_state,
842                                 struct intel_encoder_context *encoder_context)
843 {
844     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
845     struct gen6_vme_context *vme_context = encoder_context->vme_context;
846     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
847     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
848     int qp;
849     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
850
851     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
852
853     if (encoder_context->rate_control_mode == VA_RC_CQP)
854         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
855     else
856         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
857
858     if (vme_state_message == NULL)
859         return;
860
861     intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
862 }
863
864 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
865                                 struct encode_state *encode_state,
866                                 struct intel_encoder_context *encoder_context)
867 {
868     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
869     struct gen6_vme_context *vme_context = encoder_context->vme_context;
870     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
871     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
872     int qp, m_cost, j, mv_count;
873     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
874     float   lambda, m_costf;
875
876     int is_key_frame = !pic_param->pic_flags.bits.frame_type;
877     int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
878   
879     if (vme_state_message == NULL)
880         return;
881  
882     if (encoder_context->rate_control_mode == VA_RC_CQP)
883         qp = q_matrix->quantization_index[0];
884     else
885         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
886
887     lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
888
889     m_cost = lambda;
890     vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
891
892     if (is_key_frame) {
893         vme_state_message[MODE_INTRA_16X16] = 0;
894         m_cost = lambda * 16; 
895         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
896         m_cost = lambda * 3;
897         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
898     } else {
899         m_cost = 0;
900         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
901         for (j = 1; j < 3; j++) {
902             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
903             m_cost = (int)m_costf;
904             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
905         }
906         mv_count = 3;
907         for (j = 4; j <= 64; j *= 2) {
908             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
909             m_cost = (int)m_costf;
910             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
911             mv_count++;
912         }
913
914         if (qp < 92 ) {
915             vme_state_message[MODE_INTRA_16X16] = 0x4a;
916             vme_state_message[MODE_INTRA_4X4] = 0x4a;
917             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
918             vme_state_message[MODE_INTER_16X16] = 0x4a;
919             vme_state_message[MODE_INTER_16X8] = 0x4a;
920             vme_state_message[MODE_INTER_8X8] = 0x4a;
921             vme_state_message[MODE_INTER_4X4] = 0x4a;
922             vme_state_message[MODE_INTER_BWD] = 0;
923             return;
924         }
925         m_costf = lambda * 10;
926         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
927         m_cost = lambda * 24; 
928         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
929             
930         m_costf = lambda * 3.5;
931         m_cost = m_costf;
932         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
933
934         m_costf = lambda * 2.5;
935         m_cost = m_costf;
936         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
937         m_costf = lambda * 4;
938         m_cost = m_costf;
939         vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
940         m_costf = lambda * 1.5;
941         m_cost = m_costf;
942         vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
943         m_costf = lambda * 5;
944         m_cost = m_costf;
945         vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
946         /* BWD is not used in P-frame */
947         vme_state_message[MODE_INTER_BWD] = 0;
948     }
949 }
950
#define         MB_SCOREBOARD_A         (1 << 0)        /* left neighbour dependency */
#define         MB_SCOREBOARD_B         (1 << 1)        /* top neighbour dependency */
#define         MB_SCOREBOARD_C         (1 << 2)        /* top-right neighbour dependency */

/*
 * Enable the stalling hardware scoreboard for the VME kernel and
 * program the relative (x, y) offsets of the three macroblock
 * dependencies (A = left, B = top, C = top-right).
 */
void 
gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
{
    vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
    vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
    vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
                                                           MB_SCOREBOARD_B |
                                                           MB_SCOREBOARD_C);

    /* In VME prediction the current mb depends on the neighbour 
     * A/B/C macroblock. So the left/up/up-right dependency should
     * be considered.
     */
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;   /* A: left */
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;    /* B: top */
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;    /* C: top-right */
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;

    vme_context->gpe_context.vfe_desc7.dword = 0;
    return;
}
977
/* Return 0 when macroblock (x_index, y_index) lies inside the frame and
 * inside the slice span starting at first_mb; -1 otherwise. */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    int inside_frame = (x_index >= 0 && x_index < mb_width &&
                        y_index >= 0 && y_index < mb_height);

    if (!inside_frame)
        return -1;

    int mb_index = y_index * mb_width + x_index;

    /* NOTE(review): upper bound is inclusive (<= first_mb + num_mb), which
     * admits one MB past the slice end — preserved from the original. */
    return (mb_index >= first_mb && mb_index <= first_mb + num_mb) ? 0 : -1;
}
992
/*
 * Emit one CMD_MEDIA_OBJECT per macroblock into the VME batchbuffer,
 * walking each slice in the 45-degree wavefront order required by the
 * hardware scoreboard (left / top / top-right dependencies).  Per-MB
 * QP comes from the ROI map when ROI is enabled, otherwise the
 * frame-level QP is used for every MB.
 */
void
gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
                                     struct encode_state *encode_state,
                                     int mb_width, int mb_height,
                                     int kernel,
                                     int transform_8x8_mode_flag,
                                     struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_row;
    int s;
    unsigned int *command_ptr;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    int qp,qp_mb,qp_index;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

    /* Frame-level QP: from the parameters (CQP) or from the BRC state. */
    if (encoder_context->rate_control_mode == VA_RC_CQP)
        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    else
        qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];

#define         USE_SCOREBOARD          (1 << 21)
 
    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
        int first_mb = pSliceParameter->macroblock_address;
        int num_mb = pSliceParameter->num_macroblocks;
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;

        x_outer = first_mb % mb_width;
        y_outer = first_mb / mb_width;
        mb_row = y_outer;

        /* First pass: wavefronts starting along the top row of the slice. */
        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            x_inner = x_outer;
            y_inner = y_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* Intra-availability and scoreboard dependency flags for the
                 * A (left), B (top), C (top-right) and D (top-left) MBs. */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A; 
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                /* QP occupies one byte */
                if (vme_context->roi_enabled) {
                    qp_index = y_inner * mb_width + x_inner;
                    qp_mb = *(vme_context->qp_per_mb + qp_index);
                } else
                    qp_mb = qp;
                *command_ptr++ = qp_mb;
                /* Step down-left along the 45-degree wavefront. */
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* Second pass: wavefronts starting from the right-hand edge. */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = first_mb / mb_width;
        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A; 
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                /* qp occupies one byte */
                if (vme_context->roi_enabled) {
                    qp_index = y_inner * mb_width + x_inner;
                    qp_mb = *(vme_context->qp_per_mb + qp_index);
                } else
                    qp_mb = qp;
                *command_ptr++ = qp_mb;

                x_inner -= 2;
                y_inner += 1;
            }
            /* Wrap the start column back to the right edge, next row down. */
            x_outer++;
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }           
        }
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
1140
1141 static uint8_t
1142 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1143 {
1144     unsigned int is_long_term =
1145         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1146     unsigned int is_top_field =
1147         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1148     unsigned int is_bottom_field =
1149         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1150
1151     return ((is_long_term                         << 6) |
1152             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1153             (frame_store_id                       << 1) |
1154             ((is_top_field ^ 1) & is_bottom_field));
1155 }
1156
/*
 * Emit MFX_AVC_REF_IDX_STATE for both reference lists.  The single
 * reference actually used by VME in each list is mapped to its DPB
 * frame-store slot; all other entries keep the 0x80 "invalid" marker.
 */
void
intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int slice_type;
    struct object_surface *obj_surface;
    unsigned int fref_entry, bref_entry;
    int frame_index, i;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;

    /* 0x80 in each byte marks a "not present" reference entry. */
    fref_entry = 0x80808080;
    bref_entry = 0x80808080;
    slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

    if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
        int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);

        if (ref_idx_l0 > 3) {
            WARN_ONCE("ref_idx_l0 is out of range\n");
            ref_idx_l0 = 0;
        }

        /* Locate the DPB slot of the L0 reference used by VME. */
        obj_surface = vme_context->used_reference_objects[0];
        frame_index = -1;
        for (i = 0; i < 16; i++) {
            if (obj_surface &&
                obj_surface == encode_state->reference_objects[i]) {
                frame_index = i;
                break;
            }
        }
        if (frame_index == -1) {
            WARN_ONCE("RefPicList0 is not found in DPB!\n");
        } else {
            int ref_idx_l0_shift = ref_idx_l0 * 8;
            fref_entry &= ~(0xFF << ref_idx_l0_shift);
            fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
        }
    }

    if (slice_type == SLICE_TYPE_B) {
        int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);

        if (ref_idx_l1 > 3) {
            WARN_ONCE("ref_idx_l1 is out of range\n");
            ref_idx_l1 = 0;
        }

        /* Same DPB lookup for the L1 (backward) reference. */
        obj_surface = vme_context->used_reference_objects[1];
        frame_index = -1;
        for (i = 0; i < 16; i++) {
            if (obj_surface &&
                obj_surface == encode_state->reference_objects[i]) {
                frame_index = i;
                break;
            }
        }
        if (frame_index == -1) {
            WARN_ONCE("RefPicList1 is not found in DPB!\n");
        } else {
            int ref_idx_l1_shift = ref_idx_l1 * 8;
            bref_entry &= ~(0xFF << ref_idx_l1_shift);
            bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
        }
    }

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
    OUT_BCS_BATCH(batch, 0);                  //Select L0
    OUT_BCS_BATCH(batch, fref_entry);         //Only 1 reference
    for(i = 0; i < 7; i++) {
        OUT_BCS_BATCH(batch, 0x80808080);
    }
    ADVANCE_BCS_BATCH(batch);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
    OUT_BCS_BATCH(batch, 1);                  //Select L1
    OUT_BCS_BATCH(batch, bref_entry);         //Only 1 reference
    for(i = 0; i < 7; i++) {
        OUT_BCS_BATCH(batch, 0x80808080);
    }
    ADVANCE_BCS_BATCH(batch);
}
1244
1245
1246 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1247                                  struct encode_state *encode_state,
1248                                  struct intel_encoder_context *encoder_context)
1249 {
1250     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1251     uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1252     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1253     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1254     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1255     uint32_t mv_x, mv_y;
1256     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1257     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1258     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1259
1260     if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1261         mv_x = 512;
1262         mv_y = 64;
1263     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1264         mv_x = 1024;
1265         mv_y = 128;
1266     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1267         mv_x = 2048;
1268         mv_y = 128;
1269     } else {
1270         WARN_ONCE("Incorrect Mpeg2 level setting!\n");
1271         mv_x = 512;
1272         mv_y = 64;
1273     }
1274
1275     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1276     if (pic_param->picture_type != VAEncPictureTypeIntra) {
1277         int qp, m_cost, j, mv_count;
1278         float   lambda, m_costf;
1279         slice_param = (VAEncSliceParameterBufferMPEG2 *)
1280             encode_state->slice_params_ext[0]->buffer;
1281         qp = slice_param->quantiser_scale_code;
1282         lambda = intel_lambda_qp(qp);
1283         /* No Intra prediction. So it is zero */
1284         vme_state_message[MODE_INTRA_8X8] = 0;
1285         vme_state_message[MODE_INTRA_4X4] = 0;
1286         vme_state_message[MODE_INTER_MV0] = 0;
1287         for (j = 1; j < 3; j++) {
1288             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1289             m_cost = (int)m_costf;
1290             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1291         }
1292         mv_count = 3;
1293         for (j = 4; j <= 64; j *= 2) {
1294             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1295             m_cost = (int)m_costf;
1296             vme_state_message[MODE_INTER_MV0 + mv_count] =
1297                 intel_format_lutvalue(m_cost, 0x6f);
1298             mv_count++;
1299         }
1300         m_cost = lambda;
1301         /* It can only perform the 16x16 search. So mode cost can be ignored for
1302          * the other mode. for example: 16x8/8x8
1303          */
1304         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1305         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1306
1307         vme_state_message[MODE_INTER_16X8] = 0;
1308         vme_state_message[MODE_INTER_8X8] = 0;
1309         vme_state_message[MODE_INTER_8X4] = 0;
1310         vme_state_message[MODE_INTER_4X4] = 0;
1311         vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
1312
1313     }
1314     vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1315
1316     vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
1317         width_in_mbs;
1318 }
1319
1320 void
1321 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
1322                                            struct encode_state *encode_state,
1323                                            int mb_width, int mb_height,
1324                                            int kernel,
1325                                            struct intel_encoder_context *encoder_context)
1326 {
1327     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1328     unsigned int *command_ptr;
1329
1330 #define         MPEG2_SCOREBOARD                (1 << 21)
1331
1332     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1333     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1334
1335     {
1336         unsigned int mb_intra_ub, score_dep;
1337         int x_outer, y_outer, x_inner, y_inner;
1338         int xtemp_outer = 0;
1339         int first_mb = 0;
1340         int num_mb = mb_width * mb_height;
1341
1342         x_outer = 0;
1343         y_outer = 0;
1344
1345
1346         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1347             x_inner = x_outer;
1348             y_inner = y_outer;
1349             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1350                 mb_intra_ub = 0;
1351                 score_dep = 0;
1352                 if (x_inner != 0) {
1353                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1354                     score_dep |= MB_SCOREBOARD_A; 
1355                 }
1356                 if (y_inner != 0) {
1357                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1358                     score_dep |= MB_SCOREBOARD_B;
1359
1360                     if (x_inner != 0)
1361                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1362
1363                     if (x_inner != (mb_width -1)) {
1364                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1365                         score_dep |= MB_SCOREBOARD_C;
1366                     }
1367                 }
1368
1369                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1370                 *command_ptr++ = kernel;
1371                 *command_ptr++ = MPEG2_SCOREBOARD;
1372                 /* Indirect data */
1373                 *command_ptr++ = 0;
1374                 /* the (X, Y) term of scoreboard */
1375                 *command_ptr++ = ((y_inner << 16) | x_inner);
1376                 *command_ptr++ = score_dep;
1377                 /*inline data */
1378                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1379                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1380                 x_inner -= 2;
1381                 y_inner += 1;
1382             }
1383             x_outer += 1;
1384         }
1385
1386         xtemp_outer = mb_width - 2;
1387         if (xtemp_outer < 0)
1388             xtemp_outer = 0;
1389         x_outer = xtemp_outer;
1390         y_outer = 0;
1391         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
1392             y_inner = y_outer;
1393             x_inner = x_outer;
1394             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1395                 mb_intra_ub = 0;
1396                 score_dep = 0;
1397                 if (x_inner != 0) {
1398                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1399                     score_dep |= MB_SCOREBOARD_A; 
1400                 }
1401                 if (y_inner != 0) {
1402                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1403                     score_dep |= MB_SCOREBOARD_B;
1404
1405                     if (x_inner != 0)
1406                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1407
1408                     if (x_inner != (mb_width -1)) {
1409                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1410                         score_dep |= MB_SCOREBOARD_C;
1411                     }
1412                 }
1413
1414                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1415                 *command_ptr++ = kernel;
1416                 *command_ptr++ = MPEG2_SCOREBOARD;
1417                 /* Indirect data */
1418                 *command_ptr++ = 0;
1419                 /* the (X, Y) term of scoreboard */
1420                 *command_ptr++ = ((y_inner << 16) | x_inner);
1421                 *command_ptr++ = score_dep;
1422                 /*inline data */
1423                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1424                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1425
1426                 x_inner -= 2;
1427                 y_inner += 1;
1428             }
1429             x_outer++;
1430             if (x_outer >= mb_width) {
1431                 y_outer += 1;
1432                 x_outer = xtemp_outer;
1433             }           
1434         }
1435     }
1436
1437     *command_ptr++ = 0;
1438     *command_ptr++ = MI_BATCH_BUFFER_END;
1439
1440     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1441     return;
1442 }
1443
1444 static int
1445 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1446                           VAPictureH264 *ref_list,
1447                           int num_pictures,
1448                           int dir)
1449 {
1450     int i, found = -1, min = 0x7FFFFFFF;
1451
1452     for (i = 0; i < num_pictures; i++) {
1453         int tmp;
1454
1455         if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1456             (ref_list[i].picture_id == VA_INVALID_SURFACE))
1457             break;
1458
1459         tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1460
1461         if (dir)
1462             tmp = -tmp;
1463
1464         if (tmp > 0 && tmp < min) {
1465             min = tmp;
1466             found = i;
1467         }
1468     }
1469
1470     return found;
1471 }
1472
1473 void
1474 intel_avc_vme_reference_state(VADriverContextP ctx,
1475                               struct encode_state *encode_state,
1476                               struct intel_encoder_context *encoder_context,
1477                               int list_index,
1478                               int surface_index,
1479                               void (* vme_source_surface_state)(
1480                                   VADriverContextP ctx,
1481                                   int index,
1482                                   struct object_surface *obj_surface,
1483                                   struct intel_encoder_context *encoder_context))
1484 {
1485     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1486     struct object_surface *obj_surface = NULL;
1487     struct i965_driver_data *i965 = i965_driver_data(ctx);
1488     VASurfaceID ref_surface_id;
1489     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1490     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1491     int max_num_references;
1492     VAPictureH264 *curr_pic;
1493     VAPictureH264 *ref_list;
1494     int ref_idx;
1495
1496     if (list_index == 0) {
1497         max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1498         ref_list = slice_param->RefPicList0;
1499     } else {
1500         max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1501         ref_list = slice_param->RefPicList1;
1502     }
1503
1504     if (max_num_references == 1) {
1505         if (list_index == 0) {
1506             ref_surface_id = slice_param->RefPicList0[0].picture_id;
1507             vme_context->used_references[0] = &slice_param->RefPicList0[0];
1508         } else {
1509             ref_surface_id = slice_param->RefPicList1[0].picture_id;
1510             vme_context->used_references[1] = &slice_param->RefPicList1[0];
1511         }
1512
1513         if (ref_surface_id != VA_INVALID_SURFACE)
1514             obj_surface = SURFACE(ref_surface_id);
1515
1516         if (!obj_surface ||
1517             !obj_surface->bo) {
1518             obj_surface = encode_state->reference_objects[list_index];
1519             vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1520         }
1521
1522         ref_idx = 0;
1523     } else {
1524         curr_pic = &pic_param->CurrPic;
1525
1526         /* select the reference frame in temporal space */
1527         ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1528         ref_surface_id = ref_list[ref_idx].picture_id;
1529
1530         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1531             obj_surface = SURFACE(ref_surface_id);
1532
1533         vme_context->used_reference_objects[list_index] = obj_surface;
1534         vme_context->used_references[list_index] = &ref_list[ref_idx];
1535     }
1536
1537     if (obj_surface &&
1538         obj_surface->bo) {
1539         assert(ref_idx >= 0);
1540         vme_context->used_reference_objects[list_index] = obj_surface;
1541         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1542         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
1543                                                     ref_idx << 16 |
1544                                                     ref_idx <<  8 |
1545                                                     ref_idx);
1546     } else {
1547         vme_context->used_reference_objects[list_index] = NULL;
1548         vme_context->used_references[list_index] = NULL;
1549         vme_context->ref_index_in_mb[list_index] = 0;
1550     }
1551 }
1552
1553 void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
1554                                         struct encode_state *encode_state,
1555                                         struct intel_encoder_context *encoder_context,
1556                                         int slice_index,
1557                                         struct intel_batchbuffer *slice_batch)
1558 {
1559     int count, i, start_index;
1560     unsigned int length_in_bits;
1561     VAEncPackedHeaderParameterBuffer *param = NULL;
1562     unsigned int *header_data = NULL;
1563     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1564     int slice_header_index;
1565
1566     if (encode_state->slice_header_index[slice_index] == 0)
1567         slice_header_index = -1;
1568     else
1569         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1570
1571     count = encode_state->slice_rawdata_count[slice_index];
1572     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1573
1574     for (i = 0; i < count; i++) {
1575         unsigned int skip_emul_byte_cnt;
1576
1577         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1578
1579         param = (VAEncPackedHeaderParameterBuffer *)
1580                     (encode_state->packed_header_params_ext[start_index + i]->buffer);
1581
1582         /* skip the slice header packed data type as it is lastly inserted */
1583         if (param->type == VAEncPackedHeaderSlice)
1584             continue;
1585
1586         length_in_bits = param->bit_length;
1587
1588         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1589
1590         /* as the slice header is still required, the last header flag is set to
1591          * zero.
1592          */
1593         mfc_context->insert_object(ctx,
1594                                    encoder_context,
1595                                    header_data,
1596                                    ALIGN(length_in_bits, 32) >> 5,
1597                                    length_in_bits & 0x1f,
1598                                    skip_emul_byte_cnt,
1599                                    0,
1600                                    0,
1601                                    !param->has_emulation_bytes,
1602                                    slice_batch);
1603     }
1604
1605     if (slice_header_index == -1) {
1606         unsigned char *slice_header = NULL;
1607         int slice_header_length_in_bits = 0;
1608         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1609         VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1610         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1611
1612         /* No slice header data is passed. And the driver needs to generate it */
1613         /* For the Normal H264 */
1614         slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
1615                                                              pPicParameter,
1616                                                              pSliceParameter,
1617                                                              &slice_header);
1618         mfc_context->insert_object(ctx, encoder_context,
1619                                    (unsigned int *)slice_header,
1620                                    ALIGN(slice_header_length_in_bits, 32) >> 5,
1621                                    slice_header_length_in_bits & 0x1f,
1622                                    5,  /* first 5 bytes are start code + nal unit type */
1623                                    1, 0, 1, slice_batch);
1624
1625         free(slice_header);
1626     } else {
1627         unsigned int skip_emul_byte_cnt;
1628
1629         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1630
1631         param = (VAEncPackedHeaderParameterBuffer *)
1632                     (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1633         length_in_bits = param->bit_length;
1634
1635         /* as the slice header is the last header data for one slice,
1636          * the last header flag is set to one.
1637          */
1638         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1639
1640         mfc_context->insert_object(ctx,
1641                                    encoder_context,
1642                                    header_data,
1643                                    ALIGN(length_in_bits, 32) >> 5,
1644                                    length_in_bits & 0x1f,
1645                                    skip_emul_byte_cnt,
1646                                    1,
1647                                    0,
1648                                    !param->has_emulation_bytes,
1649                                    slice_batch);
1650     }
1651
1652     return;
1653 }
1654
1655 void
1656 intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
1657                                 struct encode_state *encode_state,
1658                                 struct intel_encoder_context *encoder_context)
1659 {
1660     struct i965_driver_data *i965 = i965_driver_data(ctx);
1661     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1662     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1663     int qp;
1664     dri_bo *bo;
1665     uint8_t *cost_table;
1666
1667     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1668
1669
1670     if (slice_type == SLICE_TYPE_I) {
1671         if (vme_context->i_qp_cost_table)
1672             return;
1673     } else if (slice_type == SLICE_TYPE_P) {
1674         if (vme_context->p_qp_cost_table)
1675             return;
1676     } else {
1677         if (vme_context->b_qp_cost_table)
1678             return;
1679     }
1680
1681     /* It is enough to allocate 32 bytes for each qp. */
1682     bo = dri_bo_alloc(i965->intel.bufmgr,
1683                       "cost_table ",
1684                       QP_MAX * 32,
1685                       64);
1686
1687     dri_bo_map(bo, 1);
1688     assert(bo->virtual);
1689     cost_table = (uint8_t *)(bo->virtual);
1690     for (qp = 0; qp < QP_MAX; qp++) {
1691         intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
1692         cost_table += 32;
1693     }
1694
1695     dri_bo_unmap(bo);
1696
1697     if (slice_type == SLICE_TYPE_I) {
1698         vme_context->i_qp_cost_table = bo;
1699     } else if (slice_type == SLICE_TYPE_P) {
1700         vme_context->p_qp_cost_table = bo;
1701     } else {
1702         vme_context->b_qp_cost_table = bo;
1703     }
1704
1705     vme_context->cost_table_size = QP_MAX * 32;
1706     return;
1707 }
1708
1709 extern void
1710 intel_h264_setup_cost_surface(VADriverContextP ctx,
1711                               struct encode_state *encode_state,
1712                               struct intel_encoder_context *encoder_context,
1713                               unsigned long binding_table_offset,
1714                               unsigned long surface_state_offset)
1715 {
1716     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1717     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1718     dri_bo *bo;
1719
1720
1721     struct i965_buffer_surface cost_table;
1722
1723     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1724
1725
1726     if (slice_type == SLICE_TYPE_I) {
1727         bo = vme_context->i_qp_cost_table;
1728     } else if (slice_type == SLICE_TYPE_P) {
1729         bo = vme_context->p_qp_cost_table;
1730     } else {
1731         bo = vme_context->b_qp_cost_table;
1732     }
1733
1734     cost_table.bo = bo;
1735     cost_table.num_blocks = QP_MAX;
1736     cost_table.pitch = 16;
1737     cost_table.size_block = 32;
1738
1739     vme_context->vme_buffer_suface_setup(ctx,
1740                                          &vme_context->gpe_context,
1741                                          &cost_table,
1742                                          binding_table_offset,
1743                                          surface_state_offset);
1744 }
1745
1746 /*
1747  * the idea of conversion between qp and qstep comes from scaling process
1748  * of transform coeff for Luma component in H264 spec.
1749  *   2^(Qpy / 6 - 6)
1750  * In order to avoid too small qstep, it is multiplied by 16.
1751  */
1752 static float intel_h264_qp_qstep(int qp)
1753 {
1754     float value, qstep;
1755     value = qp;
1756     value = value / 6 - 2;
1757     qstep = powf(2, value);
1758     return qstep;
1759 }
1760
1761 static int intel_h264_qstep_qp(float qstep)
1762 {
1763     float qp;
1764
1765     qp = 12.0f + 6.0f * log2f(qstep);
1766
1767     return floorf(qp);
1768 }
1769
1770 /*
1771  * Currently it is based on the following assumption:
1772  * SUM(roi_area * 1 / roi_qstep) + non_area * 1 / nonroi_qstep =
1773  *                                 total_aread * 1 / baseqp_qstep
1774  *
1775  * qstep is the linearized quantizer of H264 quantizer
1776  */
/* Geometry (in macroblock units) and QP of one ROI region */
typedef struct {
    int row_start_in_mb;   /* first MB row covered by the region */
    int row_end_in_mb;     /* one past the last covered MB row */
    int col_start_in_mb;   /* first MB column covered by the region */
    int col_end_in_mb;     /* one past the last covered MB column */

    int width_mbs;         /* col_end_in_mb - col_start_in_mb */
    int height_mbs;        /* row_end_in_mb - row_start_in_mb */

    int roi_qp;            /* QP applied inside the region, clipped to [1, 51] */
} ROIRegionParam;
1788
1789 static VAStatus
1790 intel_h264_enc_roi_cbr(VADriverContextP ctx,
1791                        int base_qp,
1792                        VAEncMiscParameterBufferROI *pMiscParamROI,
1793                        struct encode_state *encode_state,
1794                        struct intel_encoder_context *encoder_context)
1795 {
1796     int nonroi_qp;
1797     VAEncROI *region_roi;
1798     bool quickfill = 0;
1799
1800     ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS];
1801     int num_roi = 0;
1802     int i,j;
1803
1804     float temp;
1805     float qstep_nonroi, qstep_base;
1806     float roi_area, total_area, nonroi_area;
1807     float sum_roi;
1808
1809     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1810     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1811     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1812     int mbs_in_picture = width_in_mbs * height_in_mbs;
1813
1814     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1815     VAStatus vaStatus = VA_STATUS_SUCCESS;
1816
1817     if(pMiscParamROI != NULL)
1818     {
1819         num_roi = (pMiscParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pMiscParamROI->num_roi;
1820
1821         /* currently roi_value_is_qp_delta is the only supported mode of priority.
1822         *
1823         * qp_delta set by user is added to base_qp, which is then clapped by
1824         * [base_qp-min_delta, base_qp+max_delta].
1825         */
1826         ASSERT_RET(pMiscParamROI->roi_flags.bits.roi_value_is_qp_delta,VA_STATUS_ERROR_INVALID_PARAMETER);
1827     }
1828
1829     /* when the base_qp is lower than 12, the quality is quite good based
1830      * on the H264 test experience.
1831      * In such case it is unnecessary to adjust the quality for ROI region.
1832      */
1833     if (base_qp <= 12) {
1834         nonroi_qp = base_qp;
1835         quickfill = 1;
1836         goto qp_fill;
1837     }
1838
1839     sum_roi = 0.0f;
1840     roi_area = 0;
1841     for (i = 0; i < num_roi; i++) {
1842         int row_start, row_end, col_start, col_end;
1843         int roi_width_mbs, roi_height_mbs;
1844         int mbs_in_roi;
1845         int roi_qp;
1846         float qstep_roi;
1847
1848         region_roi =  (VAEncROI *)pMiscParamROI->roi + i;
1849
1850         col_start = region_roi->roi_rectangle.x;
1851         col_end = col_start + region_roi->roi_rectangle.width;
1852         row_start = region_roi->roi_rectangle.y;
1853         row_end = row_start + region_roi->roi_rectangle.height;
1854         col_start = col_start / 16;
1855         col_end = (col_end + 15) / 16;
1856         row_start = row_start / 16;
1857         row_end = (row_end + 15) / 16;
1858
1859         roi_width_mbs = col_end - col_start;
1860         roi_height_mbs = row_end - row_start;
1861         mbs_in_roi = roi_width_mbs * roi_height_mbs;
1862
1863         param_regions[i].row_start_in_mb = row_start;
1864         param_regions[i].row_end_in_mb = row_end;
1865         param_regions[i].col_start_in_mb = col_start;
1866         param_regions[i].col_end_in_mb = col_end;
1867         param_regions[i].width_mbs = roi_width_mbs;
1868         param_regions[i].height_mbs = roi_height_mbs;
1869
1870         roi_qp = base_qp + region_roi->roi_value;
1871         BRC_CLIP(roi_qp, 1, 51);
1872
1873         param_regions[i].roi_qp = roi_qp;
1874         qstep_roi = intel_h264_qp_qstep(roi_qp);
1875
1876         roi_area += mbs_in_roi;
1877         sum_roi += mbs_in_roi / qstep_roi;
1878     }
1879
1880     total_area = mbs_in_picture;
1881     nonroi_area = total_area - roi_area;
1882
1883     qstep_base = intel_h264_qp_qstep(base_qp);
1884     temp = (total_area / qstep_base - sum_roi);
1885
1886     if (temp < 0) {
1887         nonroi_qp = 51;
1888     } else {
1889         qstep_nonroi = nonroi_area / temp;
1890         nonroi_qp = intel_h264_qstep_qp(qstep_nonroi);
1891     }
1892
1893     BRC_CLIP(nonroi_qp, 1, 51);
1894
1895 qp_fill:
1896     memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture);
1897     if (!quickfill) {
1898         char *qp_ptr;
1899
1900         for (i = 0; i < num_roi; i++) {
1901             for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) {
1902                 qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb;
1903                 memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs);
1904             }
1905         }
1906     }
1907     return vaStatus;
1908 }
1909
1910 extern void
1911 intel_h264_enc_roi_config(VADriverContextP ctx,
1912                           struct encode_state *encode_state,
1913                           struct intel_encoder_context *encoder_context)
1914 {
1915     char *qp_ptr;
1916     int i, j;
1917     VAEncROI *region_roi;
1918     struct i965_driver_data *i965 = i965_driver_data(ctx);
1919     VAEncMiscParameterBuffer* pMiscParamROI;
1920     VAEncMiscParameterBufferROI *pParamROI = NULL;
1921     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1922     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1923     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1924     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1925     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1926
1927     int row_start, row_end, col_start, col_end;
1928     int num_roi = 0;
1929
1930     vme_context->roi_enabled = 0;
1931     /* Restriction: Disable ROI when multi-slice is enabled */
1932     if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
1933         return;
1934
1935     if (encode_state->misc_param[VAEncMiscParameterTypeROI][0] != NULL) {
1936         pMiscParamROI = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeROI][0]->buffer;
1937         pParamROI = (VAEncMiscParameterBufferROI *)pMiscParamROI->data;
1938
1939         /* check whether number of ROI is correct */
1940         num_roi = (pParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pParamROI->num_roi;
1941     }
1942
1943     if (num_roi > 0)
1944         vme_context->roi_enabled = 1;
1945
1946     if (!vme_context->roi_enabled)
1947         return;
1948
1949     if ((vme_context->saved_width_mbs !=  width_in_mbs) ||
1950         (vme_context->saved_height_mbs != height_in_mbs)) {
1951         free(vme_context->qp_per_mb);
1952         vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);
1953
1954         vme_context->saved_width_mbs = width_in_mbs;
1955         vme_context->saved_height_mbs = height_in_mbs;
1956         assert(vme_context->qp_per_mb);
1957     }
1958     if (encoder_context->rate_control_mode == VA_RC_CBR) {
1959         /*
1960          * TODO: More complex Qp adjust needs to be added.
1961          * Currently it is initialized to slice_qp.
1962          */
1963         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1964         int qp;
1965         int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1966
1967         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1968         intel_h264_enc_roi_cbr(ctx, qp, pParamROI,encode_state, encoder_context);
1969
1970     } else if (encoder_context->rate_control_mode == VA_RC_CQP){
1971         VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1972         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1973         int qp;
1974
1975         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1976         memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
1977
1978
1979         for (j = num_roi; j ; j--) {
1980             int qp_delta, qp_clip;
1981
1982             region_roi =  (VAEncROI *)pParamROI->roi + j - 1;
1983
1984             col_start = region_roi->roi_rectangle.x;
1985             col_end = col_start + region_roi->roi_rectangle.width;
1986             row_start = region_roi->roi_rectangle.y;
1987             row_end = row_start + region_roi->roi_rectangle.height;
1988
1989             col_start = col_start / 16;
1990             col_end = (col_end + 15) / 16;
1991             row_start = row_start / 16;
1992             row_end = (row_end + 15) / 16;
1993
1994             qp_delta = region_roi->roi_value;
1995             qp_clip = qp + qp_delta;
1996
1997             BRC_CLIP(qp_clip, 1, 51);
1998
1999             for (i = row_start; i < row_end; i++) {
2000                 qp_ptr = vme_context->qp_per_mb + (i * width_in_mbs) + col_start;
2001                 memset(qp_ptr, qp_clip, (col_end - col_start));
2002             }
2003         }
2004     } else {
2005         /*
2006          * TODO: Disable it for non CBR-CQP.
2007          */
2008         vme_context->roi_enabled = 0;
2009     }
2010
2011     if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info))
2012         encoder_context->soft_batch_force = 1;
2013
2014     return;
2015 }
2016
2017 /* HEVC */
2018 static int
2019 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
2020                            VAPictureHEVC *ref_list,
2021                            int num_pictures,
2022                            int dir)
2023 {
2024     int i, found = -1, min = 0x7FFFFFFF;
2025
2026     for (i = 0; i < num_pictures; i++) {
2027         int tmp;
2028
2029         if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
2030             (ref_list[i].picture_id == VA_INVALID_SURFACE))
2031             break;
2032
2033         tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
2034
2035         if (dir)
2036             tmp = -tmp;
2037
2038         if (tmp > 0 && tmp < min) {
2039             min = tmp;
2040             found = i;
2041         }
2042     }
2043
2044     return found;
2045 }
2046 void
2047 intel_hevc_vme_reference_state(VADriverContextP ctx,
2048                                struct encode_state *encode_state,
2049                                struct intel_encoder_context *encoder_context,
2050                                int list_index,
2051                                int surface_index,
2052                                void (* vme_source_surface_state)(
2053                                    VADriverContextP ctx,
2054                                    int index,
2055                                    struct object_surface *obj_surface,
2056                                    struct intel_encoder_context *encoder_context))
2057 {
2058     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2059     struct object_surface *obj_surface = NULL;
2060     struct i965_driver_data *i965 = i965_driver_data(ctx);
2061     VASurfaceID ref_surface_id;
2062     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2063     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2064     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2065     int max_num_references;
2066     VAPictureHEVC *curr_pic;
2067     VAPictureHEVC *ref_list;
2068     int ref_idx;
2069     unsigned int is_hevc10 = 0;
2070     GenHevcSurface *hevc_encoder_surface = NULL;
2071
2072     if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
2073         || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
2074         is_hevc10 = 1;
2075
2076     if (list_index == 0) {
2077         max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
2078         ref_list = slice_param->ref_pic_list0;
2079     } else {
2080         max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
2081         ref_list = slice_param->ref_pic_list1;
2082     }
2083
2084     if (max_num_references == 1) {
2085         if (list_index == 0) {
2086             ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
2087             vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
2088         } else {
2089             ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
2090             vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
2091         }
2092
2093         if (ref_surface_id != VA_INVALID_SURFACE)
2094             obj_surface = SURFACE(ref_surface_id);
2095
2096         if (!obj_surface ||
2097             !obj_surface->bo) {
2098             obj_surface = encode_state->reference_objects[list_index];
2099             vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
2100         }
2101
2102         ref_idx = 0;
2103     } else {
2104         curr_pic = &pic_param->decoded_curr_pic;
2105
2106         /* select the reference frame in temporal space */
2107         ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
2108         ref_surface_id = ref_list[ref_idx].picture_id;
2109
2110         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
2111             obj_surface = SURFACE(ref_surface_id);
2112
2113         vme_context->used_reference_objects[list_index] = obj_surface;
2114         vme_context->used_references[list_index] = &ref_list[ref_idx];
2115     }
2116
2117     if (obj_surface &&
2118         obj_surface->bo) {
2119         assert(ref_idx >= 0);
2120         vme_context->used_reference_objects[list_index] = obj_surface;
2121
2122         if(is_hevc10){
2123             hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2124             assert(hevc_encoder_surface);
2125             obj_surface = hevc_encoder_surface->nv12_surface_obj;
2126         }
2127         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
2128         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
2129                 ref_idx << 16 |
2130                 ref_idx <<  8 |
2131                 ref_idx);
2132     } else {
2133         vme_context->used_reference_objects[list_index] = NULL;
2134         vme_context->used_references[list_index] = NULL;
2135         vme_context->ref_index_in_mb[list_index] = 0;
2136     }
2137 }
2138
2139 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
2140                                      struct encode_state *encode_state,
2141                                      struct intel_encoder_context *encoder_context)
2142 {
2143     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2144     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2145     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2146     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2147     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2148     int qp, m_cost, j, mv_count;
2149     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
2150     float   lambda, m_costf;
2151
2152     /* here no SI SP slice for HEVC, do not need slice fixup */
2153     int slice_type = slice_param->slice_type;
2154
2155
2156     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2157
2158     if(encoder_context->rate_control_mode == VA_RC_CBR)
2159     {
2160         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
2161         if(slice_type == HEVC_SLICE_B) {
2162             if(pSequenceParameter->ip_period == 1)
2163             {
2164                 slice_type = HEVC_SLICE_P;
2165                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2166
2167             }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
2168                 slice_type = HEVC_SLICE_P;
2169                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2170             }
2171         }
2172
2173     }
2174
2175     if (vme_state_message == NULL)
2176         return;
2177
2178     assert(qp <= QP_MAX);
2179     lambda = intel_lambda_qp(qp);
2180     if (slice_type == HEVC_SLICE_I) {
2181         vme_state_message[MODE_INTRA_16X16] = 0;
2182         m_cost = lambda * 4;
2183         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2184         m_cost = lambda * 16;
2185         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2186         m_cost = lambda * 3;
2187         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2188     } else {
2189         m_cost = 0;
2190         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
2191         for (j = 1; j < 3; j++) {
2192             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2193             m_cost = (int)m_costf;
2194             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2195         }
2196         mv_count = 3;
2197         for (j = 4; j <= 64; j *= 2) {
2198             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2199             m_cost = (int)m_costf;
2200             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
2201             mv_count++;
2202         }
2203
2204         if (qp <= 25) {
2205             vme_state_message[MODE_INTRA_16X16] = 0x4a;
2206             vme_state_message[MODE_INTRA_8X8] = 0x4a;
2207             vme_state_message[MODE_INTRA_4X4] = 0x4a;
2208             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2209             vme_state_message[MODE_INTER_16X16] = 0x4a;
2210             vme_state_message[MODE_INTER_16X8] = 0x4a;
2211             vme_state_message[MODE_INTER_8X8] = 0x4a;
2212             vme_state_message[MODE_INTER_8X4] = 0x4a;
2213             vme_state_message[MODE_INTER_4X4] = 0x4a;
2214             vme_state_message[MODE_INTER_BWD] = 0x2a;
2215             return;
2216         }
2217         m_costf = lambda * 10;
2218         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2219         m_cost = lambda * 14;
2220         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2221         m_cost = lambda * 24;
2222         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2223         m_costf = lambda * 3.5;
2224         m_cost = m_costf;
2225         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2226         if (slice_type == HEVC_SLICE_P) {
2227             m_costf = lambda * 2.5;
2228             m_cost = m_costf;
2229             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2230             m_costf = lambda * 4;
2231             m_cost = m_costf;
2232             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2233             m_costf = lambda * 1.5;
2234             m_cost = m_costf;
2235             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2236             m_costf = lambda * 3;
2237             m_cost = m_costf;
2238             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2239             m_costf = lambda * 5;
2240             m_cost = m_costf;
2241             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2242             /* BWD is not used in P-frame */
2243             vme_state_message[MODE_INTER_BWD] = 0;
2244         } else {
2245             m_costf = lambda * 2.5;
2246             m_cost = m_costf;
2247             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2248             m_costf = lambda * 5.5;
2249             m_cost = m_costf;
2250             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2251             m_costf = lambda * 3.5;
2252             m_cost = m_costf;
2253             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2254             m_costf = lambda * 5.0;
2255             m_cost = m_costf;
2256             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2257             m_costf = lambda * 6.5;
2258             m_cost = m_costf;
2259             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2260             m_costf = lambda * 1.5;
2261             m_cost = m_costf;
2262             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
2263         }
2264     }
2265 }