
Update CBR algo for H.264 per temporal layer
[android-x86/hardware-intel-common-vaapi.git] / src / gen6_mfc_common.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "gen9_mfc.h"
45 #include "intel_media.h"
46
47 #ifndef HAVE_LOG2F
48 #define log2f(x) (logf(x)/(float)M_LN2)
49 #endif
50
51 int intel_avc_enc_slice_type_fixup(int slice_type)
52 {
53     if (slice_type == SLICE_TYPE_SP ||
54         slice_type == SLICE_TYPE_P)
55         slice_type = SLICE_TYPE_P;
56     else if (slice_type == SLICE_TYPE_SI ||
57              slice_type == SLICE_TYPE_I)
58         slice_type = SLICE_TYPE_I;
59     else {
60         if (slice_type != SLICE_TYPE_B)
61             WARN_ONCE("Invalid slice type for H.264 encoding!\n");
62
63         slice_type = SLICE_TYPE_B;
64     }
65
66     return slice_type;
67 }
68
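/*
 * Seed the per-slice-type (I/P/B) bit rate control contexts.  The Grow/
 * Shrink/Correct values below appear to feed the frame-size correction
 * fields later programmed through the MFC slice state, while the software
 * BRC in this file mainly tracks QP in brc.qp_prime_y[].
 */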
69 static void
70 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state, 
71                                         struct intel_encoder_context *encoder_context)
72 {
73     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
74     int i;
75
76     for(i = 0 ; i < 3; i++) {
77         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
78         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
79         mfc_context->bit_rate_control_context[i].GrowInit = 6;
80         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
81         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
82         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
83         
84         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
85         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
86         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
87         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
88         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
89         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
90     }
91 }
92
93 static void intel_mfc_brc_init(struct encode_state *encode_state,
94                                struct intel_encoder_context* encoder_context)
95 {
96     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
97     double bitrate, framerate;
98     double qp1_size = 0.1 * 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
99     double qp51_size = 0.001 * 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
100     double bpf, factor;
101     int inum = encoder_context->brc.num_iframes_in_gop,
102         pnum = encoder_context->brc.num_pframes_in_gop,
103         bnum = encoder_context->brc.num_bframes_in_gop; /* Gop structure: number of I, P, B frames in the Gop. */
104     int intra_period = encoder_context->brc.gop_size;
105     int i;
106
107     mfc_context->brc.mode = encoder_context->rate_control_mode;
108
109     mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
110     mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
111     mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
112
113     mfc_context->hrd.buffer_size = encoder_context->brc.hrd_buffer_size;
114     mfc_context->hrd.current_buffer_fullness =
115         (encoder_context->brc.hrd_initial_buffer_fullness < mfc_context->hrd.buffer_size) ?
116         (double)encoder_context->brc.hrd_initial_buffer_fullness : mfc_context->hrd.buffer_size / 2.;
117     mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
118     mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
119     mfc_context->hrd.violation_noted = 0;
120
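    /*
     * Per temporal layer: brc.bits_per_second[] and brc.framerate_per_100s[]
     * are cumulative up to layer i, so the difference from layer i - 1 gives
     * the budget this layer adds.  'factor' rescales the GOP composition
     * (inum/pnum/bnum, defined for the full frame rate) down to the frame
     * rate actually present at layer i.
     */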
121     for (i = 0; i < encoder_context->layer.num_layers; i++) {
122         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = 26;
123         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 26;
124         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = 26;
125
126         if (i == 0) {
127             bitrate = encoder_context->brc.bits_per_second[0];
128             framerate = (double)encoder_context->brc.framerate_per_100s[0] / 100.0;
129         } else {
130             bitrate = (encoder_context->brc.bits_per_second[i] - encoder_context->brc.bits_per_second[i - 1]);
131             framerate = (double)(encoder_context->brc.framerate_per_100s[i] - encoder_context->brc.framerate_per_100s[i - 1]) / 100.0;
132         }
133
134         if (i == encoder_context->layer.num_layers - 1)
135             factor = 1.0;
136         else
137             factor = (double)encoder_context->brc.framerate_per_100s[i] / encoder_context->brc.framerate_per_100s[i + 1];
138
139         mfc_context->brc.target_frame_size[i][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period * factor)/framerate) /
140                                                                     (double)(inum + BRC_PWEIGHT * pnum * factor + BRC_BWEIGHT * bnum * factor));
141         mfc_context->brc.target_frame_size[i][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
142         mfc_context->brc.target_frame_size[i][SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
143
144         bpf = mfc_context->brc.bits_per_frame[i] = bitrate/framerate;
145
146         if ((bpf > qp51_size) && (bpf < qp1_size)) {
147             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
148         }
149         else if (bpf >= qp1_size)
150             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 1;
151         else if (bpf <= qp51_size)
152             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51;
153
154         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P];
155         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I];
156
157         BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I], 1, 51);
158         BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P], 1, 51);
159         BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B], 1, 51);
160     }
161 }
162
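/*
 * Leaky-bucket HRD model: encoding a frame drains frame_bits from the coded
 * picture buffer, while the channel refills it by the per-layer
 * bits_per_frame.  Draining below zero is reported as an underflow and
 * filling beyond buffer_size as an overflow (tolerated in VBR by clamping);
 * on a violation the previous fullness is restored so the caller can adjust
 * QP and possibly re-encode the frame.
 */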
163 int intel_mfc_update_hrd(struct encode_state *encode_state,
164                          struct intel_encoder_context *encoder_context,
165                          int frame_bits)
166 {
167     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
168     double prev_bf = mfc_context->hrd.current_buffer_fullness;
169
170     mfc_context->hrd.current_buffer_fullness -= frame_bits;
171
172     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
173         mfc_context->hrd.current_buffer_fullness = prev_bf;
174         return BRC_UNDERFLOW;
175     }
176     
177     mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame[encoder_context->layer.curr_frame_layer_id];
178     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
179         if (mfc_context->brc.mode == VA_RC_VBR)
180             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
181         else {
182             mfc_context->hrd.current_buffer_fullness = prev_bf;
183             return BRC_OVERFLOW;
184         }
185     }
186     return BRC_NO_HRD_VIOLATION;
187 }
188
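/*
 * Software BRC post-pack step: compare the actual size of the just-encoded
 * frame with its per-layer target, predict the next QP of the same slice
 * type (qpf = qp * target_frame_size / frame_size_next), limit the change to
 * BRC_QP_MAX_CHANGE, add a correction derived from the HRD buffer state, and
 * finally nudge the QPs of the other slice types so the usual I <= P <= B
 * relationship is roughly kept.  The return value reports any HRD violation.
 */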
189 int intel_mfc_brc_postpack(struct encode_state *encode_state,
190                            struct intel_encoder_context *encoder_context,
191                            int frame_bits)
192 {
193     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
194     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
195     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; 
196     int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
197     int curr_frame_layer_id, next_frame_layer_id;
198     int qpi, qpp, qpb;
199     int qp; // quantizer of previously encoded slice of current type
200     int qpn; // predicted quantizer for next frame of current type in integer format
201     double qpf; // predicted quantizer for next frame of current type in float format
202     double delta_qp; // QP correction
203     int target_frame_size, frame_size_next;
204     /* Notes:
205      *  x - how far we are from the target HRD buffer fullness
206      *  y - how far we are from the HRD buffer borders
207      */
208     double x, y;
209     double frame_size_alpha, factor;
210
211     if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0) {
212         curr_frame_layer_id = 0;
213         next_frame_layer_id = 0;
214     } else {
215         curr_frame_layer_id = encoder_context->layer.curr_frame_layer_id;
216         next_frame_layer_id = encoder_context->layer.frame_layer_ids[encoder_context->num_frames_in_sequence % encoder_context->layer.size_frame_layer_ids];
217     }
218
219     /* check HRD compliance first */
220     sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
221
222     if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
223         /* nothing */
224     } else {
225         next_frame_layer_id = curr_frame_layer_id;
226     }
227
228     if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0)
229         factor = 1.0;
230     else
231         factor = (double)encoder_context->brc.framerate_per_100s[next_frame_layer_id] / encoder_context->brc.framerate_per_100s[encoder_context->layer.num_layers - 1];
232
233     qpi = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I];
234     qpp = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P];
235     qpb = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B];
236
237     qp = mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype];
238
239     target_frame_size = mfc_context->brc.target_frame_size[next_frame_layer_id][slicetype];
240     if (mfc_context->hrd.buffer_capacity < 5)
241         frame_size_alpha = 0;
242     else
243         frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype] * factor;
244     if (frame_size_alpha > 30) frame_size_alpha = 30;
245     frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
246         (double)(frame_size_alpha + 1.);
247
248     /* frame_size_next: avoid negative or too-small values */
249     if ((double)frame_size_next < (double)(target_frame_size * 0.25))
250         frame_size_next = (int)((double)target_frame_size * 0.25);
251
252     qpf = (double)qp * target_frame_size / frame_size_next;
253     qpn = (int)(qpf + 0.5);
254
255     if (qpn == qp) {
256         /* rounding qpf to qpn loses the fractional part: accumulate it and compensate over time */
257         mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
258         if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
259             qpn++;
260             mfc_context->brc.qpf_rounding_accumulator = 0.;
261         } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
262             qpn--;
263             mfc_context->brc.qpf_rounding_accumulator = 0.;
264         }
265     }
266     /* making sure that QP is not changing too fast */
267     if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
268     else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
269     /* make sure the predicted QP does not leave the valid QP range */
270     BRC_CLIP(qpn, 1, 51);
271
272     /* calculate a QP correction from the HRD buffer state */
273     x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
274     if (x > 0) {
275         x /= mfc_context->hrd.target_buffer_fullness;
276         y = mfc_context->hrd.current_buffer_fullness;
277     }
278     else {
279         x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
280         y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
281     }
282     if (y < 0.01) y = 0.01;
283     if (x > 1) x = 1;
284     else if (x < -1) x = -1;
285
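    /*
     * The correction is bounded by +/-BRC_QP_MAX_CHANGE: sin(BRC_PI_0_5 * x)
     * scales with the signed, normalized distance from the target fullness,
     * while exp(-1/y) only damps the term when the headroom y towards the
     * nearest buffer border becomes very small.
     */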
286     delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
287     qpn = (int)(qpn + delta_qp + 0.5);
288
289     /* make sure the predicted QP does not leave the valid QP range */
290     BRC_CLIP(qpn, 1, 51);
291
292     if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
293         /* correcting QPs of slices of other types */
294         if (slicetype == SLICE_TYPE_P) {
295             if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
296                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
297             if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
298                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
299         } else if (slicetype == SLICE_TYPE_I) {
300             if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
301                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
302             if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
303                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
304         } else { // SLICE_TYPE_B
305             if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
306                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
307             if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
308                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
309         }
310         BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I], 1, 51);
311         BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P], 1, 51);
312         BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B], 1, 51);
313     } else if (sts == BRC_UNDERFLOW) { // underflow
314         if (qpn <= qp) qpn = qp + 1;
315         if (qpn > 51) {
316             qpn = 51;
317             sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
318         }
319     } else if (sts == BRC_OVERFLOW) {
320         if (qpn >= qp) qpn = qp - 1;
321         if (qpn < 1) { // < 0 (?) overflow with minQP
322             qpn = 1;
323             sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
324         }
325     }
326
327     mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype] = qpn;
328
329     return sts;
330 }
331
332 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
333                                        struct intel_encoder_context *encoder_context)
334 {
335     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
336     unsigned int rate_control_mode = encoder_context->rate_control_mode;
337     int target_bit_rate = encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
338     
339     // currently only CBR mode is supported.
340     if (rate_control_mode == VA_RC_CBR) {
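        /*
         * The CPB removal delays below are expressed in the 90 kHz clock used
         * by the H.264 buffering-period/picture-timing SEI syntax; the
         * *_length fields give the number of bits used to code them in the
         * packed SEI headers.
         */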
341         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
342         mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
343         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
344         mfc_context->vui_hrd.i_frame_number = 0;
345
346         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24; 
347         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
348         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
349     }
350
351 }
352
353 void 
354 intel_mfc_hrd_context_update(struct encode_state *encode_state, 
355                              struct gen6_mfc_context *mfc_context)
356 {
357     mfc_context->vui_hrd.i_frame_number++;
358 }
359
360 int intel_mfc_interlace_check(VADriverContextP ctx,
361                               struct encode_state *encode_state,
362                               struct intel_encoder_context *encoder_context)
363 {
364     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
365     VAEncSliceParameterBufferH264 *pSliceParameter;
366     int i;
367     int mbCount = 0;
368     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
369     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
370   
371     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
372         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer; 
373         mbCount += pSliceParameter->num_macroblocks; 
374     }
375     
376     if ( mbCount == ( width_in_mbs * height_in_mbs ) )
377         return 0;
378
379     return 1;
380 }
381
382 void intel_mfc_brc_prepare(struct encode_state *encode_state,
383                            struct intel_encoder_context *encoder_context)
384 {
385     unsigned int rate_control_mode = encoder_context->rate_control_mode;
386
387     if (encoder_context->codec != CODEC_H264 &&
388         encoder_context->codec != CODEC_H264_MVC)
389         return;
390
391     if (rate_control_mode == VA_RC_CBR) {
392         /* Programming bit rate control */
393         if (encoder_context->brc.need_reset) {
394             intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
395             intel_mfc_brc_init(encode_state, encoder_context);
396         }
397
398         /* Programming HRD control */
399         if (encoder_context->brc.need_reset)
400             intel_mfc_hrd_context_init(encode_state, encoder_context);    
401     }
402 }
403
404 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
405                                               struct encode_state *encode_state,
406                                               struct intel_encoder_context *encoder_context,
407                                               struct intel_batchbuffer *slice_batch)
408 {
409     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
410     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
411     unsigned int rate_control_mode = encoder_context->rate_control_mode;
412     unsigned int skip_emul_byte_cnt;
413
414     if (encode_state->packed_header_data[idx]) {
415         VAEncPackedHeaderParameterBuffer *param = NULL;
416         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
417         unsigned int length_in_bits;
418
419         assert(encode_state->packed_header_param[idx]);
420         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
421         length_in_bits = param->bit_length;
422
423         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
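        /*
         * insert_object() consumes the header as dwords:
         * ALIGN(length_in_bits, 32) >> 5 is the dword count,
         * length_in_bits & 0x1f the valid bits in the last dword, and
         * skip_emul_byte_cnt the leading bytes (start code plus NAL header)
         * that must be excluded from emulation-prevention insertion.
         */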
424         mfc_context->insert_object(ctx,
425                                    encoder_context,
426                                    header_data,
427                                    ALIGN(length_in_bits, 32) >> 5,
428                                    length_in_bits & 0x1f,
429                                    skip_emul_byte_cnt,
430                                    0,
431                                    0,
432                                    !param->has_emulation_bytes,
433                                    slice_batch);
434     }
435
436     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
437
438     if (encode_state->packed_header_data[idx]) {
439         VAEncPackedHeaderParameterBuffer *param = NULL;
440         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
441         unsigned int length_in_bits;
442
443         assert(encode_state->packed_header_param[idx]);
444         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
445         length_in_bits = param->bit_length;
446
447         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
448
449         mfc_context->insert_object(ctx,
450                                    encoder_context,
451                                    header_data,
452                                    ALIGN(length_in_bits, 32) >> 5,
453                                    length_in_bits & 0x1f,
454                                    skip_emul_byte_cnt,
455                                    0,
456                                    0,
457                                    !param->has_emulation_bytes,
458                                    slice_batch);
459     }
460     
461     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
462
463     if (encode_state->packed_header_data[idx]) {
464         VAEncPackedHeaderParameterBuffer *param = NULL;
465         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
466         unsigned int length_in_bits;
467
468         assert(encode_state->packed_header_param[idx]);
469         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
470         length_in_bits = param->bit_length;
471
472         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
473         mfc_context->insert_object(ctx,
474                                    encoder_context,
475                                    header_data,
476                                    ALIGN(length_in_bits, 32) >> 5,
477                                    length_in_bits & 0x1f,
478                                    skip_emul_byte_cnt,
479                                    0,
480                                    0,
481                                    !param->has_emulation_bytes,
482                                    slice_batch);
483     } else if (rate_control_mode == VA_RC_CBR) {
484         // this is the first AU
485         struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
486
487         unsigned char *sei_data = NULL;
488     
489         int length_in_bits = build_avc_sei_buffer_timing(
490             mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
491             mfc_context->vui_hrd.i_initial_cpb_removal_delay,
492             0,
493             mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
494             mfc_context->vui_hrd.i_dpb_output_delay_length,
495             0,
496             &sei_data);
497         mfc_context->insert_object(ctx,
498                                    encoder_context,
499                                    (unsigned int *)sei_data,
500                                    ALIGN(length_in_bits, 32) >> 5,
501                                    length_in_bits & 0x1f,
502                                    5,
503                                    0,   
504                                    0,   
505                                    1,
506                                    slice_batch);  
507         free(sei_data);
508     }
509 }
510
511 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, 
512                                struct encode_state *encode_state,
513                                struct intel_encoder_context *encoder_context)
514 {
515     struct i965_driver_data *i965 = i965_driver_data(ctx);
516     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
517     struct object_surface *obj_surface; 
518     struct object_buffer *obj_buffer;
519     GenAvcSurface *gen6_avc_surface;
520     dri_bo *bo;
521     VAStatus vaStatus = VA_STATUS_SUCCESS;
522     int i, j, enable_avc_ildb = 0;
523     VAEncSliceParameterBufferH264 *slice_param;
524     struct i965_coded_buffer_segment *coded_buffer_segment;
525     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
526     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
527     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
528
529     if (IS_GEN6(i965->intel.device_info)) {
530         /* On the SNB it should be fixed to 128 for the DMV buffer */
531         width_in_mbs = 128;
532     }
533
534     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
535         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
536         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
537
538         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
539             assert((slice_param->slice_type == SLICE_TYPE_I) ||
540                    (slice_param->slice_type == SLICE_TYPE_SI) ||
541                    (slice_param->slice_type == SLICE_TYPE_P) ||
542                    (slice_param->slice_type == SLICE_TYPE_SP) ||
543                    (slice_param->slice_type == SLICE_TYPE_B));
544
545             if (slice_param->disable_deblocking_filter_idc != 1) {
546                 enable_avc_ildb = 1;
547                 break;
548             }
549
550             slice_param++;
551         }
552     }
553
554     /* Set up all the input & output objects */
555
556     /* Setup current frame and current direct mv buffer*/
557     obj_surface = encode_state->reconstructed_object;
558     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
559
560     if ( obj_surface->private_data == NULL) {
561         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
562         assert(gen6_avc_surface);
563         gen6_avc_surface->dmv_top = 
564             dri_bo_alloc(i965->intel.bufmgr,
565                          "Buffer",
566                          68 * width_in_mbs * height_in_mbs, 
567                          64);
568         gen6_avc_surface->dmv_bottom = 
569             dri_bo_alloc(i965->intel.bufmgr,
570                          "Buffer",
571                          68 * width_in_mbs * height_in_mbs, 
572                          64);
573         assert(gen6_avc_surface->dmv_top);
574         assert(gen6_avc_surface->dmv_bottom);
575         obj_surface->private_data = (void *)gen6_avc_surface;
576         obj_surface->free_private_data = (void *)gen_free_avc_surface; 
577     }
578     gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
579     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
580     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
581     dri_bo_reference(gen6_avc_surface->dmv_top);
582     dri_bo_reference(gen6_avc_surface->dmv_bottom);
583
584     if (enable_avc_ildb) {
585         mfc_context->post_deblocking_output.bo = obj_surface->bo;
586         dri_bo_reference(mfc_context->post_deblocking_output.bo);
587     } else {
588         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
589         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
590     }
591
592     mfc_context->surface_state.width = obj_surface->orig_width;
593     mfc_context->surface_state.height = obj_surface->orig_height;
594     mfc_context->surface_state.w_pitch = obj_surface->width;
595     mfc_context->surface_state.h_pitch = obj_surface->height;
596     
597     /* Setup reference frames and direct mv buffers*/
598     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
599         obj_surface = encode_state->reference_objects[i];
600         
601         if (obj_surface && obj_surface->bo) {
602             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
603             dri_bo_reference(obj_surface->bo);
604
605             /* Check DMV buffer */
606             if ( obj_surface->private_data == NULL) {
607                 
608                 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
609                 assert(gen6_avc_surface);
610                 gen6_avc_surface->dmv_top = 
611                     dri_bo_alloc(i965->intel.bufmgr,
612                                  "Buffer",
613                                  68 * width_in_mbs * height_in_mbs, 
614                                  64);
615                 gen6_avc_surface->dmv_bottom = 
616                     dri_bo_alloc(i965->intel.bufmgr,
617                                  "Buffer",
618                                  68 * width_in_mbs * height_in_mbs, 
619                                  64);
620                 assert(gen6_avc_surface->dmv_top);
621                 assert(gen6_avc_surface->dmv_bottom);
622                 obj_surface->private_data = gen6_avc_surface;
623                 obj_surface->free_private_data = gen_free_avc_surface; 
624             }
625     
626             gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
627             /* Setup DMV buffer */
628             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
629             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
630             dri_bo_reference(gen6_avc_surface->dmv_top);
631             dri_bo_reference(gen6_avc_surface->dmv_bottom);
632         } else {
633             break;
634         }
635     }
636
637     mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
638     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
639
640     obj_buffer = encode_state->coded_buf_object;
641     bo = obj_buffer->buffer_store->bo;
642     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
643     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
644     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
645     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
646     
647     dri_bo_map(bo, 1);
648     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
649     coded_buffer_segment->mapped = 0;
650     coded_buffer_segment->codec = encoder_context->codec;
651     dri_bo_unmap(bo);
652
653     return vaStatus;
654 }
655 /*
656  * The LUT entry packs a pair of 4-bit fields (shift k, base x), encoding
657  * value = x * 2^k.
658  * So a cost has to be converted into the nearest representable LUT value.
659  * The derivation is:
660  *    x * 2^k = 2^n * (1 + deltaX)
661  *    k + log2(x) = n + log2(1 + deltaX)
662  *    log2(x) = n - k + log2(1 + deltaX)
663  *    As x is in the range of [1, 15]:
664  *      4 > n - k + log2(1 + deltaX) >= 0
665  *      =>  n + log2(1 + deltaX) >= k > n - 4 + log2(1 + deltaX)
666  *    From this we derive the candidate shifts k and pick the nearest LUT value.
667  */
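/*
 * Illustrative example (not part of the original comment): value = 100 lies
 * in [2^6, 2^7), so shifts j = 3..6 are tried; j = 3 gives
 * base = (100 + 3) >> 3 = 12 and 12 << 3 = 96, the smallest error, so the
 * returned code is (3 << 4) | 12 = 0x3c.  The 'max' argument (e.g. 0x8f,
 * which decodes to 15 << 8) caps the result in the same format.
 */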
668 int intel_format_lutvalue(int value, int max)
669 {
670     int ret;
671     int logvalue, temp1, temp2;
672
673     if (value <= 0)
674         return 0;
675
676     logvalue = (int)(log2f((float)value));
677     if (logvalue < 4) {
678         ret = value;
679     } else {
680         int error, temp_value, base, j, temp_err;
681         error = value;
682         j = logvalue - 4 + 1;
683         ret = -1;
684         for(; j <= logvalue; j++) {
685             if (j == 0) {
686                 base = value >> j;
687             } else {
688                 base = (value + (1 << (j - 1)) - 1) >> j;
689             }
690             if (base >= 16)
691                 continue;
692
693             temp_value = base << j;
694             temp_err = abs(value - temp_value);
695             if (temp_err < error) {
696                 error = temp_err;
697                 ret = (j << 4) | base;
698                 if (temp_err == 0)
699                     break;
700             }
701         }
702     }
703     temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
704     temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
705     if (temp1 > temp2)
706         ret = max;
707     return ret;
708
709 }
710
711
712 #define         QP_MAX                  52
713 #define         VP8_QP_MAX              128
714
715
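/* Lagrangian multiplier used to scale the mode/MV costs below:
 * lambda = round(2^(qp/6 - 2)), so QPs of 12 and below all map to 1,
 * e.g. QP 26 gives lambda = 5 and QP 40 gives lambda = 25.
 */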
716 static float intel_lambda_qp(int qp)
717 {
718     float value, lambdaf;
719     value = qp;
720     value = value / 6 - 2;
721     if (value < 0)
722         value = 0;
723     lambdaf = roundf(powf(2, value));
724     return lambdaf;
725 }
726
727 static
728 void intel_h264_calc_mbmvcost_qp(int qp,
729                                  int slice_type,
730                                  uint8_t *vme_state_message)
731 {
732     int m_cost, j, mv_count;
733     float   lambda, m_costf;
734
735     assert(qp <= QP_MAX); 
736     lambda = intel_lambda_qp(qp);
737
738     m_cost = lambda;
739     vme_state_message[MODE_CHROMA_INTRA] = 0;
740     vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
741
742     if (slice_type == SLICE_TYPE_I) {
743         vme_state_message[MODE_INTRA_16X16] = 0;
744         m_cost = lambda * 4;
745         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
746         m_cost = lambda * 16; 
747         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
748         m_cost = lambda * 3;
749         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
750     } else {
751         m_cost = 0;
752         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
753         for (j = 1; j < 3; j++) {
754             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
755             m_cost = (int)m_costf;
756             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
757         }
758         mv_count = 3;
759         for (j = 4; j <= 64; j *= 2) {
760             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
761             m_cost = (int)m_costf;
762             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
763             mv_count++;
764         }
765
766         if (qp <= 25) {
767             vme_state_message[MODE_INTRA_16X16] = 0x4a;
768             vme_state_message[MODE_INTRA_8X8] = 0x4a;
769             vme_state_message[MODE_INTRA_4X4] = 0x4a;
770             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
771             vme_state_message[MODE_INTER_16X16] = 0x4a;
772             vme_state_message[MODE_INTER_16X8] = 0x4a;
773             vme_state_message[MODE_INTER_8X8] = 0x4a;
774             vme_state_message[MODE_INTER_8X4] = 0x4a;
775             vme_state_message[MODE_INTER_4X4] = 0x4a;
776             vme_state_message[MODE_INTER_BWD] = 0x2a;
777             return;
778         }
779         m_cost = lambda * 10;
780         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
781         m_cost = lambda * 14;
782         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
783         m_cost = lambda * 24; 
784         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
785         m_costf = lambda * 3.5;
786         m_cost = m_costf;
787         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
788         if (slice_type == SLICE_TYPE_P) {
789             m_costf = lambda * 2.5;
790             m_cost = m_costf;
791             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
792             m_costf = lambda * 4;
793             m_cost = m_costf;
794             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
795             m_costf = lambda * 1.5;
796             m_cost = m_costf;
797             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
798             m_costf = lambda * 3;
799             m_cost = m_costf;
800             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
801             m_costf = lambda * 5;
802             m_cost = m_costf;
803             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
804             /* BWD is not used in P-frame */
805             vme_state_message[MODE_INTER_BWD] = 0;
806         } else {
807             m_costf = lambda * 2.5;
808             m_cost = m_costf;
809             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
810             m_costf = lambda * 5.5;
811             m_cost = m_costf;
812             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
813             m_costf = lambda * 3.5;
814             m_cost = m_costf;
815             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
816             m_costf = lambda * 5.0;
817             m_cost = m_costf;
818             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
819             m_costf = lambda * 6.5;
820             m_cost = m_costf;
821             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
822             m_costf = lambda * 1.5;
823             m_cost = m_costf;
824             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
825         }
826     }
827     return;
828 }
829
830 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
831                                 struct encode_state *encode_state,
832                                 struct intel_encoder_context *encoder_context)
833 {
834     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
835     struct gen6_vme_context *vme_context = encoder_context->vme_context;
836     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
837     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
838     int qp;
839     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
840
841     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
842
843     if (encoder_context->rate_control_mode == VA_RC_CQP)
844         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
845     else
846         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
847
848     if (vme_state_message == NULL)
849         return;
850
851     intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
852 }
853
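/*
 * VP8 variant of the cost setup above: the VP8 quantizer index (0..127) is
 * rescaled with qp * QP_MAX / VP8_QP_MAX so the same intel_lambda_qp() model
 * can be reused; key frames are costed like I slices, inter frames like P
 * slices.
 */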
854 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
855                                 struct encode_state *encode_state,
856                                 struct intel_encoder_context *encoder_context)
857 {
858     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
859     struct gen6_vme_context *vme_context = encoder_context->vme_context;
860     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
861     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
862     int qp, m_cost, j, mv_count;
863     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
864     float   lambda, m_costf;
865
866     int is_key_frame = !pic_param->pic_flags.bits.frame_type;
867     int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
868   
869     if (vme_state_message == NULL)
870         return;
871  
872     if (encoder_context->rate_control_mode == VA_RC_CQP)
873         qp = q_matrix->quantization_index[0];
874     else
875         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
876
877     lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
878
879     m_cost = lambda;
880     vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
881
882     if (is_key_frame) {
883         vme_state_message[MODE_INTRA_16X16] = 0;
884         m_cost = lambda * 16; 
885         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
886         m_cost = lambda * 3;
887         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
888     } else {
889         m_cost = 0;
890         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
891         for (j = 1; j < 3; j++) {
892             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
893             m_cost = (int)m_costf;
894             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
895         }
896         mv_count = 3;
897         for (j = 4; j <= 64; j *= 2) {
898             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
899             m_cost = (int)m_costf;
900             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
901             mv_count++;
902         }
903
904         if (qp < 92 ) {
905             vme_state_message[MODE_INTRA_16X16] = 0x4a;
906             vme_state_message[MODE_INTRA_4X4] = 0x4a;
907             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
908             vme_state_message[MODE_INTER_16X16] = 0x4a;
909             vme_state_message[MODE_INTER_16X8] = 0x4a;
910             vme_state_message[MODE_INTER_8X8] = 0x4a;
911             vme_state_message[MODE_INTER_4X4] = 0x4a;
912             vme_state_message[MODE_INTER_BWD] = 0;
913             return;
914         }
915         m_cost = lambda * 10;
916         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
917         m_cost = lambda * 24; 
918         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
919             
920         m_costf = lambda * 3.5;
921         m_cost = m_costf;
922         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
923
924         m_costf = lambda * 2.5;
925         m_cost = m_costf;
926         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
927         m_costf = lambda * 4;
928         m_cost = m_costf;
929         vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
930         m_costf = lambda * 1.5;
931         m_cost = m_costf;
932         vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
933         m_costf = lambda * 5;
934         m_cost = m_costf;
935         vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
936         /* BWD is not used in P-frame */
937         vme_state_message[MODE_INTER_BWD] = 0;
938     }
939 }
940
941 #define         MB_SCOREBOARD_A         (1 << 0)
942 #define         MB_SCOREBOARD_B         (1 << 1)
943 #define         MB_SCOREBOARD_C         (1 << 2)
944 void 
945 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
946 {
947     vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
948     vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
949     vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
950                                                            MB_SCOREBOARD_B |
951                                                            MB_SCOREBOARD_C);
952
953     /* In VME prediction the current MB depends on its neighbouring
954      * A/B/C macroblocks, so the left/up/up-right dependencies must
955      * be considered.
956      */
957     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
958     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
959     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
960     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
961     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
962     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
963
964     vme_context->gpe_context.vfe_desc7.dword = 0;
965     return;
966 }
967
968 /* return -1 if the MB at (x_index, y_index) is outside the picture or the current slice, 0 otherwise */
969 static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
970 {
971     int mb_index;
972     if (x_index < 0 || x_index >= mb_width)
973         return -1;
974     if (y_index < 0 || y_index >= mb_height)
975         return -1;
976
977     mb_index = y_index * mb_width + x_index;
978     if (mb_index < first_mb || mb_index > (first_mb + num_mb))
979         return -1;
980     return 0;
981 }
982
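/*
 * Fill the VME batchbuffer with one MEDIA_OBJECT (9 dwords) per MB.  Each
 * slice is walked in a down-left wavefront (x_inner -= 2, y_inner += 1 per
 * step, the outer loop advancing the wavefront start), which matches the
 * left/top/top-right scoreboard dependencies programmed in
 * gen7_vme_scoreboard_init() so neighbouring MBs can be processed
 * concurrently without breaking the A/B/C dependency order.
 */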
983 void
984 gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
985                                      struct encode_state *encode_state,
986                                      int mb_width, int mb_height,
987                                      int kernel,
988                                      int transform_8x8_mode_flag,
989                                      struct intel_encoder_context *encoder_context)
990 {
991     struct gen6_vme_context *vme_context = encoder_context->vme_context;
992     int mb_row;
993     int s;
994     unsigned int *command_ptr;
995     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
996     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
997     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
998     int qp,qp_mb,qp_index;
999     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1000
1001     if (encoder_context->rate_control_mode == VA_RC_CQP)
1002         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1003     else
1004         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1005
1006 #define         USE_SCOREBOARD          (1 << 21)
1007  
1008     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1009     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1010
1011     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1012         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
1013         int first_mb = pSliceParameter->macroblock_address;
1014         int num_mb = pSliceParameter->num_macroblocks;
1015         unsigned int mb_intra_ub, score_dep;
1016         int x_outer, y_outer, x_inner, y_inner;
1017         int xtemp_outer = 0;
1018
1019         x_outer = first_mb % mb_width;
1020         y_outer = first_mb / mb_width;
1021         mb_row = y_outer;
1022
1023         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1024             x_inner = x_outer;
1025             y_inner = y_outer;
1026             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1027                 mb_intra_ub = 0;
1028                 score_dep = 0;
1029                 if (x_inner != 0) {
1030                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1031                     score_dep |= MB_SCOREBOARD_A; 
1032                 }
1033                 if (y_inner != mb_row) {
1034                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1035                     score_dep |= MB_SCOREBOARD_B;
1036                     if (x_inner != 0)
1037                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1038                     if (x_inner != (mb_width -1)) {
1039                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1040                         score_dep |= MB_SCOREBOARD_C;
1041                     }
1042                 }
1043
1044                 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1045                 *command_ptr++ = kernel;
1046                 *command_ptr++ = USE_SCOREBOARD;
1047                 /* Indirect data */
1048                 *command_ptr++ = 0;
1049                 /* the (X, Y) term of scoreboard */
1050                 *command_ptr++ = ((y_inner << 16) | x_inner);
1051                 *command_ptr++ = score_dep;
1052                 /*inline data */
1053                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1054                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1055                 /* QP occupies one byte */
1056                 if (vme_context->roi_enabled) {
1057                     qp_index = y_inner * mb_width + x_inner;
1058                     qp_mb = *(vme_context->qp_per_mb + qp_index);
1059                 } else
1060                     qp_mb = qp;
1061                 *command_ptr++ = qp_mb;
1062                 x_inner -= 2;
1063                 y_inner += 1;
1064             }
1065             x_outer += 1;
1066         }
1067
1068         xtemp_outer = mb_width - 2;
1069         if (xtemp_outer < 0)
1070             xtemp_outer = 0;
1071         x_outer = xtemp_outer;
1072         y_outer = first_mb / mb_width;
1073         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
1074             y_inner = y_outer;
1075             x_inner = x_outer;
1076             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1077                 mb_intra_ub = 0;
1078                 score_dep = 0;
1079                 if (x_inner != 0) {
1080                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1081                     score_dep |= MB_SCOREBOARD_A; 
1082                 }
1083                 if (y_inner != mb_row) {
1084                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1085                     score_dep |= MB_SCOREBOARD_B;
1086                     if (x_inner != 0)
1087                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1088
1089                     if (x_inner != (mb_width -1)) {
1090                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1091                         score_dep |= MB_SCOREBOARD_C;
1092                     }
1093                 }
1094
1095                 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1096                 *command_ptr++ = kernel;
1097                 *command_ptr++ = USE_SCOREBOARD;
1098                 /* Indirect data */
1099                 *command_ptr++ = 0;
1100                 /* the (X, Y) term of scoreboard */
1101                 *command_ptr++ = ((y_inner << 16) | x_inner);
1102                 *command_ptr++ = score_dep;
1103                 /*inline data */
1104                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1105                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1106                 /* qp occupies one byte */
1107                 if (vme_context->roi_enabled) {
1108                     qp_index = y_inner * mb_width + x_inner;
1109                     qp_mb = *(vme_context->qp_per_mb + qp_index);
1110                 } else
1111                     qp_mb = qp;
1112                 *command_ptr++ = qp_mb;
1113
1114                 x_inner -= 2;
1115                 y_inner += 1;
1116             }
1117             x_outer++;
1118             if (x_outer >= mb_width) {
1119                 y_outer += 1;
1120                 x_outer = xtemp_outer;
1121             }           
1122         }
1123     }
1124
1125     *command_ptr++ = 0;
1126     *command_ptr++ = MI_BATCH_BUFFER_END;
1127
1128     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1129 }
1130
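/*
 * Pack one reference entry for MFX_AVC_REF_IDX_STATE: bit 6 is the long-term
 * flag, bit 5 distinguishes frame from single-field pictures, bits 4:1 hold
 * the frame store index and bit 0 flags a bottom field.  The 0x80 filler
 * used for the remaining entries marks them as unused.
 */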
1131 static uint8_t
1132 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1133 {
1134     unsigned int is_long_term =
1135         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1136     unsigned int is_top_field =
1137         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1138     unsigned int is_bottom_field =
1139         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1140
1141     return ((is_long_term                         << 6) |
1142             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1143             (frame_store_id                       << 1) |
1144             ((is_top_field ^ 1) & is_bottom_field));
1145 }
1146
1147 void
1148 intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
1149                             struct encode_state *encode_state,
1150                             struct intel_encoder_context *encoder_context)
1151 {
1152     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1153     struct intel_batchbuffer *batch = encoder_context->base.batch;
1154     int slice_type;
1155     struct object_surface *obj_surface;
1156     unsigned int fref_entry, bref_entry;
1157     int frame_index, i;
1158     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1159
1160     fref_entry = 0x80808080;
1161     bref_entry = 0x80808080;
1162     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1163
1164     if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
1165         int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
1166
1167         if (ref_idx_l0 > 3) {
1168             WARN_ONCE("ref_idx_l0 is out of range\n");
1169             ref_idx_l0 = 0;
1170         }
1171
1172         obj_surface = vme_context->used_reference_objects[0];
1173         frame_index = -1;
1174         for (i = 0; i < 16; i++) {
1175             if (obj_surface &&
1176                 obj_surface == encode_state->reference_objects[i]) {
1177                 frame_index = i;
1178                 break;
1179             }
1180         }
1181         if (frame_index == -1) {
1182             WARN_ONCE("RefPicList0 is not found in DPB!\n");
1183         } else {
1184             int ref_idx_l0_shift = ref_idx_l0 * 8;
1185             fref_entry &= ~(0xFF << ref_idx_l0_shift);
1186             fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
1187         }
1188     }
1189
1190     if (slice_type == SLICE_TYPE_B) {
1191         int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
1192
1193         if (ref_idx_l1 > 3) {
1194             WARN_ONCE("ref_idx_l1 is out of range\n");
1195             ref_idx_l1 = 0;
1196         }
1197
1198         obj_surface = vme_context->used_reference_objects[1];
1199         frame_index = -1;
1200         for (i = 0; i < 16; i++) {
1201             if (obj_surface &&
1202                 obj_surface == encode_state->reference_objects[i]) {
1203                 frame_index = i;
1204                 break;
1205             }
1206         }
1207         if (frame_index == -1) {
1208             WARN_ONCE("RefPicList1 is not found in DPB!\n");
1209         } else {
1210             int ref_idx_l1_shift = ref_idx_l1 * 8;
1211             bref_entry &= ~(0xFF << ref_idx_l1_shift);
1212             bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
1213         }
1214     }
1215
1216     BEGIN_BCS_BATCH(batch, 10);
1217     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1218     OUT_BCS_BATCH(batch, 0);                  //Select L0
1219     OUT_BCS_BATCH(batch, fref_entry);         //Only 1 reference
1220     for(i = 0; i < 7; i++) {
1221         OUT_BCS_BATCH(batch, 0x80808080);
1222     }
1223     ADVANCE_BCS_BATCH(batch);
1224
1225     BEGIN_BCS_BATCH(batch, 10);
1226     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1227     OUT_BCS_BATCH(batch, 1);                  //Select L1
1228     OUT_BCS_BATCH(batch, bref_entry);         //Only 1 reference
1229     for(i = 0; i < 7; i++) {
1230         OUT_BCS_BATCH(batch, 0x80808080);
1231     }
1232     ADVANCE_BCS_BATCH(batch);
1233 }
1234
1235
1236 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1237                                  struct encode_state *encode_state,
1238                                  struct intel_encoder_context *encoder_context)
1239 {
1240     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1241     uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1242     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1243     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1244     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1245     uint32_t mv_x, mv_y;
1246     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1247     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1248     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1249
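         /*
          * Pick the motion-vector search range from the MPEG-2 level; the values
          * below are presumably derived from the per-level motion-vector range
          * limits (f_code constraints) of the MPEG-2 spec.
          */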
1250     if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1251         mv_x = 512;
1252         mv_y = 64;
1253     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1254         mv_x = 1024;
1255         mv_y = 128;
1256     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1257         mv_x = 2048;
1258         mv_y = 128;
1259     } else {
1260         WARN_ONCE("Incorrect Mpeg2 level setting!\n");
1261         mv_x = 512;
1262         mv_y = 64;
1263     }
1264
1265     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1266     if (pic_param->picture_type != VAEncPictureTypeIntra) {
1267         int qp, m_cost, j, mv_count;
1268         float   lambda, m_costf;
1269         slice_param = (VAEncSliceParameterBufferMPEG2 *)
1270             encode_state->slice_params_ext[0]->buffer;
1271         qp = slice_param->quantiser_scale_code;
1272         lambda = intel_lambda_qp(qp);
1273         /* No intra prediction is used, so its cost is zero */
1274         vme_state_message[MODE_INTRA_8X8] = 0;
1275         vme_state_message[MODE_INTRA_4X4] = 0;
1276         vme_state_message[MODE_INTER_MV0] = 0;
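             /*
              * The remaining MV cost LUT entries (MV1..MV7) correspond to
              * increasingly large motion-vector differences; their cost grows
              * roughly with log2 of the MV magnitude, scaled by the lambda
              * derived from the quantiser scale code.
              */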
1277         for (j = 1; j < 3; j++) {
1278             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1279             m_cost = (int)m_costf;
1280             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1281         }
1282         mv_count = 3;
1283         for (j = 4; j <= 64; j *= 2) {
1284             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1285             m_cost = (int)m_costf;
1286             vme_state_message[MODE_INTER_MV0 + mv_count] =
1287                 intel_format_lutvalue(m_cost, 0x6f);
1288             mv_count++;
1289         }
1290         m_cost = lambda;
1291         /* Only the 16x16 search is performed, so the mode costs for the
1292          * other partitions (e.g. 16x8/8x8) can be ignored.
1293          */
1294         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1295         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1296
1297         vme_state_message[MODE_INTER_16X8] = 0;
1298         vme_state_message[MODE_INTER_8X8] = 0;
1299         vme_state_message[MODE_INTER_8X4] = 0;
1300         vme_state_message[MODE_INTER_4X4] = 0;
1301         vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
1302
1303     }
1304     vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1305
1306     vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
1307         width_in_mbs;
1308 }
1309
1310 void
1311 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
1312                                            struct encode_state *encode_state,
1313                                            int mb_width, int mb_height,
1314                                            int kernel,
1315                                            struct intel_encoder_context *encoder_context)
1316 {
1317     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1318     unsigned int *command_ptr;
1319
1320 #define         MPEG2_SCOREBOARD                (1 << 21)
1321
1322     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1323     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1324
1325     {
1326         unsigned int mb_intra_ub, score_dep;
1327         int x_outer, y_outer, x_inner, y_inner;
1328         int xtemp_outer = 0;
1329         int first_mb = 0;
1330         int num_mb = mb_width * mb_height;
1331
1332         x_outer = 0;
1333         y_outer = 0;
1334
1335
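             /*
              * Dispatch the macroblocks along diagonal wavefronts: each inner step
              * moves two MBs to the left and one row down, so that the scoreboard
              * dependencies on the left (A), top (B) and top-right (C) neighbours
              * can be resolved before each MB is processed.  This first loop seeds
              * the wavefronts from the top row; the second loop below seeds the
              * remaining ones from the right-hand edge as the walk moves down.
              */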
1336         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1337             x_inner = x_outer;
1338             y_inner = y_outer;
1339             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1340                 mb_intra_ub = 0;
1341                 score_dep = 0;
1342                 if (x_inner != 0) {
1343                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1344                     score_dep |= MB_SCOREBOARD_A; 
1345                 }
1346                 if (y_inner != 0) {
1347                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1348                     score_dep |= MB_SCOREBOARD_B;
1349
1350                     if (x_inner != 0)
1351                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1352
1353                     if (x_inner != (mb_width -1)) {
1354                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1355                         score_dep |= MB_SCOREBOARD_C;
1356                     }
1357                 }
1358
1359                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1360                 *command_ptr++ = kernel;
1361                 *command_ptr++ = MPEG2_SCOREBOARD;
1362                 /* Indirect data */
1363                 *command_ptr++ = 0;
1364                 /* the (X, Y) term of scoreboard */
1365                 *command_ptr++ = ((y_inner << 16) | x_inner);
1366                 *command_ptr++ = score_dep;
1367                 /* inline data */
1368                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1369                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1370                 x_inner -= 2;
1371                 y_inner += 1;
1372             }
1373             x_outer += 1;
1374         }
1375
1376         xtemp_outer = mb_width - 2;
1377         if (xtemp_outer < 0)
1378             xtemp_outer = 0;
1379         x_outer = xtemp_outer;
1380         y_outer = 0;
1381         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
1382             y_inner = y_outer;
1383             x_inner = x_outer;
1384             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1385                 mb_intra_ub = 0;
1386                 score_dep = 0;
1387                 if (x_inner != 0) {
1388                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1389                     score_dep |= MB_SCOREBOARD_A; 
1390                 }
1391                 if (y_inner != 0) {
1392                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1393                     score_dep |= MB_SCOREBOARD_B;
1394
1395                     if (x_inner != 0)
1396                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1397
1398                     if (x_inner != (mb_width -1)) {
1399                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1400                         score_dep |= MB_SCOREBOARD_C;
1401                     }
1402                 }
1403
1404                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1405                 *command_ptr++ = kernel;
1406                 *command_ptr++ = MPEG2_SCOREBOARD;
1407                 /* Indirect data */
1408                 *command_ptr++ = 0;
1409                 /* the (X, Y) term of scoreboard */
1410                 *command_ptr++ = ((y_inner << 16) | x_inner);
1411                 *command_ptr++ = score_dep;
1412                 /* inline data */
1413                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1414                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1415
1416                 x_inner -= 2;
1417                 y_inner += 1;
1418             }
1419             x_outer++;
1420             if (x_outer >= mb_width) {
1421                 y_outer += 1;
1422                 x_outer = xtemp_outer;
1423             }           
1424         }
1425     }
1426
1427     *command_ptr++ = 0;
1428     *command_ptr++ = MI_BATCH_BUFFER_END;
1429
1430     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1431     return;
1432 }
1433
1434 static int
1435 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1436                           VAPictureH264 *ref_list,
1437                           int num_pictures,
1438                           int dir)
1439 {
1440     int i, found = -1, min = 0x7FFFFFFF;
1441
1442     for (i = 0; i < num_pictures; i++) {
1443         int tmp;
1444
1445         if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1446             (ref_list[i].picture_id == VA_INVALID_SURFACE))
1447             break;
1448
1449         tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1450
1451         if (dir)
1452             tmp = -tmp;
1453
1454         if (tmp > 0 && tmp < min) {
1455             min = tmp;
1456             found = i;
1457         }
1458     }
1459
1460     return found;
1461 }
1462
1463 void
1464 intel_avc_vme_reference_state(VADriverContextP ctx,
1465                               struct encode_state *encode_state,
1466                               struct intel_encoder_context *encoder_context,
1467                               int list_index,
1468                               int surface_index,
1469                               void (* vme_source_surface_state)(
1470                                   VADriverContextP ctx,
1471                                   int index,
1472                                   struct object_surface *obj_surface,
1473                                   struct intel_encoder_context *encoder_context))
1474 {
1475     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1476     struct object_surface *obj_surface = NULL;
1477     struct i965_driver_data *i965 = i965_driver_data(ctx);
1478     VASurfaceID ref_surface_id;
1479     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1480     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1481     int max_num_references;
1482     VAPictureH264 *curr_pic;
1483     VAPictureH264 *ref_list;
1484     int ref_idx;
1485
1486     if (list_index == 0) {
1487         max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1488         ref_list = slice_param->RefPicList0;
1489     } else {
1490         max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1491         ref_list = slice_param->RefPicList1;
1492     }
1493
1494     if (max_num_references == 1) {
1495         if (list_index == 0) {
1496             ref_surface_id = slice_param->RefPicList0[0].picture_id;
1497             vme_context->used_references[0] = &slice_param->RefPicList0[0];
1498         } else {
1499             ref_surface_id = slice_param->RefPicList1[0].picture_id;
1500             vme_context->used_references[1] = &slice_param->RefPicList1[0];
1501         }
1502
1503         if (ref_surface_id != VA_INVALID_SURFACE)
1504             obj_surface = SURFACE(ref_surface_id);
1505
1506         if (!obj_surface ||
1507             !obj_surface->bo) {
1508             obj_surface = encode_state->reference_objects[list_index];
1509             vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1510         }
1511
1512         ref_idx = 0;
1513     } else {
1514         curr_pic = &pic_param->CurrPic;
1515
1516         /* select the reference frame in temporal space */
1517         ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1518         ref_surface_id = ref_list[ref_idx].picture_id;
1519
1520         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1521             obj_surface = SURFACE(ref_surface_id);
1522
1523         vme_context->used_reference_objects[list_index] = obj_surface;
1524         vme_context->used_references[list_index] = &ref_list[ref_idx];
1525     }
1526
1527     if (obj_surface &&
1528         obj_surface->bo) {
1529         assert(ref_idx >= 0);
1530         vme_context->used_reference_objects[list_index] = obj_surface;
1531         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
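             /*
              * Replicate the chosen reference index into all four bytes,
              * presumably one byte per 8x8 block of a macroblock, so the same
              * L0/L1 index is used for the whole MB.
              */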
1532         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
1533                                                     ref_idx << 16 |
1534                                                     ref_idx <<  8 |
1535                                                     ref_idx);
1536     } else {
1537         vme_context->used_reference_objects[list_index] = NULL;
1538         vme_context->used_references[list_index] = NULL;
1539         vme_context->ref_index_in_mb[list_index] = 0;
1540     }
1541 }
1542
1543 void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
1544                                         struct encode_state *encode_state,
1545                                         struct intel_encoder_context *encoder_context,
1546                                         int slice_index,
1547                                         struct intel_batchbuffer *slice_batch)
1548 {
1549     int count, i, start_index;
1550     unsigned int length_in_bits;
1551     VAEncPackedHeaderParameterBuffer *param = NULL;
1552     unsigned int *header_data = NULL;
1553     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1554     int slice_header_index;
1555
1556     if (encode_state->slice_header_index[slice_index] == 0)
1557         slice_header_index = -1;
1558     else
1559         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1560
1561     count = encode_state->slice_rawdata_count[slice_index];
1562     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1563
1564     for (i = 0; i < count; i++) {
1565         unsigned int skip_emul_byte_cnt;
1566
1567         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1568
1569         param = (VAEncPackedHeaderParameterBuffer *)
1570                     (encode_state->packed_header_params_ext[start_index + i]->buffer);
1571
1572         /* skip the packed slice header data here, as it is inserted last */
1573         if (param->type == VAEncPackedHeaderSlice)
1574             continue;
1575
1576         length_in_bits = param->bit_length;
1577
1578         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1579
1580         /* as the slice header is still required, the last header flag is set to
1581          * zero.
1582          */
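             /*
              * As assumed from the callers in this file, the insert_object()
              * arguments are: packed data, length in dwords, valid bits in the
              * last dword, emulation-skip byte count, last-header flag,
              * end-of-slice flag, emulation-byte-insertion flag and the batch.
              */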
1583         mfc_context->insert_object(ctx,
1584                                    encoder_context,
1585                                    header_data,
1586                                    ALIGN(length_in_bits, 32) >> 5,
1587                                    length_in_bits & 0x1f,
1588                                    skip_emul_byte_cnt,
1589                                    0,
1590                                    0,
1591                                    !param->has_emulation_bytes,
1592                                    slice_batch);
1593     }
1594
1595     if (slice_header_index == -1) {
1596         unsigned char *slice_header = NULL;
1597         int slice_header_length_in_bits = 0;
1598         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1599         VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1600         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1601
1602         /* No packed slice header data was passed, so the driver generates it */
1603         /* for the normal H.264 case */
1604         slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
1605                                                              pPicParameter,
1606                                                              pSliceParameter,
1607                                                              &slice_header);
1608         mfc_context->insert_object(ctx, encoder_context,
1609                                    (unsigned int *)slice_header,
1610                                    ALIGN(slice_header_length_in_bits, 32) >> 5,
1611                                    slice_header_length_in_bits & 0x1f,
1612                                    5,  /* first 5 bytes are start code + nal unit type */
1613                                    1, 0, 1, slice_batch);
1614
1615         free(slice_header);
1616     } else {
1617         unsigned int skip_emul_byte_cnt;
1618
1619         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1620
1621         param = (VAEncPackedHeaderParameterBuffer *)
1622                     (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1623         length_in_bits = param->bit_length;
1624
1625         /* as the slice header is the last header data for one slice,
1626          * the last header flag is set to one.
1627          */
1628         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1629
1630         mfc_context->insert_object(ctx,
1631                                    encoder_context,
1632                                    header_data,
1633                                    ALIGN(length_in_bits, 32) >> 5,
1634                                    length_in_bits & 0x1f,
1635                                    skip_emul_byte_cnt,
1636                                    1,
1637                                    0,
1638                                    !param->has_emulation_bytes,
1639                                    slice_batch);
1640     }
1641
1642     return;
1643 }
1644
1645 void
1646 intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
1647                                 struct encode_state *encode_state,
1648                                 struct intel_encoder_context *encoder_context)
1649 {
1650     struct i965_driver_data *i965 = i965_driver_data(ctx);
1651     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1652     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1653     int qp;
1654     dri_bo *bo;
1655     uint8_t *cost_table;
1656
1657     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1658
1659
1660     if (slice_type == SLICE_TYPE_I) {
1661         if (vme_context->i_qp_cost_table)
1662             return;
1663     } else if (slice_type == SLICE_TYPE_P) {
1664         if (vme_context->p_qp_cost_table)
1665             return;
1666     } else {
1667         if (vme_context->b_qp_cost_table)
1668             return;
1669     }
1670
1671     /* It is enough to allocate 32 bytes for each qp. */
1672     bo = dri_bo_alloc(i965->intel.bufmgr,
1673                       "cost_table ",
1674                       QP_MAX * 32,
1675                       64);
1676
1677     dri_bo_map(bo, 1);
1678     assert(bo->virtual);
1679     cost_table = (uint8_t *)(bo->virtual);
1680     for (qp = 0; qp < QP_MAX; qp++) {
1681         intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
1682         cost_table += 32;
1683     }
1684
1685     dri_bo_unmap(bo);
1686
1687     if (slice_type == SLICE_TYPE_I) {
1688         vme_context->i_qp_cost_table = bo;
1689     } else if (slice_type == SLICE_TYPE_P) {
1690         vme_context->p_qp_cost_table = bo;
1691     } else {
1692         vme_context->b_qp_cost_table = bo;
1693     }
1694
1695     vme_context->cost_table_size = QP_MAX * 32;
1696     return;
1697 }
1698
1699 extern void
1700 intel_h264_setup_cost_surface(VADriverContextP ctx,
1701                               struct encode_state *encode_state,
1702                               struct intel_encoder_context *encoder_context,
1703                               unsigned long binding_table_offset,
1704                               unsigned long surface_state_offset)
1705 {
1706     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1707     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1708     dri_bo *bo;
1709
1710
1711     struct i965_buffer_surface cost_table;
1712
1713     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1714
1715
1716     if (slice_type == SLICE_TYPE_I) {
1717         bo = vme_context->i_qp_cost_table;
1718     } else if (slice_type == SLICE_TYPE_P) {
1719         bo = vme_context->p_qp_cost_table;
1720     } else {
1721         bo = vme_context->b_qp_cost_table;
1722     }
1723
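         /*
          * Expose the per-QP cost table to the VME kernel as a linear buffer
          * surface: QP_MAX blocks of 32 bytes each, one block per QP value.
          */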
1724     cost_table.bo = bo;
1725     cost_table.num_blocks = QP_MAX;
1726     cost_table.pitch = 16;
1727     cost_table.size_block = 32;
1728
1729     vme_context->vme_buffer_suface_setup(ctx,
1730                                          &vme_context->gpe_context,
1731                                          &cost_table,
1732                                          binding_table_offset,
1733                                          surface_state_offset);
1734 }
1735
1736 /*
1737  * The conversion between qp and qstep follows the scaling process of the
1738  * transform coefficients for the luma component in the H.264 spec:
1739  *   qstep = 2^(QPy / 6 - 6)
1740  * To avoid a too-small qstep, the value is multiplied by 16 (i.e. 2^(QPy / 6 - 2)).
1741  */
1742 static float intel_h264_qp_qstep(int qp)
1743 {
1744     float value, qstep;
1745     value = qp;
1746     value = value / 6 - 2;
1747     qstep = powf(2, value);
1748     return qstep;
1749 }
1750
1751 static int intel_h264_qstep_qp(float qstep)
1752 {
1753     float qp;
1754
1755     qp = 12.0f + 6.0f * log2f(qstep);
1756
1757     return floorf(qp);
1758 }
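
 /*
  * Illustrative round trip (example values only, not used by the driver):
  *
  *     float qstep = intel_h264_qp_qstep(26);    // 2^(26/6 - 2) ~= 5.04
  *     int   qp    = intel_h264_qstep_qp(qstep); // floor(12 + 6 * log2(5.04)) = 26
  */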
1759
1760 /*
1761  * Currently it is based on the following assumption:
1762  * SUM(roi_area * 1 / roi_qstep) + nonroi_area * 1 / nonroi_qstep =
1763  *                                 total_area * 1 / baseqp_qstep
1764  *
1765  * qstep is the linearized quantization step corresponding to the H.264 QP.
1766  */
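 /*
  * Solving that assumption for the non-ROI quantiser step gives
  *   nonroi_qstep = nonroi_area / (total_area / baseqp_qstep - SUM(roi_area / roi_qstep))
  * which is what intel_h264_enc_roi_cbr() computes below; the resulting
  * non-ROI QP is then clipped to the [1, 51] range.
  */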
1767 typedef struct {
1768     int row_start_in_mb;
1769     int row_end_in_mb;
1770     int col_start_in_mb;
1771     int col_end_in_mb;
1772
1773     int width_mbs;
1774     int height_mbs;
1775
1776     int roi_qp;
1777 } ROIRegionParam;
1778
1779 static VAStatus
1780 intel_h264_enc_roi_cbr(VADriverContextP ctx,
1781                        int base_qp,
1782                        VAEncMiscParameterBufferROI *pMiscParamROI,
1783                        struct encode_state *encode_state,
1784                        struct intel_encoder_context *encoder_context)
1785 {
1786     int nonroi_qp;
1787     VAEncROI *region_roi;
1788     bool quickfill = 0;
1789
1790     ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS];
1791     int num_roi = 0;
1792     int i,j;
1793
1794     float temp;
1795     float qstep_nonroi, qstep_base;
1796     float roi_area, total_area, nonroi_area;
1797     float sum_roi;
1798
1799     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1800     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1801     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1802     int mbs_in_picture = width_in_mbs * height_in_mbs;
1803
1804     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1805     VAStatus vaStatus = VA_STATUS_SUCCESS;
1806
1807     if(pMiscParamROI != NULL)
1808     {
1809         num_roi = (pMiscParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pMiscParamROI->num_roi;
1810
1811         /* Currently roi_value_is_qp_delta is the only supported priority mode.
1812          *
1813          * The qp_delta set by the user is added to base_qp, and the result is
1814          * then clamped to [base_qp - min_delta, base_qp + max_delta].
1815          */
1816         ASSERT_RET(pMiscParamROI->roi_flags.bits.roi_value_is_qp_delta,VA_STATUS_ERROR_INVALID_PARAMETER);
1817     }
1818
1819     /* When base_qp is 12 or lower, the quality is already quite good based
1820      * on H.264 testing experience.
1821      * In that case there is no need to adjust the QP for the ROI regions.
1822      */
1823     if (base_qp <= 12) {
1824         nonroi_qp = base_qp;
1825         quickfill = 1;
1826         goto qp_fill;
1827     }
1828
1829     sum_roi = 0.0f;
1830     roi_area = 0;
1831     for (i = 0; i < num_roi; i++) {
1832         int row_start, row_end, col_start, col_end;
1833         int roi_width_mbs, roi_height_mbs;
1834         int mbs_in_roi;
1835         int roi_qp;
1836         float qstep_roi;
1837
1838         region_roi =  (VAEncROI *)pMiscParamROI->roi + i;
1839
1840         col_start = region_roi->roi_rectangle.x;
1841         col_end = col_start + region_roi->roi_rectangle.width;
1842         row_start = region_roi->roi_rectangle.y;
1843         row_end = row_start + region_roi->roi_rectangle.height;
1844         col_start = col_start / 16;
1845         col_end = (col_end + 15) / 16;
1846         row_start = row_start / 16;
1847         row_end = (row_end + 15) / 16;
1848
1849         roi_width_mbs = col_end - col_start;
1850         roi_height_mbs = row_end - row_start;
1851         mbs_in_roi = roi_width_mbs * roi_height_mbs;
1852
1853         param_regions[i].row_start_in_mb = row_start;
1854         param_regions[i].row_end_in_mb = row_end;
1855         param_regions[i].col_start_in_mb = col_start;
1856         param_regions[i].col_end_in_mb = col_end;
1857         param_regions[i].width_mbs = roi_width_mbs;
1858         param_regions[i].height_mbs = roi_height_mbs;
1859
1860         roi_qp = base_qp + region_roi->roi_value;
1861         BRC_CLIP(roi_qp, 1, 51);
1862
1863         param_regions[i].roi_qp = roi_qp;
1864         qstep_roi = intel_h264_qp_qstep(roi_qp);
1865
1866         roi_area += mbs_in_roi;
1867         sum_roi += mbs_in_roi / qstep_roi;
1868     }
1869
1870     total_area = mbs_in_picture;
1871     nonroi_area = total_area - roi_area;
1872
1873     qstep_base = intel_h264_qp_qstep(base_qp);
1874     temp = (total_area / qstep_base - sum_roi);
1875
1876     if (temp < 0) {
1877         nonroi_qp = 51;
1878     } else {
1879         qstep_nonroi = nonroi_area / temp;
1880         nonroi_qp = intel_h264_qstep_qp(qstep_nonroi);
1881     }
1882
1883     BRC_CLIP(nonroi_qp, 1, 51);
1884
1885 qp_fill:
1886     memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture);
1887     if (!quickfill) {
1888         char *qp_ptr;
1889
1890         for (i = 0; i < num_roi; i++) {
1891             for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) {
1892                 qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb;
1893                 memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs);
1894             }
1895         }
1896     }
1897     return vaStatus;
1898 }
1899
1900 extern void
1901 intel_h264_enc_roi_config(VADriverContextP ctx,
1902                           struct encode_state *encode_state,
1903                           struct intel_encoder_context *encoder_context)
1904 {
1905     char *qp_ptr;
1906     int i, j;
1907     VAEncROI *region_roi;
1908     struct i965_driver_data *i965 = i965_driver_data(ctx);
1909     VAEncMiscParameterBuffer* pMiscParamROI;
1910     VAEncMiscParameterBufferROI *pParamROI = NULL;
1911     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1912     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1913     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1914     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1915     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1916
1917     int row_start, row_end, col_start, col_end;
1918     int num_roi = 0;
1919
1920     vme_context->roi_enabled = 0;
1921     /* Restriction: Disable ROI when multi-slice is enabled */
1922     if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
1923         return;
1924
1925     if (encode_state->misc_param[VAEncMiscParameterTypeROI][0] != NULL) {
1926         pMiscParamROI = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeROI][0]->buffer;
1927         pParamROI = (VAEncMiscParameterBufferROI *)pMiscParamROI->data;
1928
1929         /* check whether number of ROI is correct */
1930         num_roi = (pParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pParamROI->num_roi;
1931     }
1932
1933     if (num_roi > 0)
1934         vme_context->roi_enabled = 1;
1935
1936     if (!vme_context->roi_enabled)
1937         return;
1938
1939     if ((vme_context->saved_width_mbs !=  width_in_mbs) ||
1940         (vme_context->saved_height_mbs != height_in_mbs)) {
1941         free(vme_context->qp_per_mb);
1942         vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);
1943
1944         vme_context->saved_width_mbs = width_in_mbs;
1945         vme_context->saved_height_mbs = height_in_mbs;
1946         assert(vme_context->qp_per_mb);
1947     }
1948     if (encoder_context->rate_control_mode == VA_RC_CBR) {
1949         /*
1950          * TODO: a more sophisticated QP adjustment is needed.
1951          * Currently the base QP is initialized to the BRC slice QP.
1952          */
1953         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1954         int qp;
1955         int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1956
1957         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1958         intel_h264_enc_roi_cbr(ctx, qp, pParamROI, encode_state, encoder_context);
1959
1960     } else if (encoder_context->rate_control_mode == VA_RC_CQP) {
1961         VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1962         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1963         int qp;
1964
1965         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1966         memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
1967
1968
1969         for (j = num_roi; j ; j--) {
1970             int qp_delta, qp_clip;
1971
1972             region_roi =  (VAEncROI *)pParamROI->roi + j - 1;
1973
1974             col_start = region_roi->roi_rectangle.x;
1975             col_end = col_start + region_roi->roi_rectangle.width;
1976             row_start = region_roi->roi_rectangle.y;
1977             row_end = row_start + region_roi->roi_rectangle.height;
1978
1979             col_start = col_start / 16;
1980             col_end = (col_end + 15) / 16;
1981             row_start = row_start / 16;
1982             row_end = (row_end + 15) / 16;
1983
1984             qp_delta = region_roi->roi_value;
1985             qp_clip = qp + qp_delta;
1986
1987             BRC_CLIP(qp_clip, 1, 51);
1988
1989             for (i = row_start; i < row_end; i++) {
1990                 qp_ptr = vme_context->qp_per_mb + (i * width_in_mbs) + col_start;
1991                 memset(qp_ptr, qp_clip, (col_end - col_start));
1992             }
1993         }
1994     } else {
1995         /*
1996          * ROI is disabled for rate control modes other than CBR and CQP.
1997          */
1998         vme_context->roi_enabled = 0;
1999     }
2000
2001     if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info))
2002         encoder_context->soft_batch_force = 1;
2003
2004     return;
2005 }
2006
2007 /* HEVC */
2008 static int
2009 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
2010                            VAPictureHEVC *ref_list,
2011                            int num_pictures,
2012                            int dir)
2013 {
2014     int i, found = -1, min = 0x7FFFFFFF;
2015
2016     for (i = 0; i < num_pictures; i++) {
2017         int tmp;
2018
2019         if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
2020             (ref_list[i].picture_id == VA_INVALID_SURFACE))
2021             break;
2022
2023         tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
2024
2025         if (dir)
2026             tmp = -tmp;
2027
2028         if (tmp > 0 && tmp < min) {
2029             min = tmp;
2030             found = i;
2031         }
2032     }
2033
2034     return found;
2035 }
2036 void
2037 intel_hevc_vme_reference_state(VADriverContextP ctx,
2038                                struct encode_state *encode_state,
2039                                struct intel_encoder_context *encoder_context,
2040                                int list_index,
2041                                int surface_index,
2042                                void (* vme_source_surface_state)(
2043                                    VADriverContextP ctx,
2044                                    int index,
2045                                    struct object_surface *obj_surface,
2046                                    struct intel_encoder_context *encoder_context))
2047 {
2048     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2049     struct object_surface *obj_surface = NULL;
2050     struct i965_driver_data *i965 = i965_driver_data(ctx);
2051     VASurfaceID ref_surface_id;
2052     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2053     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2054     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2055     int max_num_references;
2056     VAPictureHEVC *curr_pic;
2057     VAPictureHEVC *ref_list;
2058     int ref_idx;
2059     unsigned int is_hevc10 = 0;
2060     GenHevcSurface *hevc_encoder_surface = NULL;
2061
2062     if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
2063         || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
2064         is_hevc10 = 1;
2065
2066     if (list_index == 0) {
2067         max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
2068         ref_list = slice_param->ref_pic_list0;
2069     } else {
2070         max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
2071         ref_list = slice_param->ref_pic_list1;
2072     }
2073
2074     if (max_num_references == 1) {
2075         if (list_index == 0) {
2076             ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
2077             vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
2078         } else {
2079             ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
2080             vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
2081         }
2082
2083         if (ref_surface_id != VA_INVALID_SURFACE)
2084             obj_surface = SURFACE(ref_surface_id);
2085
2086         if (!obj_surface ||
2087             !obj_surface->bo) {
2088             obj_surface = encode_state->reference_objects[list_index];
2089             vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
2090         }
2091
2092         ref_idx = 0;
2093     } else {
2094         curr_pic = &pic_param->decoded_curr_pic;
2095
2096         /* select the reference frame in temporal space */
2097         ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
2098         ref_surface_id = ref_list[ref_idx].picture_id;
2099
2100         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
2101             obj_surface = SURFACE(ref_surface_id);
2102
2103         vme_context->used_reference_objects[list_index] = obj_surface;
2104         vme_context->used_references[list_index] = &ref_list[ref_idx];
2105     }
2106
2107     if (obj_surface &&
2108         obj_surface->bo) {
2109         assert(ref_idx >= 0);
2110         vme_context->used_reference_objects[list_index] = obj_surface;
2111
2112         if(is_hevc10){
2113             hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2114             assert(hevc_encoder_surface);
2115             obj_surface = hevc_encoder_surface->nv12_surface_obj;
2116         }
2117         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
2118         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
2119                 ref_idx << 16 |
2120                 ref_idx <<  8 |
2121                 ref_idx);
2122     } else {
2123         vme_context->used_reference_objects[list_index] = NULL;
2124         vme_context->used_references[list_index] = NULL;
2125         vme_context->ref_index_in_mb[list_index] = 0;
2126     }
2127 }
2128
2129 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
2130                                      struct encode_state *encode_state,
2131                                      struct intel_encoder_context *encoder_context)
2132 {
2133     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2134     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2135     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2136     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2137     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2138     int qp, m_cost, j, mv_count;
2139     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
2140     float   lambda, m_costf;
2141
2142     /* HEVC has no SI/SP slice types, so no slice type fixup is needed */
2143     int slice_type = slice_param->slice_type;
2144
2145
2146     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2147
2148     if (encoder_context->rate_control_mode == VA_RC_CBR)
2149     {
2150         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
2151         if (slice_type == HEVC_SLICE_B) {
2152             if (pSequenceParameter->ip_period == 1)
2153             {
2154                 slice_type = HEVC_SLICE_P;
2155                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2156
2157             } else if (mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1) {
2158                 slice_type = HEVC_SLICE_P;
2159                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2160             }
2161         }
2162
2163     }
2164
2165     if (vme_state_message == NULL)
2166         return;
2167
2168     assert(qp <= QP_MAX);
2169     lambda = intel_lambda_qp(qp);
2170     if (slice_type == HEVC_SLICE_I) {
2171         vme_state_message[MODE_INTRA_16X16] = 0;
2172         m_cost = lambda * 4;
2173         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2174         m_cost = lambda * 16;
2175         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2176         m_cost = lambda * 3;
2177         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2178     } else {
2179         m_cost = 0;
2180         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
2181         for (j = 1; j < 3; j++) {
2182             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2183             m_cost = (int)m_costf;
2184             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2185         }
2186         mv_count = 3;
2187         for (j = 4; j <= 64; j *= 2) {
2188             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2189             m_cost = (int)m_costf;
2190             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
2191             mv_count++;
2192         }
2193
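             /*
              * For low QPs (25 and below) a fixed cost table is loaded instead of
              * the lambda-derived values, presumably because the lambda-based
              * costs become too small to usefully bias the mode decision.
              */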
2194         if (qp <= 25) {
2195             vme_state_message[MODE_INTRA_16X16] = 0x4a;
2196             vme_state_message[MODE_INTRA_8X8] = 0x4a;
2197             vme_state_message[MODE_INTRA_4X4] = 0x4a;
2198             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2199             vme_state_message[MODE_INTER_16X16] = 0x4a;
2200             vme_state_message[MODE_INTER_16X8] = 0x4a;
2201             vme_state_message[MODE_INTER_8X8] = 0x4a;
2202             vme_state_message[MODE_INTER_8X4] = 0x4a;
2203             vme_state_message[MODE_INTER_4X4] = 0x4a;
2204             vme_state_message[MODE_INTER_BWD] = 0x2a;
2205             return;
2206         }
2207         m_cost = lambda * 10;
2208         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2209         m_cost = lambda * 14;
2210         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2211         m_cost = lambda * 24;
2212         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2213         m_costf = lambda * 3.5;
2214         m_cost = m_costf;
2215         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2216         if (slice_type == HEVC_SLICE_P) {
2217             m_costf = lambda * 2.5;
2218             m_cost = m_costf;
2219             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2220             m_costf = lambda * 4;
2221             m_cost = m_costf;
2222             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2223             m_costf = lambda * 1.5;
2224             m_cost = m_costf;
2225             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2226             m_costf = lambda * 3;
2227             m_cost = m_costf;
2228             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2229             m_costf = lambda * 5;
2230             m_cost = m_costf;
2231             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2232             /* BWD is not used in P-frame */
2233             vme_state_message[MODE_INTER_BWD] = 0;
2234         } else {
2235             m_costf = lambda * 2.5;
2236             m_cost = m_costf;
2237             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2238             m_costf = lambda * 5.5;
2239             m_cost = m_costf;
2240             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2241             m_costf = lambda * 3.5;
2242             m_cost = m_costf;
2243             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2244             m_costf = lambda * 5.0;
2245             m_cost = m_costf;
2246             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2247             m_costf = lambda * 6.5;
2248             m_cost = m_costf;
2249             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2250             m_costf = lambda * 1.5;
2251             m_cost = m_costf;
2252             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
2253         }
2254     }
2255 }