i965_encoder: consistently represent framerate as a fraction
[android-x86/hardware-intel-common-vaapi.git] / src / gen6_mfc_common.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "gen9_mfc.h"
45 #include "intel_media.h"
46
47 #ifndef HAVE_LOG2F
48 #define log2f(x) (logf(x)/(float)M_LN2)
49 #endif
50
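/*
 * Map the extended H.264 slice types onto the three types the encoder
 * actually distinguishes: SP is treated as P, SI as I, and anything
 * else (warning on unknown values) as B.
 */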
51 int intel_avc_enc_slice_type_fixup(int slice_type)
52 {
53     if (slice_type == SLICE_TYPE_SP ||
54         slice_type == SLICE_TYPE_P)
55         slice_type = SLICE_TYPE_P;
56     else if (slice_type == SLICE_TYPE_SI ||
57              slice_type == SLICE_TYPE_I)
58         slice_type = SLICE_TYPE_I;
59     else {
60         if (slice_type != SLICE_TYPE_B)
61             WARN_ONCE("Invalid slice type for H.264 encoding!\n");
62
63         slice_type = SLICE_TYPE_B;
64     }
65
66     return slice_type;
67 }
68
69 static void
70 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state, 
71                                         struct intel_encoder_context *encoder_context)
72 {
73     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
74     int i;
75
76     for(i = 0 ; i < 3; i++) {
77         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
78         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
79         mfc_context->bit_rate_control_context[i].GrowInit = 6;
80         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
81         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
82         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
83         
84         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
85         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
86         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
87         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
88         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
89         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
90     }
91 }
92
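/*
 * Initialise the per-layer BRC state: the HRD buffer size and fullness are
 * split across layers by their share of the total bitrate, the per-GOP
 * I/P/B frame counts are derived from the GOP structure, target frame
 * sizes are weighted by BRC_PWEIGHT/BRC_BWEIGHT, and a starting QP is
 * estimated from the bits-per-frame budget.
 */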
93 static void intel_mfc_brc_init(struct encode_state *encode_state,
94                                struct intel_encoder_context* encoder_context)
95 {
96     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
97     double bitrate, framerate;
98     double frame_per_bits = 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
99     double qp1_size = 0.1 * frame_per_bits;
100     double qp51_size = 0.001 * frame_per_bits;
101     double bpf, factor, hrd_factor;
102     int inum = encoder_context->brc.num_iframes_in_gop,
103         pnum = encoder_context->brc.num_pframes_in_gop,
104         bnum = encoder_context->brc.num_bframes_in_gop; /* Gop structure: number of I, P, B frames in the Gop. */
105     int intra_period = encoder_context->brc.gop_size;
106     int i;
107
108     if (encoder_context->layer.num_layers > 1)
109         qp1_size = 0.15 * frame_per_bits;
110
111     mfc_context->brc.mode = encoder_context->rate_control_mode;
112
113     mfc_context->hrd.violation_noted = 0;
114
115     for (i = 0; i < encoder_context->layer.num_layers; i++) {
116         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = 26;
117         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 26;
118         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = 26;
119
120         if (i == 0) {
121             bitrate = encoder_context->brc.bits_per_second[0];
122             framerate = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
123         } else {
124             bitrate = (encoder_context->brc.bits_per_second[i] - encoder_context->brc.bits_per_second[i - 1]);
125             framerate = ((double)encoder_context->brc.framerate[i].num / (double)encoder_context->brc.framerate[i].den) -
126                 ((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
127         }
128
129         if (i == encoder_context->layer.num_layers - 1)
130             factor = 1.0;
131         else {
132             factor = ((double)encoder_context->brc.framerate[i].num / (double)encoder_context->brc.framerate[i].den) /
133                 ((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
134         }
135
136         hrd_factor = (double)bitrate / encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
137
138         mfc_context->hrd.buffer_size[i] = (unsigned int)(encoder_context->brc.hrd_buffer_size * hrd_factor);
139         mfc_context->hrd.current_buffer_fullness[i] =
140             (double)(encoder_context->brc.hrd_initial_buffer_fullness < encoder_context->brc.hrd_buffer_size) ?
141             encoder_context->brc.hrd_initial_buffer_fullness : encoder_context->brc.hrd_buffer_size / 2.;
142         mfc_context->hrd.current_buffer_fullness[i] *= hrd_factor;
143         mfc_context->hrd.target_buffer_fullness[i] = (double)encoder_context->brc.hrd_buffer_size * hrd_factor / 2.;
144         mfc_context->hrd.buffer_capacity[i] = (double)encoder_context->brc.hrd_buffer_size * hrd_factor / qp1_size;
145
146         if (encoder_context->layer.num_layers > 1) {
147             if (i == 0) {
148                 intra_period = (int)(encoder_context->brc.gop_size * factor);
149                 inum = 1;
150                 pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor);
151                 bnum = intra_period - inum - pnum;
152             } else {
153                 intra_period = (int)(encoder_context->brc.gop_size * factor) - intra_period;
154                 inum = 0;
155                 pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor) - pnum;
156                 bnum = intra_period - inum - pnum;
157             }
158         }
159
160         mfc_context->brc.gop_nums[i][SLICE_TYPE_I] = inum;
161         mfc_context->brc.gop_nums[i][SLICE_TYPE_P] = pnum;
162         mfc_context->brc.gop_nums[i][SLICE_TYPE_B] = bnum;
163
164         mfc_context->brc.target_frame_size[i][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
165                                                                     (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
166         mfc_context->brc.target_frame_size[i][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
167         mfc_context->brc.target_frame_size[i][SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
168
169         bpf = mfc_context->brc.bits_per_frame[i] = bitrate/framerate;
170
171         if ((bpf > qp51_size) && (bpf < qp1_size)) {
172             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
173         }
174         else if (bpf >= qp1_size)
175             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 1;
176         else if (bpf <= qp51_size)
177             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51;
178
179         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P];
180         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I];
181
182         BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I], 1, 51);
183         BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P], 1, 51);
184         BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B], 1, 51);
185     }
186 }
187
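/*
 * Leaky-bucket HRD tracking: drain the bits of the frame that was just
 * encoded, then refill with the per-frame budget.  On underflow (and on
 * overflow in non-VBR modes) the previous fullness is restored and the
 * violation is reported; in VBR mode an overflow is simply clamped to the
 * buffer size.
 */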
188 int intel_mfc_update_hrd(struct encode_state *encode_state,
189                          struct intel_encoder_context *encoder_context,
190                          int frame_bits)
191 {
192     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
193     int layer_id = encoder_context->layer.curr_frame_layer_id;
194     double prev_bf = mfc_context->hrd.current_buffer_fullness[layer_id];
195
196     mfc_context->hrd.current_buffer_fullness[layer_id] -= frame_bits;
197
198     if (mfc_context->hrd.buffer_size[layer_id] > 0 && mfc_context->hrd.current_buffer_fullness[layer_id] <= 0.) {
199         mfc_context->hrd.current_buffer_fullness[layer_id] = prev_bf;
200         return BRC_UNDERFLOW;
201     }
202     
203     mfc_context->hrd.current_buffer_fullness[layer_id] += mfc_context->brc.bits_per_frame[layer_id];
204     if (mfc_context->hrd.buffer_size[layer_id] > 0 && mfc_context->hrd.current_buffer_fullness[layer_id] > mfc_context->hrd.buffer_size[layer_id]) {
205         if (mfc_context->brc.mode == VA_RC_VBR)
206             mfc_context->hrd.current_buffer_fullness[layer_id] = mfc_context->hrd.buffer_size[layer_id];
207         else {
208             mfc_context->hrd.current_buffer_fullness[layer_id] = prev_bf;
209             return BRC_OVERFLOW;
210         }
211     }
212     return BRC_NO_HRD_VIOLATION;
213 }
214
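/*
 * Post-encode BRC adjustment: scale the QP for the next frame of the same
 * slice type by the ratio of the target frame size to the predicted next
 * frame size, limit the change to BRC_QP_MAX_CHANGE, add a correction based
 * on the HRD buffer fullness, and nudge the QPs of the other slice types so
 * the usual I/P/B QP offsets are preserved.
 */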
215 int intel_mfc_brc_postpack(struct encode_state *encode_state,
216                            struct intel_encoder_context *encoder_context,
217                            int frame_bits)
218 {
219     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
220     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
221     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; 
222     int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
223     int curr_frame_layer_id, next_frame_layer_id;
224     int qpi, qpp, qpb;
225     int qp; // quantizer of previously encoded slice of current type
226     int qpn; // predicted quantizer for next frame of current type in integer format
227     double qpf; // predicted quantizer for next frame of current type in float format
228     double delta_qp; // QP correction
229     int target_frame_size, frame_size_next;
230     /* Notes:
231      *  x - how far we are from HRD buffer borders
232      *  y - how far we are from target HRD buffer fullness
233      */
234     double x, y;
235     double frame_size_alpha;
236
237     if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0) {
238         curr_frame_layer_id = 0;
239         next_frame_layer_id = 0;
240     } else {
241         curr_frame_layer_id = encoder_context->layer.curr_frame_layer_id;
242         next_frame_layer_id = encoder_context->layer.frame_layer_ids[encoder_context->num_frames_in_sequence % encoder_context->layer.size_frame_layer_ids];
243     }
244
245     /* check HRD compliance first */
246     sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
247
248     if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
249         /* nothing */
250     } else {
251         next_frame_layer_id = curr_frame_layer_id;
252     }
253
254     mfc_context->brc.bits_prev_frame[curr_frame_layer_id] = frame_bits;
255     frame_bits = mfc_context->brc.bits_prev_frame[next_frame_layer_id];
256
257     mfc_context->brc.prev_slice_type[curr_frame_layer_id] = slicetype;
258     slicetype = mfc_context->brc.prev_slice_type[next_frame_layer_id];
259
260     /* 0 means the next frame is the first frame of the next layer */
261     if (frame_bits == 0)
262         return sts;
263
264     qpi = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I];
265     qpp = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P];
266     qpb = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B];
267
268     qp = mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype];
269
270     target_frame_size = mfc_context->brc.target_frame_size[next_frame_layer_id][slicetype];
271     if (mfc_context->hrd.buffer_capacity[next_frame_layer_id] < 5)
272         frame_size_alpha = 0;
273     else
274         frame_size_alpha = (double)mfc_context->brc.gop_nums[next_frame_layer_id][slicetype];
275     if (frame_size_alpha > 30) frame_size_alpha = 30;
276     frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
277         (double)(frame_size_alpha + 1.);
278
279     /* frame_size_next: avoid negative or too-small values */
280     if ((double)frame_size_next < (double)(target_frame_size * 0.25))
281         frame_size_next = (int)((double)target_frame_size * 0.25);
282
283     qpf = (double)qp * target_frame_size / frame_size_next;
284     qpn = (int)(qpf + 0.5);
285
286     if (qpn == qp) {
287         /* rounding qpf to qpn loses the fractional part: accumulate it and compensate once it exceeds +/-1 */
288         mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] += qpf - qpn;
289         if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] > 1.0) {
290             qpn++;
291             mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
292         } else if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] < -1.0) {
293             qpn--;
294             mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
295         }
296     }
297     /* make sure the QP does not change too fast */
298     if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
299     else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
300     /* make sure the predicted QP stays within the valid QP range */
301     BRC_CLIP(qpn, 1, 51);
302
303     /* compute a QP correction from how far the HRD buffer is from its target fullness */
304     x = mfc_context->hrd.target_buffer_fullness[next_frame_layer_id] - mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
305     if (x > 0) {
306         x /= mfc_context->hrd.target_buffer_fullness[next_frame_layer_id];
307         y = mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
308     }
309     else {
310         x /= (mfc_context->hrd.buffer_size[next_frame_layer_id] - mfc_context->hrd.target_buffer_fullness[next_frame_layer_id]);
311         y = mfc_context->hrd.buffer_size[next_frame_layer_id] - mfc_context->hrd.current_buffer_fullness[next_frame_layer_id];
312     }
313     if (y < 0.01) y = 0.01;
314     if (x > 1) x = 1;
315     else if (x < -1) x = -1;
316
317     delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
318     qpn = (int)(qpn + delta_qp + 0.5);
319
320     /* make sure the predicted QP stays within the valid QP range */
321     BRC_CLIP(qpn, 1, 51);
322
323     if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
324         /* correcting QPs of slices of other types */
325         if (slicetype == SLICE_TYPE_P) {
326             if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
327                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
328             if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
329                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
330         } else if (slicetype == SLICE_TYPE_I) {
331             if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
332                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
333             if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
334                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
335         } else { // SLICE_TYPE_B
336             if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
337                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
338             if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
339                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
340         }
341         BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I], 1, 51);
342         BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P], 1, 51);
343         BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B], 1, 51);
344     } else if (sts == BRC_UNDERFLOW) { // underflow
345         if (qpn <= qp) qpn = qp + 1;
346         if (qpn > 51) {
347             qpn = 51;
348             sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
349         }
350     } else if (sts == BRC_OVERFLOW) {
351         if (qpn >= qp) qpn = qp - 1;
352         if (qpn < 1) { // overflow even at the minimum QP
353             qpn = 1;
354             sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
355         }
356     }
357
358     mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype] = qpn;
359
360     return sts;
361 }
362
363 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
364                                        struct intel_encoder_context *encoder_context)
365 {
366     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
367     unsigned int rate_control_mode = encoder_context->rate_control_mode;
368     int target_bit_rate = encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
369     
370     // currently only CBR mode is supported.
371     if (rate_control_mode == VA_RC_CBR) {
372         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
373         mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
374         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
375         mfc_context->vui_hrd.i_frame_number = 0;
376
377         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24; 
378         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
379         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
380     }
381
382 }
383
384 void 
385 intel_mfc_hrd_context_update(struct encode_state *encode_state, 
386                              struct gen6_mfc_context *mfc_context)
387 {
388     mfc_context->vui_hrd.i_frame_number++;
389 }
390
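/*
 * Return 0 when the submitted slices cover exactly one whole frame (the
 * macroblock count matches width_in_mbs * height_in_mbs), 1 otherwise.
 */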
391 int intel_mfc_interlace_check(VADriverContextP ctx,
392                               struct encode_state *encode_state,
393                               struct intel_encoder_context *encoder_context)
394 {
395     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
396     VAEncSliceParameterBufferH264 *pSliceParameter;
397     int i;
398     int mbCount = 0;
399     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
400     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
401   
402     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
403         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer; 
404         mbCount += pSliceParameter->num_macroblocks; 
405     }
406     
407     if ( mbCount == ( width_in_mbs * height_in_mbs ) )
408         return 0;
409
410     return 1;
411 }
412
413 void intel_mfc_brc_prepare(struct encode_state *encode_state,
414                            struct intel_encoder_context *encoder_context)
415 {
416     unsigned int rate_control_mode = encoder_context->rate_control_mode;
417
418     if (encoder_context->codec != CODEC_H264 &&
419         encoder_context->codec != CODEC_H264_MVC)
420         return;
421
422     if (rate_control_mode == VA_RC_CBR) {
423         /* Programming bit rate control */
424         if (encoder_context->brc.need_reset) {
425             intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
426             intel_mfc_brc_init(encode_state, encoder_context);
427         }
428
429         /* Programming HRD control */
430         if (encoder_context->brc.need_reset)
431             intel_mfc_hrd_context_init(encode_state, encoder_context);    
432     }
433 }
434
435 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
436                                               struct encode_state *encode_state,
437                                               struct intel_encoder_context *encoder_context,
438                                               struct intel_batchbuffer *slice_batch)
439 {
440     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
441     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
442     unsigned int rate_control_mode = encoder_context->rate_control_mode;
443     unsigned int skip_emul_byte_cnt;
444
445     if (encode_state->packed_header_data[idx]) {
446         VAEncPackedHeaderParameterBuffer *param = NULL;
447         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
448         unsigned int length_in_bits;
449
450         assert(encode_state->packed_header_param[idx]);
451         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
452         length_in_bits = param->bit_length;
453
454         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
455         mfc_context->insert_object(ctx,
456                                    encoder_context,
457                                    header_data,
458                                    ALIGN(length_in_bits, 32) >> 5,
459                                    length_in_bits & 0x1f,
460                                    skip_emul_byte_cnt,
461                                    0,
462                                    0,
463                                    !param->has_emulation_bytes,
464                                    slice_batch);
465     }
466
467     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
468
469     if (encode_state->packed_header_data[idx]) {
470         VAEncPackedHeaderParameterBuffer *param = NULL;
471         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
472         unsigned int length_in_bits;
473
474         assert(encode_state->packed_header_param[idx]);
475         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
476         length_in_bits = param->bit_length;
477
478         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
479
480         mfc_context->insert_object(ctx,
481                                    encoder_context,
482                                    header_data,
483                                    ALIGN(length_in_bits, 32) >> 5,
484                                    length_in_bits & 0x1f,
485                                    skip_emul_byte_cnt,
486                                    0,
487                                    0,
488                                    !param->has_emulation_bytes,
489                                    slice_batch);
490     }
491     
492     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
493
494     if (encode_state->packed_header_data[idx]) {
495         VAEncPackedHeaderParameterBuffer *param = NULL;
496         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
497         unsigned int length_in_bits;
498
499         assert(encode_state->packed_header_param[idx]);
500         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
501         length_in_bits = param->bit_length;
502
503         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
504         mfc_context->insert_object(ctx,
505                                    encoder_context,
506                                    header_data,
507                                    ALIGN(length_in_bits, 32) >> 5,
508                                    length_in_bits & 0x1f,
509                                    skip_emul_byte_cnt,
510                                    0,
511                                    0,
512                                    !param->has_emulation_bytes,
513                                    slice_batch);
514     } else if (rate_control_mode == VA_RC_CBR) {
515         // this is the first AU
516         struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
517
518         unsigned char *sei_data = NULL;
519     
520         int length_in_bits = build_avc_sei_buffer_timing(
521             mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
522             mfc_context->vui_hrd.i_initial_cpb_removal_delay,
523             0,
524             mfc_context->vui_hrd.i_cpb_removal_delay_length,
                mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
525             mfc_context->vui_hrd.i_dpb_output_delay_length,
526             0,
527             &sei_data);
528         mfc_context->insert_object(ctx,
529                                    encoder_context,
530                                    (unsigned int *)sei_data,
531                                    ALIGN(length_in_bits, 32) >> 5,
532                                    length_in_bits & 0x1f,
533                                    5,
534                                    0,   
535                                    0,   
536                                    1,
537                                    slice_batch);  
538         free(sei_data);
539     }
540 }
541
542 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, 
543                                struct encode_state *encode_state,
544                                struct intel_encoder_context *encoder_context)
545 {
546     struct i965_driver_data *i965 = i965_driver_data(ctx);
547     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
548     struct object_surface *obj_surface; 
549     struct object_buffer *obj_buffer;
550     GenAvcSurface *gen6_avc_surface;
551     dri_bo *bo;
552     VAStatus vaStatus = VA_STATUS_SUCCESS;
553     int i, j, enable_avc_ildb = 0;
554     VAEncSliceParameterBufferH264 *slice_param;
555     struct i965_coded_buffer_segment *coded_buffer_segment;
556     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
557     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
558     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
559
560     if (IS_GEN6(i965->intel.device_info)) {
561         /* On SNB the width must be fixed to 128 MBs for the DMV buffer */
562         width_in_mbs = 128;
563     }
564
565     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
566         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
567         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
568
569         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
570             assert((slice_param->slice_type == SLICE_TYPE_I) ||
571                    (slice_param->slice_type == SLICE_TYPE_SI) ||
572                    (slice_param->slice_type == SLICE_TYPE_P) ||
573                    (slice_param->slice_type == SLICE_TYPE_SP) ||
574                    (slice_param->slice_type == SLICE_TYPE_B));
575
576             if (slice_param->disable_deblocking_filter_idc != 1) {
577                 enable_avc_ildb = 1;
578                 break;
579             }
580
581             slice_param++;
582         }
583     }
584
585     /* Set up all the input & output objects */
586
587     /* Set up the current frame and its direct MV buffer */
588     obj_surface = encode_state->reconstructed_object;
589     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
590
591     if ( obj_surface->private_data == NULL) {
592         gen6_avc_surface = calloc(1, sizeof(GenAvcSurface));
593         assert(gen6_avc_surface);
594         gen6_avc_surface->dmv_top = 
595             dri_bo_alloc(i965->intel.bufmgr,
596                          "Buffer",
597                          68 * width_in_mbs * height_in_mbs, 
598                          64);
599         gen6_avc_surface->dmv_bottom = 
600             dri_bo_alloc(i965->intel.bufmgr,
601                          "Buffer",
602                          68 * width_in_mbs * height_in_mbs, 
603                          64);
604         assert(gen6_avc_surface->dmv_top);
605         assert(gen6_avc_surface->dmv_bottom);
606         obj_surface->private_data = (void *)gen6_avc_surface;
607         obj_surface->free_private_data = (void *)gen_free_avc_surface; 
608     }
609     gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
610     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
611     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
612     dri_bo_reference(gen6_avc_surface->dmv_top);
613     dri_bo_reference(gen6_avc_surface->dmv_bottom);
614
615     if (enable_avc_ildb) {
616         mfc_context->post_deblocking_output.bo = obj_surface->bo;
617         dri_bo_reference(mfc_context->post_deblocking_output.bo);
618     } else {
619         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
620         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
621     }
622
623     mfc_context->surface_state.width = obj_surface->orig_width;
624     mfc_context->surface_state.height = obj_surface->orig_height;
625     mfc_context->surface_state.w_pitch = obj_surface->width;
626     mfc_context->surface_state.h_pitch = obj_surface->height;
627     
628     /* Set up reference frames and direct MV buffers */
629     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
630         obj_surface = encode_state->reference_objects[i];
631         
632         if (obj_surface && obj_surface->bo) {
633             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
634             dri_bo_reference(obj_surface->bo);
635
636             /* Check DMV buffer */
637             if ( obj_surface->private_data == NULL) {
638                 
639                 gen6_avc_surface = calloc(1, sizeof(GenAvcSurface));
640                 assert(gen6_avc_surface);
641                 gen6_avc_surface->dmv_top = 
642                     dri_bo_alloc(i965->intel.bufmgr,
643                                  "Buffer",
644                                  68 * width_in_mbs * height_in_mbs, 
645                                  64);
646                 gen6_avc_surface->dmv_bottom = 
647                     dri_bo_alloc(i965->intel.bufmgr,
648                                  "Buffer",
649                                  68 * width_in_mbs * height_in_mbs, 
650                                  64);
651                 assert(gen6_avc_surface->dmv_top);
652                 assert(gen6_avc_surface->dmv_bottom);
653                 obj_surface->private_data = gen6_avc_surface;
654                 obj_surface->free_private_data = gen_free_avc_surface; 
655             }
656     
657             gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
658             /* Setup DMV buffer */
659             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
660             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
661             dri_bo_reference(gen6_avc_surface->dmv_top);
662             dri_bo_reference(gen6_avc_surface->dmv_bottom);
663         } else {
664             break;
665         }
666     }
667
668     mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
669     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
670
671     obj_buffer = encode_state->coded_buf_object;
672     bo = obj_buffer->buffer_store->bo;
673     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
674     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
675     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
676     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
677     
678     dri_bo_map(bo, 1);
679     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
680     coded_buffer_segment->mapped = 0;
681     coded_buffer_segment->codec = encoder_context->codec;
682     dri_bo_unmap(bo);
683
684     return vaStatus;
685 }
686 /*
687  * A LUT entry packs a cost as a pair of 4-bit fields: (shift, base),
688  * i.e. value = base << shift, with base in the range [1, 15],
689  * so a cost must be converted to the nearest representable LUT value.
690  * Derivation of the candidate shifts (with n = floor(log2(value))):
691  *    base * 2^k = 2^n * (1 + deltaX)
692  *    k + log2(base) = n + log2(1 + deltaX)
693  *    log2(base) = n - k + log2(1 + deltaX)
694  *    As base is in the range [1, 15]:
695  *      4 > n - k + log2(1 + deltaX) >= 0
696  *      =>    n + log2(1 + deltaX) >= k > n - 4 + log2(1 + deltaX)
697  *    Each such k is tried and the pair with the smallest error is kept.
698  */
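/*
 * Worked example (illustrative): value = 40 gives logvalue = 5, so the
 * shifts j = 2..5 are tried.  At j = 2, base = (40 + 1) >> 2 = 10 and
 * 10 << 2 = 40 reproduces the value exactly, so the search stops and
 * (2 << 4) | 10 = 0x2a is returned.
 */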
699 int intel_format_lutvalue(int value, int max)
700 {
701     int ret;
702     int logvalue, temp1, temp2;
703
704     if (value <= 0)
705         return 0;
706
707     logvalue = (int)(log2f((float)value));
708     if (logvalue < 4) {
709         ret = value;
710     } else {
711         int error, temp_value, base, j, temp_err;
712         error = value;
713         j = logvalue - 4 + 1;
714         ret = -1;
715         for(; j <= logvalue; j++) {
716             if (j == 0) {
717                 base = value >> j;
718             } else {
719                 base = (value + (1 << (j - 1)) - 1) >> j;
720             }
721             if (base >= 16)
722                 continue;
723
724             temp_value = base << j;
725             temp_err = abs(value - temp_value);
726             if (temp_err < error) {
727                 error = temp_err;
728                 ret = (j << 4) | base;
729                 if (temp_err == 0)
730                     break;
731             }
732         }
733     }
734     temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
735     temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
736     if (temp1 > temp2)
737         ret = max;
738     return ret;
739
740 }
741
742
743 #define         QP_MAX                  52
744 #define         VP8_QP_MAX              128
745
746
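/*
 * Approximate Lagrange multiplier for the mode/MV cost tables:
 * lambda = round(2^(qp / 6 - 2)), with the exponent clamped at 0 so that
 * lambda is never below 1.
 */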
747 static float intel_lambda_qp(int qp)
748 {
749     float value, lambdaf;
750     value = qp;
751     value = value / 6 - 2;
752     if (value < 0)
753         value = 0;
754     lambdaf = roundf(powf(2, value));
755     return lambdaf;
756 }
757
758 static
759 void intel_h264_calc_mbmvcost_qp(int qp,
760                                  int slice_type,
761                                  uint8_t *vme_state_message)
762 {
763     int m_cost, j, mv_count;
764     float   lambda, m_costf;
765
766     assert(qp <= QP_MAX); 
767     lambda = intel_lambda_qp(qp);
768
769     m_cost = lambda;
770     vme_state_message[MODE_CHROMA_INTRA] = 0;
771     vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
772
773     if (slice_type == SLICE_TYPE_I) {
774         vme_state_message[MODE_INTRA_16X16] = 0;
775         m_cost = lambda * 4;
776         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
777         m_cost = lambda * 16; 
778         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
779         m_cost = lambda * 3;
780         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
781     } else {
782         m_cost = 0;
783         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
784         for (j = 1; j < 3; j++) {
785             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
786             m_cost = (int)m_costf;
787             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
788         }
789         mv_count = 3;
790         for (j = 4; j <= 64; j *= 2) {
791             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
792             m_cost = (int)m_costf;
793             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
794             mv_count++;
795         }
796
797         if (qp <= 25) {
798             vme_state_message[MODE_INTRA_16X16] = 0x4a;
799             vme_state_message[MODE_INTRA_8X8] = 0x4a;
800             vme_state_message[MODE_INTRA_4X4] = 0x4a;
801             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
802             vme_state_message[MODE_INTER_16X16] = 0x4a;
803             vme_state_message[MODE_INTER_16X8] = 0x4a;
804             vme_state_message[MODE_INTER_8X8] = 0x4a;
805             vme_state_message[MODE_INTER_8X4] = 0x4a;
806             vme_state_message[MODE_INTER_4X4] = 0x4a;
807             vme_state_message[MODE_INTER_BWD] = 0x2a;
808             return;
809         }
810         m_cost = lambda * 10;
811         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
812         m_cost = lambda * 14;
813         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
814         m_cost = lambda * 24; 
815         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
816         m_costf = lambda * 3.5;
817         m_cost = m_costf;
818         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
819         if (slice_type == SLICE_TYPE_P) {
820             m_costf = lambda * 2.5;
821             m_cost = m_costf;
822             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
823             m_costf = lambda * 4;
824             m_cost = m_costf;
825             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
826             m_costf = lambda * 1.5;
827             m_cost = m_costf;
828             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
829             m_costf = lambda * 3;
830             m_cost = m_costf;
831             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
832             m_costf = lambda * 5;
833             m_cost = m_costf;
834             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
835             /* BWD is not used in P-frame */
836             vme_state_message[MODE_INTER_BWD] = 0;
837         } else {
838             m_costf = lambda * 2.5;
839             m_cost = m_costf;
840             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
841             m_costf = lambda * 5.5;
842             m_cost = m_costf;
843             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
844             m_costf = lambda * 3.5;
845             m_cost = m_costf;
846             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
847             m_costf = lambda * 5.0;
848             m_cost = m_costf;
849             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
850             m_costf = lambda * 6.5;
851             m_cost = m_costf;
852             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
853             m_costf = lambda * 1.5;
854             m_cost = m_costf;
855             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
856         }
857     }
858     return;
859 }
860
861 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
862                                 struct encode_state *encode_state,
863                                 struct intel_encoder_context *encoder_context)
864 {
865     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
866     struct gen6_vme_context *vme_context = encoder_context->vme_context;
867     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
868     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
869     int qp;
870     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
871
872     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
873
874     if (encoder_context->rate_control_mode == VA_RC_CQP)
875         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
876     else
877         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
878
879     if (vme_state_message == NULL)
880         return;
881
882     intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
883 }
884
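/*
 * VP8 variant of the MB/MV cost setup: the VP8 quantizer index (0..127) is
 * first rescaled into the H.264 QP range before computing lambda, and only
 * the prediction modes VP8 actually has are programmed (no 8x8 intra, and
 * MODE_INTER_BWD is forced to 0).
 */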
885 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
886                                 struct encode_state *encode_state,
887                                 struct intel_encoder_context *encoder_context)
888 {
889     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
890     struct gen6_vme_context *vme_context = encoder_context->vme_context;
891     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
892     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
893     int qp, m_cost, j, mv_count;
894     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
895     float   lambda, m_costf;
896
897     int is_key_frame = !pic_param->pic_flags.bits.frame_type;
898     int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
899   
900     if (vme_state_message == NULL)
901         return;
902  
903     if (encoder_context->rate_control_mode == VA_RC_CQP)
904         qp = q_matrix->quantization_index[0];
905     else
906         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
907
908     lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
909
910     m_cost = lambda;
911     vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
912
913     if (is_key_frame) {
914         vme_state_message[MODE_INTRA_16X16] = 0;
915         m_cost = lambda * 16; 
916         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
917         m_cost = lambda * 3;
918         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
919     } else {
920         m_cost = 0;
921         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
922         for (j = 1; j < 3; j++) {
923             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
924             m_cost = (int)m_costf;
925             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
926         }
927         mv_count = 3;
928         for (j = 4; j <= 64; j *= 2) {
929             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
930             m_cost = (int)m_costf;
931             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
932             mv_count++;
933         }
934
935         if (qp < 92 ) {
936             vme_state_message[MODE_INTRA_16X16] = 0x4a;
937             vme_state_message[MODE_INTRA_4X4] = 0x4a;
938             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
939             vme_state_message[MODE_INTER_16X16] = 0x4a;
940             vme_state_message[MODE_INTER_16X8] = 0x4a;
941             vme_state_message[MODE_INTER_8X8] = 0x4a;
942             vme_state_message[MODE_INTER_4X4] = 0x4a;
943             vme_state_message[MODE_INTER_BWD] = 0;
944             return;
945         }
946         m_cost = lambda * 10;
947         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
948         m_cost = lambda * 24; 
949         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
950             
951         m_costf = lambda * 3.5;
952         m_cost = m_costf;
953         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
954
955         m_costf = lambda * 2.5;
956         m_cost = m_costf;
957         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
958         m_costf = lambda * 4;
959         m_cost = m_costf;
960         vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
961         m_costf = lambda * 1.5;
962         m_cost = m_costf;
963         vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
964         m_costf = lambda * 5;
965         m_cost = m_costf;
966         vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
967         /* BWD is not used in P-frame */
968         vme_state_message[MODE_INTER_BWD] = 0;
969     }
970 }
971
972 #define         MB_SCOREBOARD_A         (1 << 0)
973 #define         MB_SCOREBOARD_B         (1 << 1)
974 #define         MB_SCOREBOARD_C         (1 << 2)
975 void 
976 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
977 {
978     vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
979     vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
980     vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
981                                                            MB_SCOREBOARD_B |
982                                                            MB_SCOREBOARD_C);
983
984     /* In VME prediction the current MB depends on its neighbouring
985      * A/B/C macroblocks, so the left/up/up-right dependencies must
986      * be tracked by the scoreboard.
987      */
988     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
989     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
990     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
991     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
992     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
993     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
994
995     vme_context->gpe_context.vfe_desc7.dword = 0;
996     return;
997 }
998
999 /* return 0 if the MB at (x_index, y_index) lies inside the current slice, -1 otherwise */
1000 static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
1001 {
1002     int mb_index;
1003     if (x_index < 0 || x_index >= mb_width)
1004         return -1;
1005     if (y_index < 0 || y_index >= mb_height)
1006         return -1;
1007
1008     mb_index = y_index * mb_width + x_index;
1009     if (mb_index < first_mb || mb_index > (first_mb + num_mb))
1010         return -1;
1011     return 0;
1012 }
1013
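/*
 * Fill the VME batch buffer with one MEDIA_OBJECT command per macroblock,
 * walking each slice in a wavefront order (every step of the inner loop
 * moves two MBs left and one MB down) so that a macroblock is dispatched
 * only after its A (left), B (top) and C (top-right) neighbours, matching
 * the scoreboard mask programmed in gen7_vme_scoreboard_init().
 */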
1014 void
1015 gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
1016                                      struct encode_state *encode_state,
1017                                      int mb_width, int mb_height,
1018                                      int kernel,
1019                                      int transform_8x8_mode_flag,
1020                                      struct intel_encoder_context *encoder_context)
1021 {
1022     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1023     int mb_row;
1024     int s;
1025     unsigned int *command_ptr;
1026     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1027     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1028     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1029     int qp,qp_mb,qp_index;
1030     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1031
1032     if (encoder_context->rate_control_mode == VA_RC_CQP)
1033         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1034     else
1035         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1036
1037 #define         USE_SCOREBOARD          (1 << 21)
1038  
1039     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1040     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1041
1042     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1043         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
1044         int first_mb = pSliceParameter->macroblock_address;
1045         int num_mb = pSliceParameter->num_macroblocks;
1046         unsigned int mb_intra_ub, score_dep;
1047         int x_outer, y_outer, x_inner, y_inner;
1048         int xtemp_outer = 0;
1049
1050         x_outer = first_mb % mb_width;
1051         y_outer = first_mb / mb_width;
1052         mb_row = y_outer;
1053
1054         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1055             x_inner = x_outer;
1056             y_inner = y_outer;
1057             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1058                 mb_intra_ub = 0;
1059                 score_dep = 0;
1060                 if (x_inner != 0) {
1061                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1062                     score_dep |= MB_SCOREBOARD_A; 
1063                 }
1064                 if (y_inner != mb_row) {
1065                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1066                     score_dep |= MB_SCOREBOARD_B;
1067                     if (x_inner != 0)
1068                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1069                     if (x_inner != (mb_width -1)) {
1070                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1071                         score_dep |= MB_SCOREBOARD_C;
1072                     }
1073                 }
1074
1075                 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1076                 *command_ptr++ = kernel;
1077                 *command_ptr++ = USE_SCOREBOARD;
1078                 /* Indirect data */
1079                 *command_ptr++ = 0;
1080                 /* the (X, Y) term of scoreboard */
1081                 *command_ptr++ = ((y_inner << 16) | x_inner);
1082                 *command_ptr++ = score_dep;
1083                 /*inline data */
1084                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1085                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1086                 /* QP occupies one byte */
1087                 if (vme_context->roi_enabled) {
1088                     qp_index = y_inner * mb_width + x_inner;
1089                     qp_mb = *(vme_context->qp_per_mb + qp_index);
1090                 } else
1091                     qp_mb = qp;
1092                 *command_ptr++ = qp_mb;
1093                 x_inner -= 2;
1094                 y_inner += 1;
1095             }
1096             x_outer += 1;
1097         }
1098
1099         xtemp_outer = mb_width - 2;
1100         if (xtemp_outer < 0)
1101             xtemp_outer = 0;
1102         x_outer = xtemp_outer;
1103         y_outer = first_mb / mb_width;
1104         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
1105             y_inner = y_outer;
1106             x_inner = x_outer;
1107             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1108                 mb_intra_ub = 0;
1109                 score_dep = 0;
1110                 if (x_inner != 0) {
1111                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1112                     score_dep |= MB_SCOREBOARD_A; 
1113                 }
1114                 if (y_inner != mb_row) {
1115                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1116                     score_dep |= MB_SCOREBOARD_B;
1117                     if (x_inner != 0)
1118                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1119
1120                     if (x_inner != (mb_width -1)) {
1121                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1122                         score_dep |= MB_SCOREBOARD_C;
1123                     }
1124                 }
1125
1126                 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1127                 *command_ptr++ = kernel;
1128                 *command_ptr++ = USE_SCOREBOARD;
1129                 /* Indirect data */
1130                 *command_ptr++ = 0;
1131                 /* the (X, Y) term of scoreboard */
1132                 *command_ptr++ = ((y_inner << 16) | x_inner);
1133                 *command_ptr++ = score_dep;
1134                 /*inline data */
1135                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1136                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1137                 /* qp occupies one byte */
1138                 if (vme_context->roi_enabled) {
1139                     qp_index = y_inner * mb_width + x_inner;
1140                     qp_mb = *(vme_context->qp_per_mb + qp_index);
1141                 } else
1142                     qp_mb = qp;
1143                 *command_ptr++ = qp_mb;
1144
1145                 x_inner -= 2;
1146                 y_inner += 1;
1147             }
1148             x_outer++;
1149             if (x_outer >= mb_width) {
1150                 y_outer += 1;
1151                 x_outer = xtemp_outer;
1152             }           
1153         }
1154     }
1155
1156     *command_ptr++ = 0;
1157     *command_ptr++ = MI_BATCH_BUFFER_END;
1158
1159     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1160 }
1161
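/*
 * Pack one entry of MFX_AVC_REF_IDX_STATE:
 *   bit 6     long-term reference flag
 *   bit 5     set for frame references (neither or both field flags set)
 *   bits 4:1  frame store index
 *   bit 0     set when the reference is a bottom field only
 */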
1162 static uint8_t
1163 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1164 {
1165     unsigned int is_long_term =
1166         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1167     unsigned int is_top_field =
1168         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1169     unsigned int is_bottom_field =
1170         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1171
1172     return ((is_long_term                         << 6) |
1173             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1174             (frame_store_id                       << 1) |
1175             ((is_top_field ^ 1) & is_bottom_field));
1176 }
1177
1178 void
1179 intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
1180                             struct encode_state *encode_state,
1181                             struct intel_encoder_context *encoder_context)
1182 {
1183     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1184     struct intel_batchbuffer *batch = encoder_context->base.batch;
1185     int slice_type;
1186     struct object_surface *obj_surface;
1187     unsigned int fref_entry, bref_entry;
1188     int frame_index, i;
1189     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1190
1191     fref_entry = 0x80808080;
1192     bref_entry = 0x80808080;
1193     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1194
1195     if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
1196         int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
1197
1198         if (ref_idx_l0 > 3) {
1199             WARN_ONCE("ref_idx_l0 is out of range\n");
1200             ref_idx_l0 = 0;
1201         }
1202
1203         obj_surface = vme_context->used_reference_objects[0];
1204         frame_index = -1;
1205         for (i = 0; i < 16; i++) {
1206             if (obj_surface &&
1207                 obj_surface == encode_state->reference_objects[i]) {
1208                 frame_index = i;
1209                 break;
1210             }
1211         }
1212         if (frame_index == -1) {
1213             WARN_ONCE("RefPicList0 is not found in DPB!\n");
1214         } else {
1215             int ref_idx_l0_shift = ref_idx_l0 * 8;
1216             fref_entry &= ~(0xFF << ref_idx_l0_shift);
1217             fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
1218         }
1219     }
1220
1221     if (slice_type == SLICE_TYPE_B) {
1222         int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
1223
1224         if (ref_idx_l1 > 3) {
1225             WARN_ONCE("ref_idx_l1 is out of range\n");
1226             ref_idx_l1 = 0;
1227         }
1228
1229         obj_surface = vme_context->used_reference_objects[1];
1230         frame_index = -1;
1231         for (i = 0; i < 16; i++) {
1232             if (obj_surface &&
1233                 obj_surface == encode_state->reference_objects[i]) {
1234                 frame_index = i;
1235                 break;
1236             }
1237         }
1238         if (frame_index == -1) {
1239             WARN_ONCE("RefPicList1 is not found in DPB!\n");
1240         } else {
1241             int ref_idx_l1_shift = ref_idx_l1 * 8;
1242             bref_entry &= ~(0xFF << ref_idx_l1_shift);
1243             bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
1244         }
1245     }
1246
1247     BEGIN_BCS_BATCH(batch, 10);
1248     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1249     OUT_BCS_BATCH(batch, 0);                  //Select L0
1250     OUT_BCS_BATCH(batch, fref_entry);         //Only 1 reference
1251     for(i = 0; i < 7; i++) {
1252         OUT_BCS_BATCH(batch, 0x80808080);
1253     }
1254     ADVANCE_BCS_BATCH(batch);
1255
1256     BEGIN_BCS_BATCH(batch, 10);
1257     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1258     OUT_BCS_BATCH(batch, 1);                  //Select L1
1259     OUT_BCS_BATCH(batch, bref_entry);         //Only 1 reference
1260     for(i = 0; i < 7; i++) {
1261         OUT_BCS_BATCH(batch, 0x80808080);
1262     }
1263     ADVANCE_BCS_BATCH(batch);
1264 }
1265
1266
1267 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1268                                  struct encode_state *encode_state,
1269                                  struct intel_encoder_context *encoder_context)
1270 {
1271     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1272     uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1273     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1274     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1275     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1276     uint32_t mv_x, mv_y;
1277     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1278     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1279     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1280
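         /* maximum MV range (mv_x, mv_y) written into the VME state message as
          * MPEG2_MV_RANGE below, chosen per MPEG-2 level; an unrecognized level
          * falls back to the LOW-level range
          */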
1281     if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1282         mv_x = 512;
1283         mv_y = 64;
1284     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1285         mv_x = 1024;
1286         mv_y = 128;
1287     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1288         mv_x = 2048;
1289         mv_y = 128;
1290     } else {
1291         WARN_ONCE("Incorrect Mpeg2 level setting!\n");
1292         mv_x = 512;
1293         mv_y = 64;
1294     }
1295
1296     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1297     if (pic_param->picture_type != VAEncPictureTypeIntra) {
1298         int qp, m_cost, j, mv_count;
1299         float   lambda, m_costf;
1300         slice_param = (VAEncSliceParameterBufferMPEG2 *)
1301             encode_state->slice_params_ext[0]->buffer;
1302         qp = slice_param->quantiser_scale_code;
1303         lambda = intel_lambda_qp(qp);
1304         /* no intra prediction is used here, so these costs are zero */
1305         vme_state_message[MODE_INTRA_8X8] = 0;
1306         vme_state_message[MODE_INTRA_4X4] = 0;
1307         vme_state_message[MODE_INTER_MV0] = 0;
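             /* the remaining MV cost entries (MV1..MV7) grow roughly with log2 of
              * the MV magnitude, scaled by the QP-derived lambda
              */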
1308         for (j = 1; j < 3; j++) {
1309             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1310             m_cost = (int)m_costf;
1311             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1312         }
1313         mv_count = 3;
1314         for (j = 4; j <= 64; j *= 2) {
1315             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1316             m_cost = (int)m_costf;
1317             vme_state_message[MODE_INTER_MV0 + mv_count] =
1318                 intel_format_lutvalue(m_cost, 0x6f);
1319             mv_count++;
1320         }
1321         m_cost = lambda;
1322         /* Only the 16x16 search is performed, so the mode costs for the
1323          * other partitions (e.g. 16x8/8x8) can be ignored.
1324          */
1325         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1326         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1327
1328         vme_state_message[MODE_INTER_16X8] = 0;
1329         vme_state_message[MODE_INTER_8X8] = 0;
1330         vme_state_message[MODE_INTER_8X4] = 0;
1331         vme_state_message[MODE_INTER_4X4] = 0;
1332         vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
1333
1334     }
1335     vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1336
1337     vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
1338         width_in_mbs;
1339 }
1340
1341 void
1342 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
1343                                            struct encode_state *encode_state,
1344                                            int mb_width, int mb_height,
1345                                            int kernel,
1346                                            struct intel_encoder_context *encoder_context)
1347 {
1348     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1349     unsigned int *command_ptr;
1350
1351 #define         MPEG2_SCOREBOARD                (1 << 21)
1352
1353     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1354     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1355
1356     {
1357         unsigned int mb_intra_ub, score_dep;
1358         int x_outer, y_outer, x_inner, y_inner;
1359         int xtemp_outer = 0;
1360         int first_mb = 0;
1361         int num_mb = mb_width * mb_height;
1362
1363         x_outer = 0;
1364         y_outer = 0;
1365
1366
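             /*
              * MEDIA_OBJECT commands are emitted along diagonal wavefronts: the
              * inner loop steps (x - 2, y + 1), so every macroblock is emitted
              * after the macroblocks its scoreboard dependencies (A/B/C) refer to.
              */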
1367         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1368             x_inner = x_outer;
1369             y_inner = y_outer;
1370             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1371                 mb_intra_ub = 0;
1372                 score_dep = 0;
1373                 if (x_inner != 0) {
1374                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1375                     score_dep |= MB_SCOREBOARD_A; 
1376                 }
1377                 if (y_inner != 0) {
1378                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1379                     score_dep |= MB_SCOREBOARD_B;
1380
1381                     if (x_inner != 0)
1382                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1383
1384                     if (x_inner != (mb_width -1)) {
1385                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1386                         score_dep |= MB_SCOREBOARD_C;
1387                     }
1388                 }
1389
1390                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1391                 *command_ptr++ = kernel;
1392                 *command_ptr++ = MPEG2_SCOREBOARD;
1393                 /* Indirect data */
1394                 *command_ptr++ = 0;
1395                 /* the (X, Y) term of scoreboard */
1396                 *command_ptr++ = ((y_inner << 16) | x_inner);
1397                 *command_ptr++ = score_dep;
1398                 /*inline data */
1399                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1400                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1401                 x_inner -= 2;
1402                 y_inner += 1;
1403             }
1404             x_outer += 1;
1405         }
1406
1407         xtemp_outer = mb_width - 2;
1408         if (xtemp_outer < 0)
1409             xtemp_outer = 0;
1410         x_outer = xtemp_outer;
1411         y_outer = 0;
1412         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
1413             y_inner = y_outer;
1414             x_inner = x_outer;
1415             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1416                 mb_intra_ub = 0;
1417                 score_dep = 0;
1418                 if (x_inner != 0) {
1419                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1420                     score_dep |= MB_SCOREBOARD_A; 
1421                 }
1422                 if (y_inner != 0) {
1423                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1424                     score_dep |= MB_SCOREBOARD_B;
1425
1426                     if (x_inner != 0)
1427                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1428
1429                     if (x_inner != (mb_width -1)) {
1430                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1431                         score_dep |= MB_SCOREBOARD_C;
1432                     }
1433                 }
1434
1435                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1436                 *command_ptr++ = kernel;
1437                 *command_ptr++ = MPEG2_SCOREBOARD;
1438                 /* Indirect data */
1439                 *command_ptr++ = 0;
1440                 /* the (X, Y) term of scoreboard */
1441                 *command_ptr++ = ((y_inner << 16) | x_inner);
1442                 *command_ptr++ = score_dep;
1443                 /*inline data */
1444                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1445                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1446
1447                 x_inner -= 2;
1448                 y_inner += 1;
1449             }
1450             x_outer++;
1451             if (x_outer >= mb_width) {
1452                 y_outer += 1;
1453                 x_outer = xtemp_outer;
1454             }           
1455         }
1456     }
1457
1458     *command_ptr++ = 0;
1459     *command_ptr++ = MI_BATCH_BUFFER_END;
1460
1461     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1462     return;
1463 }
1464
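     /*
      * Return the index of the entry in ref_list whose POC is closest to the
      * current picture: past references for dir == 0 (list 0), future references
      * for dir == 1 (list 1). Returns -1 if no suitable reference is found.
      */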
1465 static int
1466 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1467                           VAPictureH264 *ref_list,
1468                           int num_pictures,
1469                           int dir)
1470 {
1471     int i, found = -1, min = 0x7FFFFFFF;
1472
1473     for (i = 0; i < num_pictures; i++) {
1474         int tmp;
1475
1476         if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1477             (ref_list[i].picture_id == VA_INVALID_SURFACE))
1478             break;
1479
1480         tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1481
1482         if (dir)
1483             tmp = -tmp;
1484
1485         if (tmp > 0 && tmp < min) {
1486             min = tmp;
1487             found = i;
1488         }
1489     }
1490
1491     return found;
1492 }
1493
1494 void
1495 intel_avc_vme_reference_state(VADriverContextP ctx,
1496                               struct encode_state *encode_state,
1497                               struct intel_encoder_context *encoder_context,
1498                               int list_index,
1499                               int surface_index,
1500                               void (* vme_source_surface_state)(
1501                                   VADriverContextP ctx,
1502                                   int index,
1503                                   struct object_surface *obj_surface,
1504                                   struct intel_encoder_context *encoder_context))
1505 {
1506     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1507     struct object_surface *obj_surface = NULL;
1508     struct i965_driver_data *i965 = i965_driver_data(ctx);
1509     VASurfaceID ref_surface_id;
1510     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1511     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1512     int max_num_references;
1513     VAPictureH264 *curr_pic;
1514     VAPictureH264 *ref_list;
1515     int ref_idx;
1516
1517     if (list_index == 0) {
1518         max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1519         ref_list = slice_param->RefPicList0;
1520     } else {
1521         max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1522         ref_list = slice_param->RefPicList1;
1523     }
1524
1525     if (max_num_references == 1) {
1526         if (list_index == 0) {
1527             ref_surface_id = slice_param->RefPicList0[0].picture_id;
1528             vme_context->used_references[0] = &slice_param->RefPicList0[0];
1529         } else {
1530             ref_surface_id = slice_param->RefPicList1[0].picture_id;
1531             vme_context->used_references[1] = &slice_param->RefPicList1[0];
1532         }
1533
1534         if (ref_surface_id != VA_INVALID_SURFACE)
1535             obj_surface = SURFACE(ref_surface_id);
1536
1537         if (!obj_surface ||
1538             !obj_surface->bo) {
1539             obj_surface = encode_state->reference_objects[list_index];
1540             vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1541         }
1542
1543         ref_idx = 0;
1544     } else {
1545         curr_pic = &pic_param->CurrPic;
1546
1547         /* select the reference frame in temporal space */
1548         ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1549         ref_surface_id = ref_list[ref_idx].picture_id;
1550
1551         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1552             obj_surface = SURFACE(ref_surface_id);
1553
1554         vme_context->used_reference_objects[list_index] = obj_surface;
1555         vme_context->used_references[list_index] = &ref_list[ref_idx];
1556     }
1557
1558     if (obj_surface &&
1559         obj_surface->bo) {
1560         assert(ref_idx >= 0);
1561         vme_context->used_reference_objects[list_index] = obj_surface;
1562         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1563         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
1564                                                     ref_idx << 16 |
1565                                                     ref_idx <<  8 |
1566                                                     ref_idx);
1567     } else {
1568         vme_context->used_reference_objects[list_index] = NULL;
1569         vme_context->used_references[list_index] = NULL;
1570         vme_context->ref_index_in_mb[list_index] = 0;
1571     }
1572 }
1573
1574 void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
1575                                         struct encode_state *encode_state,
1576                                         struct intel_encoder_context *encoder_context,
1577                                         int slice_index,
1578                                         struct intel_batchbuffer *slice_batch)
1579 {
1580     int count, i, start_index;
1581     unsigned int length_in_bits;
1582     VAEncPackedHeaderParameterBuffer *param = NULL;
1583     unsigned int *header_data = NULL;
1584     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1585     int slice_header_index;
1586
1587     if (encode_state->slice_header_index[slice_index] == 0)
1588         slice_header_index = -1;
1589     else
1590         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1591
1592     count = encode_state->slice_rawdata_count[slice_index];
1593     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1594
1595     for (i = 0; i < count; i++) {
1596         unsigned int skip_emul_byte_cnt;
1597
1598         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1599
1600         param = (VAEncPackedHeaderParameterBuffer *)
1601                     (encode_state->packed_header_params_ext[start_index + i]->buffer);
1602
1603         /* skip the slice header packed data here as it is inserted last */
1604         if (param->type == VAEncPackedHeaderSlice)
1605             continue;
1606
1607         length_in_bits = param->bit_length;
1608
1609         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1610
1611         /* as the slice header is still required, the last header flag is set to
1612          * zero.
1613          */
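             /* insert_object() arguments, in order: header data, length in dwords,
              * valid bits in the last dword, emulation-prevention skip byte count,
              * last-header flag (0 here, the slice header still follows), then the
              * last two flags before the batch, understood here as end-of-slice and
              * a request for the hardware to add emulation bytes when the packed
              * data does not already contain them.
              */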
1614         mfc_context->insert_object(ctx,
1615                                    encoder_context,
1616                                    header_data,
1617                                    ALIGN(length_in_bits, 32) >> 5,
1618                                    length_in_bits & 0x1f,
1619                                    skip_emul_byte_cnt,
1620                                    0,
1621                                    0,
1622                                    !param->has_emulation_bytes,
1623                                    slice_batch);
1624     }
1625
1626     if (slice_header_index == -1) {
1627         unsigned char *slice_header = NULL;
1628         int slice_header_length_in_bits = 0;
1629         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1630         VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1631         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1632
1633         /* No packed slice header was passed, so the driver needs to generate */
1634         /* a normal H.264 slice header itself. */
1635         slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
1636                                                              pPicParameter,
1637                                                              pSliceParameter,
1638                                                              &slice_header);
1639         mfc_context->insert_object(ctx, encoder_context,
1640                                    (unsigned int *)slice_header,
1641                                    ALIGN(slice_header_length_in_bits, 32) >> 5,
1642                                    slice_header_length_in_bits & 0x1f,
1643                                    5,  /* first 5 bytes are start code + nal unit type */
1644                                    1, 0, 1, slice_batch);
1645
1646         free(slice_header);
1647     } else {
1648         unsigned int skip_emul_byte_cnt;
1649
1650         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1651
1652         param = (VAEncPackedHeaderParameterBuffer *)
1653                     (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1654         length_in_bits = param->bit_length;
1655
1656         /* as the slice header is the last header data for one slice,
1657          * the last header flag is set to one.
1658          */
1659         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1660
1661         mfc_context->insert_object(ctx,
1662                                    encoder_context,
1663                                    header_data,
1664                                    ALIGN(length_in_bits, 32) >> 5,
1665                                    length_in_bits & 0x1f,
1666                                    skip_emul_byte_cnt,
1667                                    1,
1668                                    0,
1669                                    !param->has_emulation_bytes,
1670                                    slice_batch);
1671     }
1672
1673     return;
1674 }
1675
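     /*
      * Build, once per slice type, a per-QP macroblock/MV cost table: one 32-byte
      * row for every QP in [0, QP_MAX), filled by intel_h264_calc_mbmvcost_qp() and
      * later bound as a VME buffer surface by intel_h264_setup_cost_surface().
      */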
1676 void
1677 intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
1678                                 struct encode_state *encode_state,
1679                                 struct intel_encoder_context *encoder_context)
1680 {
1681     struct i965_driver_data *i965 = i965_driver_data(ctx);
1682     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1683     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1684     int qp;
1685     dri_bo *bo;
1686     uint8_t *cost_table;
1687
1688     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1689
1690
1691     if (slice_type == SLICE_TYPE_I) {
1692         if (vme_context->i_qp_cost_table)
1693             return;
1694     } else if (slice_type == SLICE_TYPE_P) {
1695         if (vme_context->p_qp_cost_table)
1696             return;
1697     } else {
1698         if (vme_context->b_qp_cost_table)
1699             return;
1700     }
1701
1702     /* It is enough to allocate 32 bytes for each qp. */
1703     bo = dri_bo_alloc(i965->intel.bufmgr,
1704                       "cost_table ",
1705                       QP_MAX * 32,
1706                       64);
1707
1708     dri_bo_map(bo, 1);
1709     assert(bo->virtual);
1710     cost_table = (uint8_t *)(bo->virtual);
1711     for (qp = 0; qp < QP_MAX; qp++) {
1712         intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
1713         cost_table += 32;
1714     }
1715
1716     dri_bo_unmap(bo);
1717
1718     if (slice_type == SLICE_TYPE_I) {
1719         vme_context->i_qp_cost_table = bo;
1720     } else if (slice_type == SLICE_TYPE_P) {
1721         vme_context->p_qp_cost_table = bo;
1722     } else {
1723         vme_context->b_qp_cost_table = bo;
1724     }
1725
1726     vme_context->cost_table_size = QP_MAX * 32;
1727     return;
1728 }
1729
1730 extern void
1731 intel_h264_setup_cost_surface(VADriverContextP ctx,
1732                               struct encode_state *encode_state,
1733                               struct intel_encoder_context *encoder_context,
1734                               unsigned long binding_table_offset,
1735                               unsigned long surface_state_offset)
1736 {
1737     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1738     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1739     dri_bo *bo;
1740
1741
1742     struct i965_buffer_surface cost_table;
1743
1744     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1745
1746
1747     if (slice_type == SLICE_TYPE_I) {
1748         bo = vme_context->i_qp_cost_table;
1749     } else if (slice_type == SLICE_TYPE_P) {
1750         bo = vme_context->p_qp_cost_table;
1751     } else {
1752         bo = vme_context->b_qp_cost_table;
1753     }
1754
1755     cost_table.bo = bo;
1756     cost_table.num_blocks = QP_MAX;
1757     cost_table.pitch = 16;
1758     cost_table.size_block = 32;
1759
1760     vme_context->vme_buffer_suface_setup(ctx,
1761                                          &vme_context->gpe_context,
1762                                          &cost_table,
1763                                          binding_table_offset,
1764                                          surface_state_offset);
1765 }
1766
1767 /*
1768  * The conversion between QP and qstep follows the scaling process for
1769  * luma transform coefficients in the H.264 spec:
1770  *   qstep = 2^(QPy / 6 - 6)
1771  * To avoid an overly small qstep, the value is scaled by 16, i.e. 2^(QPy / 6 - 2).
1772  */
1773 static float intel_h264_qp_qstep(int qp)
1774 {
1775     float value, qstep;
1776     value = qp;
1777     value = value / 6 - 2;
1778     qstep = powf(2, value);
1779     return qstep;
1780 }
1781
1782 static int intel_h264_qstep_qp(float qstep)
1783 {
1784     float qp;
1785
1786     qp = 12.0f + 6.0f * log2f(qstep);
1787
1788     return floorf(qp);
1789 }
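
     /*
      * Round-trip example for the two helpers above: QP = 24 gives
      * qstep = 2^(24/6 - 2) = 4.0, and intel_h264_qstep_qp(4.0) returns
      * 12 + 6 * log2(4.0) = 24 again.
      */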
1790
1791 /*
1792  * Currently the non-ROI QP is derived from the following assumption:
1793  * SUM(roi_area / roi_qstep) + nonroi_area / nonroi_qstep =
1794  *                             total_area / baseqp_qstep
1795  *
1796  * where qstep is the linearized H.264 quantizer step.
1797  */
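
     /*
      * Solving the assumption above for the non-ROI quantizer step gives
      *   nonroi_qstep = nonroi_area / (total_area / baseqp_qstep - SUM(roi_area / roi_qstep))
      * which is what intel_h264_enc_roi_cbr() computes; if the term in
      * parentheses goes negative, the non-ROI QP is simply clamped to 51.
      */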
1798 typedef struct {
1799     int row_start_in_mb;
1800     int row_end_in_mb;
1801     int col_start_in_mb;
1802     int col_end_in_mb;
1803
1804     int width_mbs;
1805     int height_mbs;
1806
1807     int roi_qp;
1808 } ROIRegionParam;
1809
1810 static VAStatus
1811 intel_h264_enc_roi_cbr(VADriverContextP ctx,
1812                        int base_qp,
1813                        VAEncMiscParameterBufferROI *pMiscParamROI,
1814                        struct encode_state *encode_state,
1815                        struct intel_encoder_context *encoder_context)
1816 {
1817     int nonroi_qp;
1818     VAEncROI *region_roi;
1819     bool quickfill = 0;
1820
1821     ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS];
1822     int num_roi = 0;
1823     int i,j;
1824
1825     float temp;
1826     float qstep_nonroi, qstep_base;
1827     float roi_area, total_area, nonroi_area;
1828     float sum_roi;
1829
1830     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1831     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1832     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1833     int mbs_in_picture = width_in_mbs * height_in_mbs;
1834
1835     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1836     VAStatus vaStatus = VA_STATUS_SUCCESS;
1837
1838     if(pMiscParamROI != NULL)
1839     {
1840         num_roi = (pMiscParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pMiscParamROI->num_roi;
1841
1842         /* Currently roi_value_is_qp_delta is the only supported priority mode.
1843          *
1844          * The user-supplied qp_delta is added to base_qp, and the result is then
1845          * clamped to [base_qp - min_delta, base_qp + max_delta].
1846          */
1847         ASSERT_RET(pMiscParamROI->roi_flags.bits.roi_value_is_qp_delta,VA_STATUS_ERROR_INVALID_PARAMETER);
1848     }
1849
1850     /* When base_qp is 12 or lower, the quality is already quite good based
1851      * on H.264 encoding experience, so there is no need to adjust the QP
1852      * for the ROI regions.
1853      */
1854     if (base_qp <= 12) {
1855         nonroi_qp = base_qp;
1856         quickfill = 1;
1857         goto qp_fill;
1858     }
1859
1860     sum_roi = 0.0f;
1861     roi_area = 0;
1862     for (i = 0; i < num_roi; i++) {
1863         int row_start, row_end, col_start, col_end;
1864         int roi_width_mbs, roi_height_mbs;
1865         int mbs_in_roi;
1866         int roi_qp;
1867         float qstep_roi;
1868
1869         region_roi =  (VAEncROI *)pMiscParamROI->roi + i;
1870
1871         col_start = region_roi->roi_rectangle.x;
1872         col_end = col_start + region_roi->roi_rectangle.width;
1873         row_start = region_roi->roi_rectangle.y;
1874         row_end = row_start + region_roi->roi_rectangle.height;
1875         col_start = col_start / 16;
1876         col_end = (col_end + 15) / 16;
1877         row_start = row_start / 16;
1878         row_end = (row_end + 15) / 16;
1879
1880         roi_width_mbs = col_end - col_start;
1881         roi_height_mbs = row_end - row_start;
1882         mbs_in_roi = roi_width_mbs * roi_height_mbs;
1883
1884         param_regions[i].row_start_in_mb = row_start;
1885         param_regions[i].row_end_in_mb = row_end;
1886         param_regions[i].col_start_in_mb = col_start;
1887         param_regions[i].col_end_in_mb = col_end;
1888         param_regions[i].width_mbs = roi_width_mbs;
1889         param_regions[i].height_mbs = roi_height_mbs;
1890
1891         roi_qp = base_qp + region_roi->roi_value;
1892         BRC_CLIP(roi_qp, 1, 51);
1893
1894         param_regions[i].roi_qp = roi_qp;
1895         qstep_roi = intel_h264_qp_qstep(roi_qp);
1896
1897         roi_area += mbs_in_roi;
1898         sum_roi += mbs_in_roi / qstep_roi;
1899     }
1900
1901     total_area = mbs_in_picture;
1902     nonroi_area = total_area - roi_area;
1903
1904     qstep_base = intel_h264_qp_qstep(base_qp);
1905     temp = (total_area / qstep_base - sum_roi);
1906
1907     if (temp < 0) {
1908         nonroi_qp = 51;
1909     } else {
1910         qstep_nonroi = nonroi_area / temp;
1911         nonroi_qp = intel_h264_qstep_qp(qstep_nonroi);
1912     }
1913
1914     BRC_CLIP(nonroi_qp, 1, 51);
1915
1916 qp_fill:
1917     memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture);
1918     if (!quickfill) {
1919         char *qp_ptr;
1920
1921         for (i = 0; i < num_roi; i++) {
1922             for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) {
1923                 qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb;
1924                 memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs);
1925             }
1926         }
1927     }
1928     return vaStatus;
1929 }
1930
1931 extern void
1932 intel_h264_enc_roi_config(VADriverContextP ctx,
1933                           struct encode_state *encode_state,
1934                           struct intel_encoder_context *encoder_context)
1935 {
1936     char *qp_ptr;
1937     int i, j;
1938     VAEncROI *region_roi;
1939     struct i965_driver_data *i965 = i965_driver_data(ctx);
1940     VAEncMiscParameterBuffer* pMiscParamROI;
1941     VAEncMiscParameterBufferROI *pParamROI = NULL;
1942     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1943     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1944     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1945     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1946     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1947
1948     int row_start, row_end, col_start, col_end;
1949     int num_roi = 0;
1950
1951     vme_context->roi_enabled = 0;
1952     /* Restriction: Disable ROI when multi-slice is enabled */
1953     if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
1954         return;
1955
1956     if (encode_state->misc_param[VAEncMiscParameterTypeROI][0] != NULL) {
1957         pMiscParamROI = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeROI][0]->buffer;
1958         pParamROI = (VAEncMiscParameterBufferROI *)pMiscParamROI->data;
1959
1960         /* clamp the number of ROI regions to the supported maximum */
1961         num_roi = (pParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pParamROI->num_roi;
1962     }
1963
1964     if (num_roi > 0)
1965         vme_context->roi_enabled = 1;
1966
1967     if (!vme_context->roi_enabled)
1968         return;
1969
1970     if ((vme_context->saved_width_mbs !=  width_in_mbs) ||
1971         (vme_context->saved_height_mbs != height_in_mbs)) {
1972         free(vme_context->qp_per_mb);
1973         vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);
1974
1975         vme_context->saved_width_mbs = width_in_mbs;
1976         vme_context->saved_height_mbs = height_in_mbs;
1977         assert(vme_context->qp_per_mb);
1978     }
1979     if (encoder_context->rate_control_mode == VA_RC_CBR) {
1980         /*
1981          * TODO: a more sophisticated per-MB QP adjustment is needed.
1982          * Currently the per-MB QP map is derived from the BRC slice QP.
1983          */
1984         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1985         int qp;
1986         int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1987
1988         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1989         intel_h264_enc_roi_cbr(ctx, qp, pParamROI,encode_state, encoder_context);
1990
1991     } else if (encoder_context->rate_control_mode == VA_RC_CQP){
1992         VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1993         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1994         int qp;
1995
1996         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1997         memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
1998
1999
2000         for (j = num_roi; j ; j--) {
2001             int qp_delta, qp_clip;
2002
2003             region_roi =  (VAEncROI *)pParamROI->roi + j - 1;
2004
2005             col_start = region_roi->roi_rectangle.x;
2006             col_end = col_start + region_roi->roi_rectangle.width;
2007             row_start = region_roi->roi_rectangle.y;
2008             row_end = row_start + region_roi->roi_rectangle.height;
2009
2010             col_start = col_start / 16;
2011             col_end = (col_end + 15) / 16;
2012             row_start = row_start / 16;
2013             row_end = (row_end + 15) / 16;
2014
2015             qp_delta = region_roi->roi_value;
2016             qp_clip = qp + qp_delta;
2017
2018             BRC_CLIP(qp_clip, 1, 51);
2019
2020             for (i = row_start; i < row_end; i++) {
2021                 qp_ptr = vme_context->qp_per_mb + (i * width_in_mbs) + col_start;
2022                 memset(qp_ptr, qp_clip, (col_end - col_start));
2023             }
2024         }
2025     } else {
2026         /*
2027          * ROI is disabled for rate-control modes other than CBR/CQP.
2028          */
2029         vme_context->roi_enabled = 0;
2030     }
2031
2032     if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info))
2033         encoder_context->soft_batch_force = 1;
2034
2035     return;
2036 }
2037
2038 /* HEVC */
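     /*
      * HEVC counterpart of avc_temporal_find_surface(): pick the reference whose
      * pic_order_cnt is nearest to the current picture in the requested direction,
      * or return -1 if none qualifies.
      */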
2039 static int
2040 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
2041                            VAPictureHEVC *ref_list,
2042                            int num_pictures,
2043                            int dir)
2044 {
2045     int i, found = -1, min = 0x7FFFFFFF;
2046
2047     for (i = 0; i < num_pictures; i++) {
2048         int tmp;
2049
2050         if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
2051             (ref_list[i].picture_id == VA_INVALID_SURFACE))
2052             break;
2053
2054         tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
2055
2056         if (dir)
2057             tmp = -tmp;
2058
2059         if (tmp > 0 && tmp < min) {
2060             min = tmp;
2061             found = i;
2062         }
2063     }
2064
2065     return found;
2066 }
2067 void
2068 intel_hevc_vme_reference_state(VADriverContextP ctx,
2069                                struct encode_state *encode_state,
2070                                struct intel_encoder_context *encoder_context,
2071                                int list_index,
2072                                int surface_index,
2073                                void (* vme_source_surface_state)(
2074                                    VADriverContextP ctx,
2075                                    int index,
2076                                    struct object_surface *obj_surface,
2077                                    struct intel_encoder_context *encoder_context))
2078 {
2079     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2080     struct object_surface *obj_surface = NULL;
2081     struct i965_driver_data *i965 = i965_driver_data(ctx);
2082     VASurfaceID ref_surface_id;
2083     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2084     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2085     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2086     int max_num_references;
2087     VAPictureHEVC *curr_pic;
2088     VAPictureHEVC *ref_list;
2089     int ref_idx;
2090     unsigned int is_hevc10 = 0;
2091     GenHevcSurface *hevc_encoder_surface = NULL;
2092
2093     if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
2094         || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
2095         is_hevc10 = 1;
2096
2097     if (list_index == 0) {
2098         max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
2099         ref_list = slice_param->ref_pic_list0;
2100     } else {
2101         max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
2102         ref_list = slice_param->ref_pic_list1;
2103     }
2104
2105     if (max_num_references == 1) {
2106         if (list_index == 0) {
2107             ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
2108             vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
2109         } else {
2110             ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
2111             vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
2112         }
2113
2114         if (ref_surface_id != VA_INVALID_SURFACE)
2115             obj_surface = SURFACE(ref_surface_id);
2116
2117         if (!obj_surface ||
2118             !obj_surface->bo) {
2119             obj_surface = encode_state->reference_objects[list_index];
2120             vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
2121         }
2122
2123         ref_idx = 0;
2124     } else {
2125         curr_pic = &pic_param->decoded_curr_pic;
2126
2127         /* select the reference frame in temporal space */
2128         ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
2129         ref_surface_id = ref_list[ref_idx].picture_id;
2130
2131         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
2132             obj_surface = SURFACE(ref_surface_id);
2133
2134         vme_context->used_reference_objects[list_index] = obj_surface;
2135         vme_context->used_references[list_index] = &ref_list[ref_idx];
2136     }
2137
2138     if (obj_surface &&
2139         obj_surface->bo) {
2140         assert(ref_idx >= 0);
2141         vme_context->used_reference_objects[list_index] = obj_surface;
2142
2143         if(is_hevc10){
2144             hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2145             assert(hevc_encoder_surface);
2146             obj_surface = hevc_encoder_surface->nv12_surface_obj;
2147         }
2148         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
2149         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
2150                 ref_idx << 16 |
2151                 ref_idx <<  8 |
2152                 ref_idx);
2153     } else {
2154         vme_context->used_reference_objects[list_index] = NULL;
2155         vme_context->used_references[list_index] = NULL;
2156         vme_context->ref_index_in_mb[list_index] = 0;
2157     }
2158 }
2159
2160 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
2161                                      struct encode_state *encode_state,
2162                                      struct intel_encoder_context *encoder_context)
2163 {
2164     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2165     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2166     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2167     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2168     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2169     int qp, m_cost, j, mv_count;
2170     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
2171     float   lambda, m_costf;
2172
2173     /* HEVC has no SI/SP slice types, so no slice-type fixup is needed */
2174     int slice_type = slice_param->slice_type;
2175
2176
2177     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2178
2179     if(encoder_context->rate_control_mode == VA_RC_CBR)
2180     {
2181         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
2182         if(slice_type == HEVC_SLICE_B) {
2183             if(pSequenceParameter->ip_period == 1)
2184             {
2185                 slice_type = HEVC_SLICE_P;
2186                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2187
2188             }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
2189                 slice_type = HEVC_SLICE_P;
2190                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2191             }
2192         }
2193
2194     }
2195
2196     if (vme_state_message == NULL)
2197         return;
2198
2199     assert(qp <= QP_MAX);
2200     lambda = intel_lambda_qp(qp);
2201     if (slice_type == HEVC_SLICE_I) {
2202         vme_state_message[MODE_INTRA_16X16] = 0;
2203         m_cost = lambda * 4;
2204         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2205         m_cost = lambda * 16;
2206         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2207         m_cost = lambda * 3;
2208         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2209     } else {
2210         m_cost = 0;
2211         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
2212         for (j = 1; j < 3; j++) {
2213             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2214             m_cost = (int)m_costf;
2215             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2216         }
2217         mv_count = 3;
2218         for (j = 4; j <= 64; j *= 2) {
2219             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2220             m_cost = (int)m_costf;
2221             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
2222             mv_count++;
2223         }
2224
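             /* for QP <= 25 a fixed set of low mode costs is used instead of the
              * lambda-scaled values computed below
              */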
2225         if (qp <= 25) {
2226             vme_state_message[MODE_INTRA_16X16] = 0x4a;
2227             vme_state_message[MODE_INTRA_8X8] = 0x4a;
2228             vme_state_message[MODE_INTRA_4X4] = 0x4a;
2229             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2230             vme_state_message[MODE_INTER_16X16] = 0x4a;
2231             vme_state_message[MODE_INTER_16X8] = 0x4a;
2232             vme_state_message[MODE_INTER_8X8] = 0x4a;
2233             vme_state_message[MODE_INTER_8X4] = 0x4a;
2234             vme_state_message[MODE_INTER_4X4] = 0x4a;
2235             vme_state_message[MODE_INTER_BWD] = 0x2a;
2236             return;
2237         }
2238         m_cost = lambda * 10;
2239         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2240         m_cost = lambda * 14;
2241         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2242         m_cost = lambda * 24;
2243         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2244         m_costf = lambda * 3.5;
2245         m_cost = m_costf;
2246         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2247         if (slice_type == HEVC_SLICE_P) {
2248             m_costf = lambda * 2.5;
2249             m_cost = m_costf;
2250             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2251             m_costf = lambda * 4;
2252             m_cost = m_costf;
2253             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2254             m_costf = lambda * 1.5;
2255             m_cost = m_costf;
2256             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2257             m_costf = lambda * 3;
2258             m_cost = m_costf;
2259             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2260             m_costf = lambda * 5;
2261             m_cost = m_costf;
2262             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2263             /* BWD is not used in P-frame */
2264             vme_state_message[MODE_INTER_BWD] = 0;
2265         } else {
2266             m_costf = lambda * 2.5;
2267             m_cost = m_costf;
2268             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2269             m_costf = lambda * 5.5;
2270             m_cost = m_costf;
2271             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2272             m_costf = lambda * 3.5;
2273             m_cost = m_costf;
2274             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2275             m_costf = lambda * 5.0;
2276             m_cost = m_costf;
2277             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2278             m_costf = lambda * 6.5;
2279             m_cost = m_costf;
2280             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2281             m_costf = lambda * 1.5;
2282             m_cost = m_costf;
2283             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
2284         }
2285     }
2286 }