svct: Adjust the estimated frame size for QP=1
[android-x86/hardware-intel-common-vaapi.git] / src / gen6_mfc_common.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "gen9_mfc.h"
45 #include "intel_media.h"
46
47 #ifndef HAVE_LOG2F
48 #define log2f(x) (logf(x)/(float)M_LN2)
49 #endif
50
51 int intel_avc_enc_slice_type_fixup(int slice_type)
52 {
53     if (slice_type == SLICE_TYPE_SP ||
54         slice_type == SLICE_TYPE_P)
55         slice_type = SLICE_TYPE_P;
56     else if (slice_type == SLICE_TYPE_SI ||
57              slice_type == SLICE_TYPE_I)
58         slice_type = SLICE_TYPE_I;
59     else {
60         if (slice_type != SLICE_TYPE_B)
61             WARN_ONCE("Invalid slice type for H.264 encoding!\n");
62
63         slice_type = SLICE_TYPE_B;
64     }
65
66     return slice_type;
67 }
68
69 static void
70 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state, 
71                                         struct intel_encoder_context *encoder_context)
72 {
73     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
74     int i;
75
76     for(i = 0 ; i < 3; i++) {
77         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
78         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
79         mfc_context->bit_rate_control_context[i].GrowInit = 6;
80         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
81         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
82         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
83         
84         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
85         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
86         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
87         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
88         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
89         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
90     }
91 }
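
/*
 * One bit-rate-control context is kept for each of the three slice types
 * (I/P/B appear to map to the three indices); the Grow/Shrink/Correct
 * values above are fixed heuristics rather than anything derived from the
 * stream parameters.
 */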
92
93 static void intel_mfc_brc_init(struct encode_state *encode_state,
94                                struct intel_encoder_context* encoder_context)
95 {
96     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
97     double bitrate, framerate;
98     double frame_per_bits = 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
99     double qp1_size = 0.1 * frame_per_bits;
100     double qp51_size = 0.001 * frame_per_bits;
101     double bpf, factor;
102     int inum = encoder_context->brc.num_iframes_in_gop,
103         pnum = encoder_context->brc.num_pframes_in_gop,
104         bnum = encoder_context->brc.num_bframes_in_gop; /* Gop structure: number of I, P, B frames in the Gop. */
105     int intra_period = encoder_context->brc.gop_size;
106     int i;
107
108     if (encoder_context->layer.num_layers > 1)
109         qp1_size = 0.15 * frame_per_bits;
110
111     mfc_context->brc.mode = encoder_context->rate_control_mode;
112
113     mfc_context->hrd.buffer_size = encoder_context->brc.hrd_buffer_size;
114     mfc_context->hrd.current_buffer_fullness =
115         (double)(encoder_context->brc.hrd_initial_buffer_fullness < mfc_context->hrd.buffer_size) ?
116         encoder_context->brc.hrd_initial_buffer_fullness : mfc_context->hrd.buffer_size / 2.;
117     mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
118     mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
119     mfc_context->hrd.violation_noted = 0;
120
121     for (i = 0; i < encoder_context->layer.num_layers; i++) {
122         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = 26;
123         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 26;
124         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = 26;
125
126         if (i == 0) {
127             bitrate = encoder_context->brc.bits_per_second[0];
128             framerate = (double)encoder_context->brc.framerate_per_100s[0] / 100.0;
129         } else {
130             bitrate = (encoder_context->brc.bits_per_second[i] - encoder_context->brc.bits_per_second[i - 1]);
131             framerate = (double)(encoder_context->brc.framerate_per_100s[i] - encoder_context->brc.framerate_per_100s[i - 1]) / 100.0;
132         }
133
134         if (i == encoder_context->layer.num_layers - 1)
135             factor = 1.0;
136         else
137             factor = (double)encoder_context->brc.framerate_per_100s[i] / encoder_context->brc.framerate_per_100s[encoder_context->layer.num_layers - 1];
138
139         if (encoder_context->layer.num_layers > 1) {
140             if (i == 0) {
141                 intra_period = (int)(encoder_context->brc.gop_size * factor);
142                 inum = 1;
143                 pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor);
144                 bnum = intra_period - inum - pnum;
145             } else {
146                 intra_period = (int)(encoder_context->brc.gop_size * factor) - intra_period;
147                 inum = 0;
148                 pnum = (int)(encoder_context->brc.num_pframes_in_gop * factor) - pnum;
149                 bnum = intra_period - inum - pnum;
150             }
151         }
152
153         mfc_context->brc.gop_nums[i][SLICE_TYPE_I] = inum;
154         mfc_context->brc.gop_nums[i][SLICE_TYPE_P] = pnum;
155         mfc_context->brc.gop_nums[i][SLICE_TYPE_B] = bnum;
156
157         mfc_context->brc.target_frame_size[i][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
158                                                                     (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
159         mfc_context->brc.target_frame_size[i][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
160         mfc_context->brc.target_frame_size[i][SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[i][SLICE_TYPE_I];
161
162         bpf = mfc_context->brc.bits_per_frame[i] = bitrate/framerate;
163
164         if ((bpf > qp51_size) && (bpf < qp1_size)) {
165             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
166         }
167         else if (bpf >= qp1_size)
168             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 1;
169         else if (bpf <= qp51_size)
170             mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P] = 51;
171
172         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P];
173         mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B] = mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I];
174
175         BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_I], 1, 51);
176         BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_P], 1, 51);
177         BRC_CLIP(mfc_context->brc.qp_prime_y[i][SLICE_TYPE_B], 1, 51);
178     }
179 }
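
/*
 * Worked example with assumed numbers: a 1920x1080 stream at 30 fps and
 * 4 Mbit/s gives frame_per_bits = 8 * 3 * 1920 * 1080 / 2 = 24883200, so
 * qp1_size ~= 2488320 bits and qp51_size ~= 24883 bits.  The per-frame
 * budget bpf = 4000000 / 30 ~= 133333 bits falls between the two, so the
 * interpolation above seeds 51 - 50 * (133333 - 24883) / (2488320 - 24883)
 * ~= 48.8, truncated to 48, as the starting QP for I, P and B frames.
 */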
180
181 int intel_mfc_update_hrd(struct encode_state *encode_state,
182                          struct intel_encoder_context *encoder_context,
183                          int frame_bits)
184 {
185     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
186     double prev_bf = mfc_context->hrd.current_buffer_fullness;
187
188     mfc_context->hrd.current_buffer_fullness -= frame_bits;
189
190     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
191         mfc_context->hrd.current_buffer_fullness = prev_bf;
192         return BRC_UNDERFLOW;
193     }
194     
195     mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame[encoder_context->layer.curr_frame_layer_id];
196     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
197         if (mfc_context->brc.mode == VA_RC_VBR)
198             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
199         else {
200             mfc_context->hrd.current_buffer_fullness = prev_bf;
201             return BRC_OVERFLOW;
202         }
203     }
204     return BRC_NO_HRD_VIOLATION;
205 }
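
/*
 * This implements a leaky-bucket HRD model: each encoded frame drains
 * frame_bits from the buffer while the channel refills it with the layer's
 * bits_per_frame.  Draining below zero reports BRC_UNDERFLOW and filling
 * past buffer_size reports BRC_OVERFLOW (except in VBR mode, where the
 * fullness is simply clamped); in both failure cases the previous fullness
 * is restored so the caller can react by adjusting QP.
 */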
206
207 int intel_mfc_brc_postpack(struct encode_state *encode_state,
208                            struct intel_encoder_context *encoder_context,
209                            int frame_bits)
210 {
211     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
212     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
213     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; 
214     int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
215     int curr_frame_layer_id, next_frame_layer_id;
216     int qpi, qpp, qpb;
217     int qp; // quantizer of previously encoded slice of current type
218     int qpn; // predicted quantizer for next frame of current type in integer format
219     double qpf; // predicted quantizer for next frame of current type in float format
220     double delta_qp; // QP correction
221     int target_frame_size, frame_size_next;
222     /* Notes:
223      *  x - how far we are from the target HRD buffer fullness (normalized)
224      *  y - how far we are from the HRD buffer borders
225      */
226     double x, y;
227     double frame_size_alpha;
228
229     if (encoder_context->layer.num_layers < 2 || encoder_context->layer.size_frame_layer_ids == 0) {
230         curr_frame_layer_id = 0;
231         next_frame_layer_id = 0;
232     } else {
233         curr_frame_layer_id = encoder_context->layer.curr_frame_layer_id;
234         next_frame_layer_id = encoder_context->layer.frame_layer_ids[encoder_context->num_frames_in_sequence % encoder_context->layer.size_frame_layer_ids];
235     }
236
237     /* check for HRD compliance first */
238     sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
239
240     if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
241         /* nothing */
242     } else {
243         next_frame_layer_id = curr_frame_layer_id;
244     }
245
246     mfc_context->brc.bits_prev_frame[curr_frame_layer_id] = frame_bits;
247     frame_bits = mfc_context->brc.bits_prev_frame[next_frame_layer_id];
248
249     mfc_context->brc.prev_slice_type[curr_frame_layer_id] = slicetype;
250     slicetype = mfc_context->brc.prev_slice_type[next_frame_layer_id];
251
252     /* 0 means the next frame is the first frame of the next layer */
253     if (frame_bits == 0)
254         return sts;
255
256     qpi = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I];
257     qpp = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P];
258     qpb = mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B];
259
260     qp = mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype];
261
262     target_frame_size = mfc_context->brc.target_frame_size[next_frame_layer_id][slicetype];
263     if (mfc_context->hrd.buffer_capacity < 5)
264         frame_size_alpha = 0;
265     else
266         frame_size_alpha = (double)mfc_context->brc.gop_nums[next_frame_layer_id][slicetype];
267     if (frame_size_alpha > 30) frame_size_alpha = 30;
268     frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
269         (double)(frame_size_alpha + 1.);
270
271     /* frame_size_next: avoid negative or too-small values */
272     if ((double)frame_size_next < (double)(target_frame_size * 0.25))
273         frame_size_next = (int)((double)target_frame_size * 0.25);
274
275     qpf = (double)qp * target_frame_size / frame_size_next;
276     qpn = (int)(qpf + 0.5);
277
278     if (qpn == qp) {
279         /* rounding qpf to qpn loses a fraction: accumulate it and compensate once it exceeds one QP step */
280         mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] += qpf - qpn;
281         if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] > 1.0) {
282             qpn++;
283             mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
284         } else if (mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] < -1.0) {
285             qpn--;
286             mfc_context->brc.qpf_rounding_accumulator[next_frame_layer_id] = 0.;
287         }
288     }
289     /* make sure the QP does not change too fast */
290     if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
291     else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
292     /* make sure the predicted QP stays within the valid QP range */
293     BRC_CLIP(qpn, 1, 51);
294
295     /* calculate the QP delta as a function of the HRD buffer state */
296     x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
297     if (x > 0) {
298         x /= mfc_context->hrd.target_buffer_fullness;
299         y = mfc_context->hrd.current_buffer_fullness;
300     }
301     else {
302         x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
303         y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
304     }
305     if (y < 0.01) y = 0.01;
306     if (x > 1) x = 1;
307     else if (x < -1) x = -1;
308
309     delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
310     qpn = (int)(qpn + delta_qp + 0.5);
311
312     /* make sure the predicted QP stays within the valid QP range */
313     BRC_CLIP(qpn, 1, 51);
314
315     if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
316         /* correcting QPs of slices of other types */
317         if (slicetype == SLICE_TYPE_P) {
318             if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
319                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
320             if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
321                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
322         } else if (slicetype == SLICE_TYPE_I) {
323             if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
324                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B] += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
325             if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
326                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
327         } else { // SLICE_TYPE_B
328             if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
329                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P] += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
330             if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
331                 mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I] += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
332         }
333         BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_I], 1, 51);
334         BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_P], 1, 51);
335         BRC_CLIP(mfc_context->brc.qp_prime_y[next_frame_layer_id][SLICE_TYPE_B], 1, 51);
336     } else if (sts == BRC_UNDERFLOW) { // underflow
337         if (qpn <= qp) qpn = qp + 1;
338         if (qpn > 51) {
339             qpn = 51;
340             sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
341         }
342     } else if (sts == BRC_OVERFLOW) {
343         if (qpn >= qp) qpn = qp - 1;
344         if (qpn < 1) { // overflow with minQP (QP cannot drop below 1)
345             qpn = 1;
346             sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
347         }
348     }
349
350     mfc_context->brc.qp_prime_y[next_frame_layer_id][slicetype] = qpn;
351
352     return sts;
353 }
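
/*
 * Rough example of the update above (numbers chosen for illustration): if a
 * frame at QP 30 comes out at twice its target size and frame_size_alpha is
 * 3, then frame_size_next = target + (target - 2 * target) / 4 = 0.75 *
 * target and qpf = 30 * target / (0.75 * target) = 40.  The step is capped
 * at qp +/- BRC_QP_MAX_CHANGE, nudged by the buffer-fullness term
 * BRC_QP_MAX_CHANGE * exp(-1/y) * sin(BRC_PI_0_5 * x), and finally clipped
 * to [1, 51] before being stored for the next frame of this type.
 */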
354
355 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
356                                        struct intel_encoder_context *encoder_context)
357 {
358     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
359     unsigned int rate_control_mode = encoder_context->rate_control_mode;
360     int target_bit_rate = encoder_context->brc.bits_per_second[encoder_context->layer.num_layers - 1];
361     
362     // currently only CBR mode is supported.
363     if (rate_control_mode == VA_RC_CBR) {
364         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
365         mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
366         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
367         mfc_context->vui_hrd.i_frame_number = 0;
368
369         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24; 
370         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
371         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
372     }
373
374 }
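
/*
 * The CPB removal delays above are expressed in ticks of the 90 kHz clock
 * used by the H.264 buffering-period/picture-timing SEI messages, and
 * i_bit_rate_value holds the bit rate in units of 1024 bit/s (hence the
 * >> 10).
 */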
375
376 void 
377 intel_mfc_hrd_context_update(struct encode_state *encode_state, 
378                              struct gen6_mfc_context *mfc_context)
379 {
380     mfc_context->vui_hrd.i_frame_number++;
381 }
382
383 int intel_mfc_interlace_check(VADriverContextP ctx,
384                               struct encode_state *encode_state,
385                               struct intel_encoder_context *encoder_context)
386 {
387     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
388     VAEncSliceParameterBufferH264 *pSliceParameter;
389     int i;
390     int mbCount = 0;
391     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
392     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
393   
394     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
395         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer; 
396         mbCount += pSliceParameter->num_macroblocks; 
397     }
398     
399     if ( mbCount == ( width_in_mbs * height_in_mbs ) )
400         return 0;
401
402     return 1;
403 }
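
/*
 * Returns 0 when the submitted slices cover exactly width_in_mbs *
 * height_in_mbs macroblocks (a full progressive frame) and 1 otherwise,
 * which the callers appear to treat as field/interlaced input.
 */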
404
405 void intel_mfc_brc_prepare(struct encode_state *encode_state,
406                            struct intel_encoder_context *encoder_context)
407 {
408     unsigned int rate_control_mode = encoder_context->rate_control_mode;
409
410     if (encoder_context->codec != CODEC_H264 &&
411         encoder_context->codec != CODEC_H264_MVC)
412         return;
413
414     if (rate_control_mode == VA_RC_CBR) {
415         /* Program bit rate control */
416         if (encoder_context->brc.need_reset) {
417             intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
418             intel_mfc_brc_init(encode_state, encoder_context);
419         }
420
421         /* Program HRD control */
422         if (encoder_context->brc.need_reset)
423             intel_mfc_hrd_context_init(encode_state, encoder_context);    
424     }
425 }
426
427 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
428                                               struct encode_state *encode_state,
429                                               struct intel_encoder_context *encoder_context,
430                                               struct intel_batchbuffer *slice_batch)
431 {
432     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
433     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
434     unsigned int rate_control_mode = encoder_context->rate_control_mode;
435     unsigned int skip_emul_byte_cnt;
436
437     if (encode_state->packed_header_data[idx]) {
438         VAEncPackedHeaderParameterBuffer *param = NULL;
439         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
440         unsigned int length_in_bits;
441
442         assert(encode_state->packed_header_param[idx]);
443         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
444         length_in_bits = param->bit_length;
445
446         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
447         mfc_context->insert_object(ctx,
448                                    encoder_context,
449                                    header_data,
450                                    ALIGN(length_in_bits, 32) >> 5,
451                                    length_in_bits & 0x1f,
452                                    skip_emul_byte_cnt,
453                                    0,
454                                    0,
455                                    !param->has_emulation_bytes,
456                                    slice_batch);
457     }
458
459     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
460
461     if (encode_state->packed_header_data[idx]) {
462         VAEncPackedHeaderParameterBuffer *param = NULL;
463         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
464         unsigned int length_in_bits;
465
466         assert(encode_state->packed_header_param[idx]);
467         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
468         length_in_bits = param->bit_length;
469
470         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
471
472         mfc_context->insert_object(ctx,
473                                    encoder_context,
474                                    header_data,
475                                    ALIGN(length_in_bits, 32) >> 5,
476                                    length_in_bits & 0x1f,
477                                    skip_emul_byte_cnt,
478                                    0,
479                                    0,
480                                    !param->has_emulation_bytes,
481                                    slice_batch);
482     }
483     
484     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
485
486     if (encode_state->packed_header_data[idx]) {
487         VAEncPackedHeaderParameterBuffer *param = NULL;
488         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
489         unsigned int length_in_bits;
490
491         assert(encode_state->packed_header_param[idx]);
492         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
493         length_in_bits = param->bit_length;
494
495         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
496         mfc_context->insert_object(ctx,
497                                    encoder_context,
498                                    header_data,
499                                    ALIGN(length_in_bits, 32) >> 5,
500                                    length_in_bits & 0x1f,
501                                    skip_emul_byte_cnt,
502                                    0,
503                                    0,
504                                    !param->has_emulation_bytes,
505                                    slice_batch);
506     } else if (rate_control_mode == VA_RC_CBR) {
507         // this is the first AU
508         struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
509
510         unsigned char *sei_data = NULL;
511     
512         int length_in_bits = build_avc_sei_buffer_timing(
513             mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
514             mfc_context->vui_hrd.i_initial_cpb_removal_delay,
515             0,
516             mfc_context->vui_hrd.i_cpb_removal_delay_length,
                mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
517             mfc_context->vui_hrd.i_dpb_output_delay_length,
518             0,
519             &sei_data);
520         mfc_context->insert_object(ctx,
521                                    encoder_context,
522                                    (unsigned int *)sei_data,
523                                    ALIGN(length_in_bits, 32) >> 5,
524                                    length_in_bits & 0x1f,
525                                    5,
526                                    0,   
527                                    0,   
528                                    1,
529                                    slice_batch);  
530         free(sei_data);
531     }
532 }
533
534 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, 
535                                struct encode_state *encode_state,
536                                struct intel_encoder_context *encoder_context)
537 {
538     struct i965_driver_data *i965 = i965_driver_data(ctx);
539     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
540     struct object_surface *obj_surface; 
541     struct object_buffer *obj_buffer;
542     GenAvcSurface *gen6_avc_surface;
543     dri_bo *bo;
544     VAStatus vaStatus = VA_STATUS_SUCCESS;
545     int i, j, enable_avc_ildb = 0;
546     VAEncSliceParameterBufferH264 *slice_param;
547     struct i965_coded_buffer_segment *coded_buffer_segment;
548     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
549     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
550     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
551
552     if (IS_GEN6(i965->intel.device_info)) {
553         /* On SNB the width in MBs must be fixed to 128 when sizing the DMV buffer */
554         width_in_mbs = 128;
555     }
556
557     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
558         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
559         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
560
561         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
562             assert((slice_param->slice_type == SLICE_TYPE_I) ||
563                    (slice_param->slice_type == SLICE_TYPE_SI) ||
564                    (slice_param->slice_type == SLICE_TYPE_P) ||
565                    (slice_param->slice_type == SLICE_TYPE_SP) ||
566                    (slice_param->slice_type == SLICE_TYPE_B));
567
568             if (slice_param->disable_deblocking_filter_idc != 1) {
569                 enable_avc_ildb = 1;
570                 break;
571             }
572
573             slice_param++;
574         }
575     }
576
577     /* Set up all the input & output objects */
578
579     /* Set up the current frame and its direct MV buffer */
580     obj_surface = encode_state->reconstructed_object;
581     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
582
583     if ( obj_surface->private_data == NULL) {
584         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
585         assert(gen6_avc_surface);
586         gen6_avc_surface->dmv_top = 
587             dri_bo_alloc(i965->intel.bufmgr,
588                          "Buffer",
589                          68 * width_in_mbs * height_in_mbs, 
590                          64);
591         gen6_avc_surface->dmv_bottom = 
592             dri_bo_alloc(i965->intel.bufmgr,
593                          "Buffer",
594                          68 * width_in_mbs * height_in_mbs, 
595                          64);
596         assert(gen6_avc_surface->dmv_top);
597         assert(gen6_avc_surface->dmv_bottom);
598         obj_surface->private_data = (void *)gen6_avc_surface;
599         obj_surface->free_private_data = (void *)gen_free_avc_surface; 
600     }
601     gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
602     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
603     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
604     dri_bo_reference(gen6_avc_surface->dmv_top);
605     dri_bo_reference(gen6_avc_surface->dmv_bottom);
606
607     if (enable_avc_ildb) {
608         mfc_context->post_deblocking_output.bo = obj_surface->bo;
609         dri_bo_reference(mfc_context->post_deblocking_output.bo);
610     } else {
611         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
612         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
613     }
614
615     mfc_context->surface_state.width = obj_surface->orig_width;
616     mfc_context->surface_state.height = obj_surface->orig_height;
617     mfc_context->surface_state.w_pitch = obj_surface->width;
618     mfc_context->surface_state.h_pitch = obj_surface->height;
619     
620     /* Set up reference frames and their direct MV buffers */
621     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
622         obj_surface = encode_state->reference_objects[i];
623         
624         if (obj_surface && obj_surface->bo) {
625             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
626             dri_bo_reference(obj_surface->bo);
627
628             /* Check DMV buffer */
629             if ( obj_surface->private_data == NULL) {
630                 
631                 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
632                 assert(gen6_avc_surface);
633                 gen6_avc_surface->dmv_top = 
634                     dri_bo_alloc(i965->intel.bufmgr,
635                                  "Buffer",
636                                  68 * width_in_mbs * height_in_mbs, 
637                                  64);
638                 gen6_avc_surface->dmv_bottom = 
639                     dri_bo_alloc(i965->intel.bufmgr,
640                                  "Buffer",
641                                  68 * width_in_mbs * height_in_mbs, 
642                                  64);
643                 assert(gen6_avc_surface->dmv_top);
644                 assert(gen6_avc_surface->dmv_bottom);
645                 obj_surface->private_data = gen6_avc_surface;
646                 obj_surface->free_private_data = gen_free_avc_surface; 
647             }
648     
649             gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
650             /* Setup DMV buffer */
651             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
652             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
653             dri_bo_reference(gen6_avc_surface->dmv_top);
654             dri_bo_reference(gen6_avc_surface->dmv_bottom);
655         } else {
656             break;
657         }
658     }
659
660     mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
661     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
662
663     obj_buffer = encode_state->coded_buf_object;
664     bo = obj_buffer->buffer_store->bo;
665     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
666     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
667     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
668     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
669     
670     dri_bo_map(bo, 1);
671     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
672     coded_buffer_segment->mapped = 0;
673     coded_buffer_segment->codec = encoder_context->codec;
674     dri_bo_unmap(bo);
675
676     return vaStatus;
677 }
678 /*
679  * Each LUT entry packs a pair of 4-bit fields in a (shift, base) structure,
680  * so that value = base << shift, i.e. 2^K * X = value with X in [1, 15].
681  * A cost therefore has to be converted into the nearest representable LUT value.
682  * The derivation of the shift K is:
683  * 2^K * X = 2^n * (1 + deltaX)
684  *    K + log2(X) = n + log2(1 + deltaX)
685  *    log2(X) = n - K + log2(1 + deltaX)
686  *    As X is in the range of [1, 15]:
687  *      4 > n - K + log2(1 + deltaX) >= 0
688  *      =>    n + log2(1 + deltaX) >= K > n - 4 + log2(1 + deltaX)
689  *    From this range the K (and base X) closest to the original value is chosen.
690  */
691 int intel_format_lutvalue(int value, int max)
692 {
693     int ret;
694     int logvalue, temp1, temp2;
695
696     if (value <= 0)
697         return 0;
698
699     logvalue = (int)(log2f((float)value));
700     if (logvalue < 4) {
701         ret = value;
702     } else {
703         int error, temp_value, base, j, temp_err;
704         error = value;
705         j = logvalue - 4 + 1;
706         ret = -1;
707         for(; j <= logvalue; j++) {
708             if (j == 0) {
709                 base = value >> j;
710             } else {
711                 base = (value + (1 << (j - 1)) - 1) >> j;
712             }
713             if (base >= 16)
714                 continue;
715
716             temp_value = base << j;
717             temp_err = abs(value - temp_value);
718             if (temp_err < error) {
719                 error = temp_err;
720                 ret = (j << 4) | base;
721                 if (temp_err == 0)
722                     break;
723             }
724         }
725     }
726     temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
727     temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
728     if (temp1 > temp2)
729         ret = max;
730     return ret;
731
732 }
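
/*
 * Example: intel_format_lutvalue(100, 0x8f) scans shifts 3..6, finds that
 * 12 << 3 = 96 is the closest representable value and returns 0x3c
 * (shift 3 in the high nibble, base 12 in the low nibble).  The max
 * argument is decoded the same way to cap the result: 0x8f decodes to
 * 15 << 8 = 3840, so 0x3c is returned unchanged here.
 */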
733
734
735 #define         QP_MAX                  52
736 #define         VP8_QP_MAX              128
737
738
739 static float intel_lambda_qp(int qp)
740 {
741     float value, lambdaf;
742     value = qp;
743     value = value / 6 - 2;
744     if (value < 0)
745         value = 0;
746     lambdaf = roundf(powf(2, value));
747     return lambdaf;
748 }
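
/*
 * The lambda used for the mode/MV costs is roughly 2^(qp/6 - 2), rounded to
 * the nearest integer and never smaller than 1; e.g. qp = 28 gives
 * roundf(2^2.67) = 6.
 */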
749
750 static
751 void intel_h264_calc_mbmvcost_qp(int qp,
752                                  int slice_type,
753                                  uint8_t *vme_state_message)
754 {
755     int m_cost, j, mv_count;
756     float   lambda, m_costf;
757
758     assert(qp <= QP_MAX); 
759     lambda = intel_lambda_qp(qp);
760
761     m_cost = lambda;
762     vme_state_message[MODE_CHROMA_INTRA] = 0;
763     vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
764
765     if (slice_type == SLICE_TYPE_I) {
766         vme_state_message[MODE_INTRA_16X16] = 0;
767         m_cost = lambda * 4;
768         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
769         m_cost = lambda * 16; 
770         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
771         m_cost = lambda * 3;
772         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
773     } else {
774         m_cost = 0;
775         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
776         for (j = 1; j < 3; j++) {
777             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
778             m_cost = (int)m_costf;
779             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
780         }
781         mv_count = 3;
782         for (j = 4; j <= 64; j *= 2) {
783             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
784             m_cost = (int)m_costf;
785             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
786             mv_count++;
787         }
788
789         if (qp <= 25) {
790             vme_state_message[MODE_INTRA_16X16] = 0x4a;
791             vme_state_message[MODE_INTRA_8X8] = 0x4a;
792             vme_state_message[MODE_INTRA_4X4] = 0x4a;
793             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
794             vme_state_message[MODE_INTER_16X16] = 0x4a;
795             vme_state_message[MODE_INTER_16X8] = 0x4a;
796             vme_state_message[MODE_INTER_8X8] = 0x4a;
797             vme_state_message[MODE_INTER_8X4] = 0x4a;
798             vme_state_message[MODE_INTER_4X4] = 0x4a;
799             vme_state_message[MODE_INTER_BWD] = 0x2a;
800             return;
801         }
802         m_cost = lambda * 10;
803         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
804         m_cost = lambda * 14;
805         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
806         m_cost = lambda * 24; 
807         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
808         m_costf = lambda * 3.5;
809         m_cost = m_costf;
810         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
811         if (slice_type == SLICE_TYPE_P) {
812             m_costf = lambda * 2.5;
813             m_cost = m_costf;
814             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
815             m_costf = lambda * 4;
816             m_cost = m_costf;
817             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
818             m_costf = lambda * 1.5;
819             m_cost = m_costf;
820             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
821             m_costf = lambda * 3;
822             m_cost = m_costf;
823             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
824             m_costf = lambda * 5;
825             m_cost = m_costf;
826             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
827             /* BWD is not used in P-frame */
828             vme_state_message[MODE_INTER_BWD] = 0;
829         } else {
830             m_costf = lambda * 2.5;
831             m_cost = m_costf;
832             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
833             m_costf = lambda * 5.5;
834             m_cost = m_costf;
835             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
836             m_costf = lambda * 3.5;
837             m_cost = m_costf;
838             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
839             m_costf = lambda * 5.0;
840             m_cost = m_costf;
841             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
842             m_costf = lambda * 6.5;
843             m_cost = m_costf;
844             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
845             m_costf = lambda * 1.5;
846             m_cost = m_costf;
847             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
848         }
849     }
850     return;
851 }
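
/*
 * Note on the tables above: for qp <= 25 a flat cost table is used instead
 * of the lambda-scaled one (in the LUT format described earlier, 0x4a
 * decodes to 10 << 4 = 160 and 0x2a to 10 << 2 = 40); for higher QPs every
 * intra/inter mode cost is a fixed multiple of lambda.
 */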
852
853 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
854                                 struct encode_state *encode_state,
855                                 struct intel_encoder_context *encoder_context)
856 {
857     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
858     struct gen6_vme_context *vme_context = encoder_context->vme_context;
859     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
860     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
861     int qp;
862     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
863
864     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
865
866     if (encoder_context->rate_control_mode == VA_RC_CQP)
867         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
868     else
869         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
870
871     if (vme_state_message == NULL)
872         return;
873
874     intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
875 }
876
877 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
878                                 struct encode_state *encode_state,
879                                 struct intel_encoder_context *encoder_context)
880 {
881     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
882     struct gen6_vme_context *vme_context = encoder_context->vme_context;
883     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
884     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
885     int qp, m_cost, j, mv_count;
886     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
887     float   lambda, m_costf;
888
889     int is_key_frame = !pic_param->pic_flags.bits.frame_type;
890     int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
891   
892     if (vme_state_message == NULL)
893         return;
894  
895     if (encoder_context->rate_control_mode == VA_RC_CQP)
896         qp = q_matrix->quantization_index[0];
897     else
898         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
899
900     lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
901
902     m_cost = lambda;
903     vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
904
905     if (is_key_frame) {
906         vme_state_message[MODE_INTRA_16X16] = 0;
907         m_cost = lambda * 16; 
908         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
909         m_cost = lambda * 3;
910         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
911     } else {
912         m_cost = 0;
913         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
914         for (j = 1; j < 3; j++) {
915             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
916             m_cost = (int)m_costf;
917             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
918         }
919         mv_count = 3;
920         for (j = 4; j <= 64; j *= 2) {
921             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
922             m_cost = (int)m_costf;
923             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
924             mv_count++;
925         }
926
927         if (qp < 92 ) {
928             vme_state_message[MODE_INTRA_16X16] = 0x4a;
929             vme_state_message[MODE_INTRA_4X4] = 0x4a;
930             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
931             vme_state_message[MODE_INTER_16X16] = 0x4a;
932             vme_state_message[MODE_INTER_16X8] = 0x4a;
933             vme_state_message[MODE_INTER_8X8] = 0x4a;
934             vme_state_message[MODE_INTER_4X4] = 0x4a;
935             vme_state_message[MODE_INTER_BWD] = 0;
936             return;
937         }
938         m_cost = lambda * 10;
939         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
940         m_cost = lambda * 24; 
941         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
942             
943         m_costf = lambda * 3.5;
944         m_cost = m_costf;
945         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
946
947         m_costf = lambda * 2.5;
948         m_cost = m_costf;
949         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
950         m_costf = lambda * 4;
951         m_cost = m_costf;
952         vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
953         m_costf = lambda * 1.5;
954         m_cost = m_costf;
955         vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
956         m_costf = lambda * 5;
957         m_cost = m_costf;
958         vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
959         /* BWD is not used in P-frame */
960         vme_state_message[MODE_INTER_BWD] = 0;
961     }
962 }
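
/*
 * VP8 quantizer indices run from 0 to 127, so qp * QP_MAX / VP8_QP_MAX
 * rescales them onto the H.264-style 0..51 range before reusing the same
 * lambda model as the H.264 path above.
 */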
963
964 #define         MB_SCOREBOARD_A         (1 << 0)
965 #define         MB_SCOREBOARD_B         (1 << 1)
966 #define         MB_SCOREBOARD_C         (1 << 2)
967 void 
968 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
969 {
970     vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
971     vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
972     vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
973                                                            MB_SCOREBOARD_B |
974                                                            MB_SCOREBOARD_C);
975
976     /* In VME prediction the current MB depends on its neighbouring
977      * A/B/C macroblocks, so the left/up/up-right dependencies
978      * must be considered.
979      */
980     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
981     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
982     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
983     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
984     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
985     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
986
987     vme_context->gpe_context.vfe_desc7.dword = 0;
988     return;
989 }
990
991 /* return -1 if the MB at (x_index, y_index) lies outside the frame or the slice's MB range, 0 otherwise */
992 static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
993 {
994     int mb_index;
995     if (x_index < 0 || x_index >= mb_width)
996         return -1;
997     if (y_index < 0 || y_index >= mb_height)
998         return -1;
999
1000     mb_index = y_index * mb_width + x_index;
1001     if (mb_index < first_mb || mb_index > (first_mb + num_mb))
1002         return -1;
1003     return 0;
1004 }
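
/*
 * The walker below emits one MEDIA_OBJECT per macroblock along diagonal
 * wavefronts (each inner step moves by (-2, +1)), so that an MB is only
 * dispatched after its A (left), B (top) and C (top-right) neighbours,
 * letting the scoreboard configured in gen7_vme_scoreboard_init() enforce
 * those dependencies.
 */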
1005
1006 void
1007 gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
1008                                      struct encode_state *encode_state,
1009                                      int mb_width, int mb_height,
1010                                      int kernel,
1011                                      int transform_8x8_mode_flag,
1012                                      struct intel_encoder_context *encoder_context)
1013 {
1014     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1015     int mb_row;
1016     int s;
1017     unsigned int *command_ptr;
1018     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1019     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1020     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1021     int qp,qp_mb,qp_index;
1022     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1023
1024     if (encoder_context->rate_control_mode == VA_RC_CQP)
1025         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1026     else
1027         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1028
1029 #define         USE_SCOREBOARD          (1 << 21)
1030  
1031     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1032     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1033
1034     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1035         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
1036         int first_mb = pSliceParameter->macroblock_address;
1037         int num_mb = pSliceParameter->num_macroblocks;
1038         unsigned int mb_intra_ub, score_dep;
1039         int x_outer, y_outer, x_inner, y_inner;
1040         int xtemp_outer = 0;
1041
1042         x_outer = first_mb % mb_width;
1043         y_outer = first_mb / mb_width;
1044         mb_row = y_outer;
1045
1046         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1047             x_inner = x_outer;
1048             y_inner = y_outer;
1049             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1050                 mb_intra_ub = 0;
1051                 score_dep = 0;
1052                 if (x_inner != 0) {
1053                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1054                     score_dep |= MB_SCOREBOARD_A; 
1055                 }
1056                 if (y_inner != mb_row) {
1057                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1058                     score_dep |= MB_SCOREBOARD_B;
1059                     if (x_inner != 0)
1060                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1061                     if (x_inner != (mb_width -1)) {
1062                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1063                         score_dep |= MB_SCOREBOARD_C;
1064                     }
1065                 }
1066
1067                 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1068                 *command_ptr++ = kernel;
1069                 *command_ptr++ = USE_SCOREBOARD;
1070                 /* Indirect data */
1071                 *command_ptr++ = 0;
1072                 /* the (X, Y) term of scoreboard */
1073                 *command_ptr++ = ((y_inner << 16) | x_inner);
1074                 *command_ptr++ = score_dep;
1075                 /*inline data */
1076                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1077                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1078                 /* QP occupies one byte */
1079                 if (vme_context->roi_enabled) {
1080                     qp_index = y_inner * mb_width + x_inner;
1081                     qp_mb = *(vme_context->qp_per_mb + qp_index);
1082                 } else
1083                     qp_mb = qp;
1084                 *command_ptr++ = qp_mb;
1085                 x_inner -= 2;
1086                 y_inner += 1;
1087             }
1088             x_outer += 1;
1089         }
1090
1091         xtemp_outer = mb_width - 2;
1092         if (xtemp_outer < 0)
1093             xtemp_outer = 0;
1094         x_outer = xtemp_outer;
1095         y_outer = first_mb / mb_width;
1096         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
1097             y_inner = y_outer;
1098             x_inner = x_outer;
1099             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1100                 mb_intra_ub = 0;
1101                 score_dep = 0;
1102                 if (x_inner != 0) {
1103                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1104                     score_dep |= MB_SCOREBOARD_A; 
1105                 }
1106                 if (y_inner != mb_row) {
1107                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1108                     score_dep |= MB_SCOREBOARD_B;
1109                     if (x_inner != 0)
1110                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1111
1112                     if (x_inner != (mb_width -1)) {
1113                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1114                         score_dep |= MB_SCOREBOARD_C;
1115                     }
1116                 }
1117
1118                 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1119                 *command_ptr++ = kernel;
1120                 *command_ptr++ = USE_SCOREBOARD;
1121                 /* Indirect data */
1122                 *command_ptr++ = 0;
1123                 /* the (X, Y) term of scoreboard */
1124                 *command_ptr++ = ((y_inner << 16) | x_inner);
1125                 *command_ptr++ = score_dep;
1126                 /*inline data */
1127                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1128                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1129                 /* qp occupies one byte */
1130                 if (vme_context->roi_enabled) {
1131                     qp_index = y_inner * mb_width + x_inner;
1132                     qp_mb = *(vme_context->qp_per_mb + qp_index);
1133                 } else
1134                     qp_mb = qp;
1135                 *command_ptr++ = qp_mb;
1136
1137                 x_inner -= 2;
1138                 y_inner += 1;
1139             }
1140             x_outer++;
1141             if (x_outer >= mb_width) {
1142                 y_outer += 1;
1143                 x_outer = xtemp_outer;
1144             }           
1145         }
1146     }
1147
1148     *command_ptr++ = 0;
1149     *command_ptr++ = MI_BATCH_BUFFER_END;
1150
1151     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1152 }
1153
1154 static uint8_t
1155 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1156 {
1157     unsigned int is_long_term =
1158         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1159     unsigned int is_top_field =
1160         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1161     unsigned int is_bottom_field =
1162         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1163
1164     return ((is_long_term                         << 6) |
1165             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1166             (frame_store_id                       << 1) |
1167             ((is_top_field ^ 1) & is_bottom_field));
1168 }
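
/*
 * The returned byte appears to follow the MFX_AVC_REF_IDX_STATE entry
 * layout: bit 6 = long-term reference, bit 5 = frame (rather than single
 * field) picture, bits 4:1 = frame store index, bit 0 = bottom-field flag.
 */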
1169
1170 void
1171 intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
1172                             struct encode_state *encode_state,
1173                             struct intel_encoder_context *encoder_context)
1174 {
1175     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1176     struct intel_batchbuffer *batch = encoder_context->base.batch;
1177     int slice_type;
1178     struct object_surface *obj_surface;
1179     unsigned int fref_entry, bref_entry;
1180     int frame_index, i;
1181     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1182
1183     fref_entry = 0x80808080;
1184     bref_entry = 0x80808080;
1185     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1186
1187     if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
1188         int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
1189
1190         if (ref_idx_l0 > 3) {
1191             WARN_ONCE("ref_idx_l0 is out of range\n");
1192             ref_idx_l0 = 0;
1193         }
1194
1195         obj_surface = vme_context->used_reference_objects[0];
1196         frame_index = -1;
1197         for (i = 0; i < 16; i++) {
1198             if (obj_surface &&
1199                 obj_surface == encode_state->reference_objects[i]) {
1200                 frame_index = i;
1201                 break;
1202             }
1203         }
1204         if (frame_index == -1) {
1205             WARN_ONCE("RefPicList0 is not found in DPB!\n");
1206         } else {
1207             int ref_idx_l0_shift = ref_idx_l0 * 8;
1208             fref_entry &= ~(0xFF << ref_idx_l0_shift);
1209             fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
1210         }
1211     }
1212
1213     if (slice_type == SLICE_TYPE_B) {
1214         int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
1215
1216         if (ref_idx_l1 > 3) {
1217             WARN_ONCE("ref_idx_l1 is out of range\n");
1218             ref_idx_l1 = 0;
1219         }
1220
1221         obj_surface = vme_context->used_reference_objects[1];
1222         frame_index = -1;
1223         for (i = 0; i < 16; i++) {
1224             if (obj_surface &&
1225                 obj_surface == encode_state->reference_objects[i]) {
1226                 frame_index = i;
1227                 break;
1228             }
1229         }
1230         if (frame_index == -1) {
1231             WARN_ONCE("RefPicList1 is not found in DPB!\n");
1232         } else {
1233             int ref_idx_l1_shift = ref_idx_l1 * 8;
1234             bref_entry &= ~(0xFF << ref_idx_l1_shift);
1235             bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
1236         }
1237     }
1238
1239     BEGIN_BCS_BATCH(batch, 10);
1240     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1241     OUT_BCS_BATCH(batch, 0);                  //Select L0
1242     OUT_BCS_BATCH(batch, fref_entry);         //Only 1 reference
1243     for(i = 0; i < 7; i++) {
1244         OUT_BCS_BATCH(batch, 0x80808080);
1245     }
1246     ADVANCE_BCS_BATCH(batch);
1247
1248     BEGIN_BCS_BATCH(batch, 10);
1249     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1250     OUT_BCS_BATCH(batch, 1);                  //Select L1
1251     OUT_BCS_BATCH(batch, bref_entry);         //Only 1 reference
1252     for(i = 0; i < 7; i++) {
1253         OUT_BCS_BATCH(batch, 0x80808080);
1254     }
1255     ADVANCE_BCS_BATCH(batch);
1256 }
1257
1258
1259 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1260                                  struct encode_state *encode_state,
1261                                  struct intel_encoder_context *encoder_context)
1262 {
1263     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1264     uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1265     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1266     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1267     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1268     uint32_t mv_x, mv_y;
1269     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1270     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1271     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1272
1273     if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1274         mv_x = 512;
1275         mv_y = 64;
1276     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1277         mv_x = 1024;
1278         mv_y = 128;
1279     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1280         mv_x = 2048;
1281         mv_y = 128;
1282     } else {
1283         WARN_ONCE("Incorrect Mpeg2 level setting!\n");
1284         mv_x = 512;
1285         mv_y = 64;
1286     }
1287
1288     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1289     if (pic_param->picture_type != VAEncPictureTypeIntra) {
1290         int qp, m_cost, j, mv_count;
1291         float   lambda, m_costf;
1292         slice_param = (VAEncSliceParameterBufferMPEG2 *)
1293             encode_state->slice_params_ext[0]->buffer;
1294         qp = slice_param->quantiser_scale_code;
1295         lambda = intel_lambda_qp(qp);
1296         /* MPEG-2 has no 8x8/4x4 intra prediction modes, so those costs are zero */
1297         vme_state_message[MODE_INTRA_8X8] = 0;
1298         vme_state_message[MODE_INTRA_4X4] = 0;
1299         vme_state_message[MODE_INTER_MV0] = 0;
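        /* Fill the MV cost LUT: the cost of each MV-length bucket grows
         * roughly logarithmically, (log2(len + 1) + 1.718) * lambda, where
         * lambda is derived from the slice quantiser above. */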
1300         for (j = 1; j < 3; j++) {
1301             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1302             m_cost = (int)m_costf;
1303             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1304         }
1305         mv_count = 3;
1306         for (j = 4; j <= 64; j *= 2) {
1307             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1308             m_cost = (int)m_costf;
1309             vme_state_message[MODE_INTER_MV0 + mv_count] =
1310                 intel_format_lutvalue(m_cost, 0x6f);
1311             mv_count++;
1312         }
1313         m_cost = lambda;
1314         /* Only the 16x16 search is performed, so the mode costs for the other
1315          * partitions (for example 16x8/8x8) can be ignored.
1316          */
1317         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1318         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1319
1320         vme_state_message[MODE_INTER_16X8] = 0;
1321         vme_state_message[MODE_INTER_8X8] = 0;
1322         vme_state_message[MODE_INTER_8X4] = 0;
1323         vme_state_message[MODE_INTER_4X4] = 0;
1324         vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
1325
1326     }
1327     vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1328
1329     vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
1330         width_in_mbs;
1331 }
1332
1333 void
1334 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
1335                                            struct encode_state *encode_state,
1336                                            int mb_width, int mb_height,
1337                                            int kernel,
1338                                            struct intel_encoder_context *encoder_context)
1339 {
1340     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1341     unsigned int *command_ptr;
1342
1343 #define         MPEG2_SCOREBOARD                (1 << 21)
1344
1345     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1346     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1347
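    /* The walker below dispatches one MEDIA_OBJECT per macroblock in a
     * wavefront order: each inner step moves to (x - 2, y + 1), so the left
     * (A), top (B) and top-right (C) neighbours of a macroblock are emitted
     * earlier, matching the scoreboard dependencies set in score_dep. */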
1348     {
1349         unsigned int mb_intra_ub, score_dep;
1350         int x_outer, y_outer, x_inner, y_inner;
1351         int xtemp_outer = 0;
1352         int first_mb = 0;
1353         int num_mb = mb_width * mb_height;
1354
1355         x_outer = 0;
1356         y_outer = 0;
1357
1358
1359         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1360             x_inner = x_outer;
1361             y_inner = y_outer;
1362             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1363                 mb_intra_ub = 0;
1364                 score_dep = 0;
1365                 if (x_inner != 0) {
1366                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1367                     score_dep |= MB_SCOREBOARD_A; 
1368                 }
1369                 if (y_inner != 0) {
1370                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1371                     score_dep |= MB_SCOREBOARD_B;
1372
1373                     if (x_inner != 0)
1374                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1375
1376                     if (x_inner != (mb_width -1)) {
1377                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1378                         score_dep |= MB_SCOREBOARD_C;
1379                     }
1380                 }
1381
1382                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1383                 *command_ptr++ = kernel;
1384                 *command_ptr++ = MPEG2_SCOREBOARD;
1385                 /* Indirect data */
1386                 *command_ptr++ = 0;
1387                 /* the (X, Y) term of scoreboard */
1388                 *command_ptr++ = ((y_inner << 16) | x_inner);
1389                 *command_ptr++ = score_dep;
1390                 /*inline data */
1391                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1392                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1393                 x_inner -= 2;
1394                 y_inner += 1;
1395             }
1396             x_outer += 1;
1397         }
1398
1399         xtemp_outer = mb_width - 2;
1400         if (xtemp_outer < 0)
1401             xtemp_outer = 0;
1402         x_outer = xtemp_outer;
1403         y_outer = 0;
1404         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
1405             y_inner = y_outer;
1406             x_inner = x_outer;
1407             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1408                 mb_intra_ub = 0;
1409                 score_dep = 0;
1410                 if (x_inner != 0) {
1411                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1412                     score_dep |= MB_SCOREBOARD_A; 
1413                 }
1414                 if (y_inner != 0) {
1415                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1416                     score_dep |= MB_SCOREBOARD_B;
1417
1418                     if (x_inner != 0)
1419                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1420
1421                     if (x_inner != (mb_width -1)) {
1422                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1423                         score_dep |= MB_SCOREBOARD_C;
1424                     }
1425                 }
1426
1427                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1428                 *command_ptr++ = kernel;
1429                 *command_ptr++ = MPEG2_SCOREBOARD;
1430                 /* Indirect data */
1431                 *command_ptr++ = 0;
1432                 /* the (X, Y) term of scoreboard */
1433                 *command_ptr++ = ((y_inner << 16) | x_inner);
1434                 *command_ptr++ = score_dep;
1435                 /*inline data */
1436                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1437                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1438
1439                 x_inner -= 2;
1440                 y_inner += 1;
1441             }
1442             x_outer++;
1443             if (x_outer >= mb_width) {
1444                 y_outer += 1;
1445                 x_outer = xtemp_outer;
1446             }           
1447         }
1448     }
1449
1450     *command_ptr++ = 0;
1451     *command_ptr++ = MI_BATCH_BUFFER_END;
1452
1453     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1454     return;
1455 }
1456
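/*
 * Return the index of the ref_list entry whose TopFieldOrderCnt is closest to
 * the current picture on the requested side (dir == 0: past references,
 * dir == 1: future references), or -1 if no valid entry qualifies.
 */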
1457 static int
1458 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1459                           VAPictureH264 *ref_list,
1460                           int num_pictures,
1461                           int dir)
1462 {
1463     int i, found = -1, min = 0x7FFFFFFF;
1464
1465     for (i = 0; i < num_pictures; i++) {
1466         int tmp;
1467
1468         if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1469             (ref_list[i].picture_id == VA_INVALID_SURFACE))
1470             break;
1471
1472         tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1473
1474         if (dir)
1475             tmp = -tmp;
1476
1477         if (tmp > 0 && tmp < min) {
1478             min = tmp;
1479             found = i;
1480         }
1481     }
1482
1483     return found;
1484 }
1485
1486 void
1487 intel_avc_vme_reference_state(VADriverContextP ctx,
1488                               struct encode_state *encode_state,
1489                               struct intel_encoder_context *encoder_context,
1490                               int list_index,
1491                               int surface_index,
1492                               void (* vme_source_surface_state)(
1493                                   VADriverContextP ctx,
1494                                   int index,
1495                                   struct object_surface *obj_surface,
1496                                   struct intel_encoder_context *encoder_context))
1497 {
1498     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1499     struct object_surface *obj_surface = NULL;
1500     struct i965_driver_data *i965 = i965_driver_data(ctx);
1501     VASurfaceID ref_surface_id;
1502     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1503     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1504     int max_num_references;
1505     VAPictureH264 *curr_pic;
1506     VAPictureH264 *ref_list;
1507     int ref_idx;
1508
1509     if (list_index == 0) {
1510         max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1511         ref_list = slice_param->RefPicList0;
1512     } else {
1513         max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1514         ref_list = slice_param->RefPicList1;
1515     }
1516
1517     if (max_num_references == 1) {
1518         if (list_index == 0) {
1519             ref_surface_id = slice_param->RefPicList0[0].picture_id;
1520             vme_context->used_references[0] = &slice_param->RefPicList0[0];
1521         } else {
1522             ref_surface_id = slice_param->RefPicList1[0].picture_id;
1523             vme_context->used_references[1] = &slice_param->RefPicList1[0];
1524         }
1525
1526         if (ref_surface_id != VA_INVALID_SURFACE)
1527             obj_surface = SURFACE(ref_surface_id);
1528
1529         if (!obj_surface ||
1530             !obj_surface->bo) {
1531             obj_surface = encode_state->reference_objects[list_index];
1532             vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1533         }
1534
1535         ref_idx = 0;
1536     } else {
1537         curr_pic = &pic_param->CurrPic;
1538
1539         /* select the reference frame in temporal space */
1540         ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1541         ref_surface_id = ref_list[ref_idx].picture_id;
1542
1543         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1544             obj_surface = SURFACE(ref_surface_id);
1545
1546         vme_context->used_reference_objects[list_index] = obj_surface;
1547         vme_context->used_references[list_index] = &ref_list[ref_idx];
1548     }
1549
1550     if (obj_surface &&
1551         obj_surface->bo) {
1552         assert(ref_idx >= 0);
1553         vme_context->used_reference_objects[list_index] = obj_surface;
1554         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
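        /* Replicate the selected reference index into every byte of
         * ref_index_in_mb; intel_mfc_avc_ref_idx_state() above only reads the
         * low byte for each list. */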
1555         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
1556                                                     ref_idx << 16 |
1557                                                     ref_idx <<  8 |
1558                                                     ref_idx);
1559     } else {
1560         vme_context->used_reference_objects[list_index] = NULL;
1561         vme_context->used_references[list_index] = NULL;
1562         vme_context->ref_index_in_mb[list_index] = 0;
1563     }
1564 }
1565
1566 void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
1567                                         struct encode_state *encode_state,
1568                                         struct intel_encoder_context *encoder_context,
1569                                         int slice_index,
1570                                         struct intel_batchbuffer *slice_batch)
1571 {
1572     int count, i, start_index;
1573     unsigned int length_in_bits;
1574     VAEncPackedHeaderParameterBuffer *param = NULL;
1575     unsigned int *header_data = NULL;
1576     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1577     int slice_header_index;
1578
1579     if (encode_state->slice_header_index[slice_index] == 0)
1580         slice_header_index = -1;
1581     else
1582         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1583
1584     count = encode_state->slice_rawdata_count[slice_index];
1585     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1586
1587     for (i = 0; i < count; i++) {
1588         unsigned int skip_emul_byte_cnt;
1589
1590         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1591
1592         param = (VAEncPackedHeaderParameterBuffer *)
1593                     (encode_state->packed_header_params_ext[start_index + i]->buffer);
1594
1595         /* skip the slice header packed data type here as it is inserted last */
1596         if (param->type == VAEncPackedHeaderSlice)
1597             continue;
1598
1599         length_in_bits = param->bit_length;
1600
1601         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1602
1603         /* as the slice header is still required, the last header flag is set to
1604          * zero.
1605          */
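        /* insert_object() arguments, as used throughout this file: header
         * data, length in DWORDs, valid bits in the last DWORD, count of
         * leading bytes exempt from emulation prevention, last-header flag,
         * end-of-slice flag, whether emulation prevention bytes must still be
         * inserted, and the target batch. */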
1606         mfc_context->insert_object(ctx,
1607                                    encoder_context,
1608                                    header_data,
1609                                    ALIGN(length_in_bits, 32) >> 5,
1610                                    length_in_bits & 0x1f,
1611                                    skip_emul_byte_cnt,
1612                                    0,
1613                                    0,
1614                                    !param->has_emulation_bytes,
1615                                    slice_batch);
1616     }
1617
1618     if (slice_header_index == -1) {
1619         unsigned char *slice_header = NULL;
1620         int slice_header_length_in_bits = 0;
1621         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1622         VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1623         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1624
1625         /* No slice header data is passed, so the driver needs to generate it */
1626         /* here for the normal H.264 case. */
1627         slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
1628                                                              pPicParameter,
1629                                                              pSliceParameter,
1630                                                              &slice_header);
1631         mfc_context->insert_object(ctx, encoder_context,
1632                                    (unsigned int *)slice_header,
1633                                    ALIGN(slice_header_length_in_bits, 32) >> 5,
1634                                    slice_header_length_in_bits & 0x1f,
1635                                    5,  /* first 5 bytes are start code + nal unit type */
1636                                    1, 0, 1, slice_batch);
1637
1638         free(slice_header);
1639     } else {
1640         unsigned int skip_emul_byte_cnt;
1641
1642         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1643
1644         param = (VAEncPackedHeaderParameterBuffer *)
1645                     (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1646         length_in_bits = param->bit_length;
1647
1648         /* as the slice header is the last header data for one slice,
1649          * the last header flag is set to one.
1650          */
1651         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1652
1653         mfc_context->insert_object(ctx,
1654                                    encoder_context,
1655                                    header_data,
1656                                    ALIGN(length_in_bits, 32) >> 5,
1657                                    length_in_bits & 0x1f,
1658                                    skip_emul_byte_cnt,
1659                                    1,
1660                                    0,
1661                                    !param->has_emulation_bytes,
1662                                    slice_batch);
1663     }
1664
1665     return;
1666 }
1667
1668 void
1669 intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
1670                                 struct encode_state *encode_state,
1671                                 struct intel_encoder_context *encoder_context)
1672 {
1673     struct i965_driver_data *i965 = i965_driver_data(ctx);
1674     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1675     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1676     int qp;
1677     dri_bo *bo;
1678     uint8_t *cost_table;
1679
1680     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1681
1682
1683     if (slice_type == SLICE_TYPE_I) {
1684         if (vme_context->i_qp_cost_table)
1685             return;
1686     } else if (slice_type == SLICE_TYPE_P) {
1687         if (vme_context->p_qp_cost_table)
1688             return;
1689     } else {
1690         if (vme_context->b_qp_cost_table)
1691             return;
1692     }
1693
1694     /* It is enough to allocate 32 bytes for each qp. */
1695     bo = dri_bo_alloc(i965->intel.bufmgr,
1696                       "cost_table ",
1697                       QP_MAX * 32,
1698                       64);
1699
1700     dri_bo_map(bo, 1);
1701     assert(bo->virtual);
1702     cost_table = (uint8_t *)(bo->virtual);
1703     for (qp = 0; qp < QP_MAX; qp++) {
1704         intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
1705         cost_table += 32;
1706     }
1707
1708     dri_bo_unmap(bo);
1709
1710     if (slice_type == SLICE_TYPE_I) {
1711         vme_context->i_qp_cost_table = bo;
1712     } else if (slice_type == SLICE_TYPE_P) {
1713         vme_context->p_qp_cost_table = bo;
1714     } else {
1715         vme_context->b_qp_cost_table = bo;
1716     }
1717
1718     vme_context->cost_table_size = QP_MAX * 32;
1719     return;
1720 }
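/*
 * The resulting buffer is consumed by intel_h264_setup_cost_surface() below
 * as a cost surface with QP_MAX blocks of 32 bytes, i.e. one row of
 * per-mode/per-MV costs for each QP value.
 */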
1721
1722 extern void
1723 intel_h264_setup_cost_surface(VADriverContextP ctx,
1724                               struct encode_state *encode_state,
1725                               struct intel_encoder_context *encoder_context,
1726                               unsigned long binding_table_offset,
1727                               unsigned long surface_state_offset)
1728 {
1729     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1730     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1731     dri_bo *bo;
1732
1733
1734     struct i965_buffer_surface cost_table;
1735
1736     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1737
1738
1739     if (slice_type == SLICE_TYPE_I) {
1740         bo = vme_context->i_qp_cost_table;
1741     } else if (slice_type == SLICE_TYPE_P) {
1742         bo = vme_context->p_qp_cost_table;
1743     } else {
1744         bo = vme_context->b_qp_cost_table;
1745     }
1746
1747     cost_table.bo = bo;
1748     cost_table.num_blocks = QP_MAX;
1749     cost_table.pitch = 16;
1750     cost_table.size_block = 32;
1751
1752     vme_context->vme_buffer_suface_setup(ctx,
1753                                          &vme_context->gpe_context,
1754                                          &cost_table,
1755                                          binding_table_offset,
1756                                          surface_state_offset);
1757 }
1758
1759 /*
1760  * The conversion between QP and Qstep follows the scaling process of
1761  * the transform coefficients for the luma component in the H.264 spec:
1762  *   Qstep = 2^(QPy / 6 - 6)
1763  * To avoid a too-small Qstep, it is additionally multiplied by 16.
1764  */
1765 static float intel_h264_qp_qstep(int qp)
1766 {
1767     float value, qstep;
1768     value = qp;
1769     value = value / 6 - 2;
1770     qstep = powf(2, value);
1771     return qstep;
1772 }
1773
1774 static int intel_h264_qstep_qp(float qstep)
1775 {
1776     float qp;
1777
1778     qp = 12.0f + 6.0f * log2f(qstep);
1779
1780     return floorf(qp);
1781 }
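/*
 * Worked example (approximate, float rounding aside): QP 28 gives
 * qstep = 2^(28/6 - 2) ~= 6.35, and intel_h264_qstep_qp(6.35) =
 * floor(12 + 6 * log2(6.35)) ~= 28, so the two helpers are inverse mappings.
 */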
1782
1783 /*
1784  * Currently it is based on the following assumption:
1785  *   SUM(roi_area / roi_qstep) + nonroi_area / nonroi_qstep =
1786  *                                             total_area / baseqp_qstep
1787  *
1788  * where qstep is the linearized step size of the H.264 quantizer.
1789  */
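/*
 * Worked example of the assumption above: for a 100 MB picture containing a
 * 20 MB ROI, with base_qp = 30 (qstep 8) and roi_qp = 24 (qstep 4):
 *   20/4 + 80/qstep_nonroi = 100/8  =>  qstep_nonroi = 80/7.5 ~= 10.67
 * which maps back to nonroi_qp = floor(12 + 6 * log2(10.67)) = 32, i.e. the
 * non-ROI area is quantized more coarsely to pay for the ROI quality.
 */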
1790 typedef struct {
1791     int row_start_in_mb;
1792     int row_end_in_mb;
1793     int col_start_in_mb;
1794     int col_end_in_mb;
1795
1796     int width_mbs;
1797     int height_mbs;
1798
1799     int roi_qp;
1800 } ROIRegionParam;
1801
1802 static VAStatus
1803 intel_h264_enc_roi_cbr(VADriverContextP ctx,
1804                        int base_qp,
1805                        VAEncMiscParameterBufferROI *pMiscParamROI,
1806                        struct encode_state *encode_state,
1807                        struct intel_encoder_context *encoder_context)
1808 {
1809     int nonroi_qp;
1810     VAEncROI *region_roi;
1811     bool quickfill = 0;
1812
1813     ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS];
1814     int num_roi = 0;
1815     int i,j;
1816
1817     float temp;
1818     float qstep_nonroi, qstep_base;
1819     float roi_area, total_area, nonroi_area;
1820     float sum_roi;
1821
1822     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1823     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1824     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1825     int mbs_in_picture = width_in_mbs * height_in_mbs;
1826
1827     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1828     VAStatus vaStatus = VA_STATUS_SUCCESS;
1829
1830     if(pMiscParamROI != NULL)
1831     {
1832         num_roi = (pMiscParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pMiscParamROI->num_roi;
1833
1834         /* Currently roi_value_is_qp_delta is the only supported priority mode.
1835          *
1836          * The qp_delta set by the user is added to base_qp, and the result is
1837          * then clamped to the valid QP range [1, 51].
1838          */
1839         ASSERT_RET(pMiscParamROI->roi_flags.bits.roi_value_is_qp_delta,VA_STATUS_ERROR_INVALID_PARAMETER);
1840     }
1841
1842     /* When base_qp is 12 or lower, the quality is already quite good based
1843      * on H.264 testing experience, so there is no need to adjust the QP for
1844      * the ROI regions.
1845      */
1846     if (base_qp <= 12) {
1847         nonroi_qp = base_qp;
1848         quickfill = 1;
1849         goto qp_fill;
1850     }
1851
1852     sum_roi = 0.0f;
1853     roi_area = 0;
1854     for (i = 0; i < num_roi; i++) {
1855         int row_start, row_end, col_start, col_end;
1856         int roi_width_mbs, roi_height_mbs;
1857         int mbs_in_roi;
1858         int roi_qp;
1859         float qstep_roi;
1860
1861         region_roi =  (VAEncROI *)pMiscParamROI->roi + i;
1862
1863         col_start = region_roi->roi_rectangle.x;
1864         col_end = col_start + region_roi->roi_rectangle.width;
1865         row_start = region_roi->roi_rectangle.y;
1866         row_end = row_start + region_roi->roi_rectangle.height;
1867         col_start = col_start / 16;
1868         col_end = (col_end + 15) / 16;
1869         row_start = row_start / 16;
1870         row_end = (row_end + 15) / 16;
1871
1872         roi_width_mbs = col_end - col_start;
1873         roi_height_mbs = row_end - row_start;
1874         mbs_in_roi = roi_width_mbs * roi_height_mbs;
1875
1876         param_regions[i].row_start_in_mb = row_start;
1877         param_regions[i].row_end_in_mb = row_end;
1878         param_regions[i].col_start_in_mb = col_start;
1879         param_regions[i].col_end_in_mb = col_end;
1880         param_regions[i].width_mbs = roi_width_mbs;
1881         param_regions[i].height_mbs = roi_height_mbs;
1882
1883         roi_qp = base_qp + region_roi->roi_value;
1884         BRC_CLIP(roi_qp, 1, 51);
1885
1886         param_regions[i].roi_qp = roi_qp;
1887         qstep_roi = intel_h264_qp_qstep(roi_qp);
1888
1889         roi_area += mbs_in_roi;
1890         sum_roi += mbs_in_roi / qstep_roi;
1891     }
1892
1893     total_area = mbs_in_picture;
1894     nonroi_area = total_area - roi_area;
1895
1896     qstep_base = intel_h264_qp_qstep(base_qp);
1897     temp = (total_area / qstep_base - sum_roi);
1898
1899     if (temp < 0) {
1900         nonroi_qp = 51;
1901     } else {
1902         qstep_nonroi = nonroi_area / temp;
1903         nonroi_qp = intel_h264_qstep_qp(qstep_nonroi);
1904     }
1905
1906     BRC_CLIP(nonroi_qp, 1, 51);
1907
1908 qp_fill:
1909     memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture);
1910     if (!quickfill) {
1911         char *qp_ptr;
1912
1913         for (i = 0; i < num_roi; i++) {
1914             for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) {
1915                 qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb;
1916                 memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs);
1917             }
1918         }
1919     }
1920     return vaStatus;
1921 }
1922
1923 extern void
1924 intel_h264_enc_roi_config(VADriverContextP ctx,
1925                           struct encode_state *encode_state,
1926                           struct intel_encoder_context *encoder_context)
1927 {
1928     char *qp_ptr;
1929     int i, j;
1930     VAEncROI *region_roi;
1931     struct i965_driver_data *i965 = i965_driver_data(ctx);
1932     VAEncMiscParameterBuffer* pMiscParamROI;
1933     VAEncMiscParameterBufferROI *pParamROI = NULL;
1934     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1935     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1936     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1937     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1938     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1939
1940     int row_start, row_end, col_start, col_end;
1941     int num_roi = 0;
1942
1943     vme_context->roi_enabled = 0;
1944     /* Restriction: Disable ROI when multi-slice is enabled */
1945     if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
1946         return;
1947
1948     if (encode_state->misc_param[VAEncMiscParameterTypeROI][0] != NULL) {
1949         pMiscParamROI = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeROI][0]->buffer;
1950         pParamROI = (VAEncMiscParameterBufferROI *)pMiscParamROI->data;
1951
1952         /* clamp the number of ROI regions to the supported maximum */
1953         num_roi = (pParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pParamROI->num_roi;
1954     }
1955
1956     if (num_roi > 0)
1957         vme_context->roi_enabled = 1;
1958
1959     if (!vme_context->roi_enabled)
1960         return;
1961
1962     if ((vme_context->saved_width_mbs !=  width_in_mbs) ||
1963         (vme_context->saved_height_mbs != height_in_mbs)) {
1964         free(vme_context->qp_per_mb);
1965         vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);
1966
1967         vme_context->saved_width_mbs = width_in_mbs;
1968         vme_context->saved_height_mbs = height_in_mbs;
1969         assert(vme_context->qp_per_mb);
1970     }
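    /* qp_per_mb holds one QP byte per macroblock in raster-scan order
     * (width_in_mbs * height_in_mbs entries); the VME walker reads it when
     * ROI is enabled. */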
1971     if (encoder_context->rate_control_mode == VA_RC_CBR) {
1972         /*
1973          * TODO: a more sophisticated QP adjustment needs to be added.
1974          * Currently the base QP is simply the BRC slice QP.
1975          */
1976         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1977         int qp;
1978         int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1979
1980         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1981         intel_h264_enc_roi_cbr(ctx, qp, pParamROI,encode_state, encoder_context);
1982
1983     } else if (encoder_context->rate_control_mode == VA_RC_CQP){
1984         VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1985         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1986         int qp;
1987
1988         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1989         memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
1990
1991
1992         for (j = num_roi; j ; j--) {
1993             int qp_delta, qp_clip;
1994
1995             region_roi =  (VAEncROI *)pParamROI->roi + j - 1;
1996
1997             col_start = region_roi->roi_rectangle.x;
1998             col_end = col_start + region_roi->roi_rectangle.width;
1999             row_start = region_roi->roi_rectangle.y;
2000             row_end = row_start + region_roi->roi_rectangle.height;
2001
2002             col_start = col_start / 16;
2003             col_end = (col_end + 15) / 16;
2004             row_start = row_start / 16;
2005             row_end = (row_end + 15) / 16;
2006
2007             qp_delta = region_roi->roi_value;
2008             qp_clip = qp + qp_delta;
2009
2010             BRC_CLIP(qp_clip, 1, 51);
2011
2012             for (i = row_start; i < row_end; i++) {
2013                 qp_ptr = vme_context->qp_per_mb + (i * width_in_mbs) + col_start;
2014                 memset(qp_ptr, qp_clip, (col_end - col_start));
2015             }
2016         }
2017     } else {
2018         /*
2019          * TODO: support ROI for other rate control modes; for now it is disabled.
2020          */
2021         vme_context->roi_enabled = 0;
2022     }
2023
2024     if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info))
2025         encoder_context->soft_batch_force = 1;
2026
2027     return;
2028 }
2029
2030 /* HEVC */
2031 static int
2032 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
2033                            VAPictureHEVC *ref_list,
2034                            int num_pictures,
2035                            int dir)
2036 {
2037     int i, found = -1, min = 0x7FFFFFFF;
2038
2039     for (i = 0; i < num_pictures; i++) {
2040         int tmp;
2041
2042         if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
2043             (ref_list[i].picture_id == VA_INVALID_SURFACE))
2044             break;
2045
2046         tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
2047
2048         if (dir)
2049             tmp = -tmp;
2050
2051         if (tmp > 0 && tmp < min) {
2052             min = tmp;
2053             found = i;
2054         }
2055     }
2056
2057     return found;
2058 }
2059 void
2060 intel_hevc_vme_reference_state(VADriverContextP ctx,
2061                                struct encode_state *encode_state,
2062                                struct intel_encoder_context *encoder_context,
2063                                int list_index,
2064                                int surface_index,
2065                                void (* vme_source_surface_state)(
2066                                    VADriverContextP ctx,
2067                                    int index,
2068                                    struct object_surface *obj_surface,
2069                                    struct intel_encoder_context *encoder_context))
2070 {
2071     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2072     struct object_surface *obj_surface = NULL;
2073     struct i965_driver_data *i965 = i965_driver_data(ctx);
2074     VASurfaceID ref_surface_id;
2075     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2076     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2077     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2078     int max_num_references;
2079     VAPictureHEVC *curr_pic;
2080     VAPictureHEVC *ref_list;
2081     int ref_idx;
2082     unsigned int is_hevc10 = 0;
2083     GenHevcSurface *hevc_encoder_surface = NULL;
2084
2085     if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
2086         || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
2087         is_hevc10 = 1;
2088
2089     if (list_index == 0) {
2090         max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
2091         ref_list = slice_param->ref_pic_list0;
2092     } else {
2093         max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
2094         ref_list = slice_param->ref_pic_list1;
2095     }
2096
2097     if (max_num_references == 1) {
2098         if (list_index == 0) {
2099             ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
2100             vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
2101         } else {
2102             ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
2103             vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
2104         }
2105
2106         if (ref_surface_id != VA_INVALID_SURFACE)
2107             obj_surface = SURFACE(ref_surface_id);
2108
2109         if (!obj_surface ||
2110             !obj_surface->bo) {
2111             obj_surface = encode_state->reference_objects[list_index];
2112             vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
2113         }
2114
2115         ref_idx = 0;
2116     } else {
2117         curr_pic = &pic_param->decoded_curr_pic;
2118
2119         /* select the reference frame in temporal space */
2120         ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
2121         ref_surface_id = ref_list[ref_idx].picture_id;
2122
2123         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
2124             obj_surface = SURFACE(ref_surface_id);
2125
2126         vme_context->used_reference_objects[list_index] = obj_surface;
2127         vme_context->used_references[list_index] = &ref_list[ref_idx];
2128     }
2129
2130     if (obj_surface &&
2131         obj_surface->bo) {
2132         assert(ref_idx >= 0);
2133         vme_context->used_reference_objects[list_index] = obj_surface;
2134
2135         if(is_hevc10){
2136             hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2137             assert(hevc_encoder_surface);
2138             obj_surface = hevc_encoder_surface->nv12_surface_obj;
2139         }
2140         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
2141         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
2142                 ref_idx << 16 |
2143                 ref_idx <<  8 |
2144                 ref_idx);
2145     } else {
2146         vme_context->used_reference_objects[list_index] = NULL;
2147         vme_context->used_references[list_index] = NULL;
2148         vme_context->ref_index_in_mb[list_index] = 0;
2149     }
2150 }
2151
2152 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
2153                                      struct encode_state *encode_state,
2154                                      struct intel_encoder_context *encoder_context)
2155 {
2156     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2157     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2158     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2159     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2160     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2161     int qp, m_cost, j, mv_count;
2162     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
2163     float   lambda, m_costf;
2164
2165     /* HEVC has no SI/SP slices, so no slice type fixup is needed */
2166     int slice_type = slice_param->slice_type;
2167
2168
2169     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2170
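    /* Under CBR the QP comes from the BRC context rather than from the
     * picture/slice parameters; when the stream has no real B frames
     * (ip_period == 1), or at certain positions within the GOP, the B slice
     * is costed as a P slice using the P-slice BRC QP. */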
2171     if(encoder_context->rate_control_mode == VA_RC_CBR)
2172     {
2173         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
2174         if(slice_type == HEVC_SLICE_B) {
2175             if(pSequenceParameter->ip_period == 1)
2176             {
2177                 slice_type = HEVC_SLICE_P;
2178                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2179
2180             }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
2181                 slice_type = HEVC_SLICE_P;
2182                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2183             }
2184         }
2185
2186     }
2187
2188     if (vme_state_message == NULL)
2189         return;
2190
2191     assert(qp <= QP_MAX);
2192     lambda = intel_lambda_qp(qp);
2193     if (slice_type == HEVC_SLICE_I) {
2194         vme_state_message[MODE_INTRA_16X16] = 0;
2195         m_cost = lambda * 4;
2196         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2197         m_cost = lambda * 16;
2198         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2199         m_cost = lambda * 3;
2200         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2201     } else {
2202         m_cost = 0;
2203         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
2204         for (j = 1; j < 3; j++) {
2205             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2206             m_cost = (int)m_costf;
2207             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2208         }
2209         mv_count = 3;
2210         for (j = 4; j <= 64; j *= 2) {
2211             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2212             m_cost = (int)m_costf;
2213             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
2214             mv_count++;
2215         }
2216
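        /* For low QPs (<= 25) a fixed, cheap cost table is sufficient;
         * otherwise the per-mode costs below are scaled by lambda. */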
2217         if (qp <= 25) {
2218             vme_state_message[MODE_INTRA_16X16] = 0x4a;
2219             vme_state_message[MODE_INTRA_8X8] = 0x4a;
2220             vme_state_message[MODE_INTRA_4X4] = 0x4a;
2221             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2222             vme_state_message[MODE_INTER_16X16] = 0x4a;
2223             vme_state_message[MODE_INTER_16X8] = 0x4a;
2224             vme_state_message[MODE_INTER_8X8] = 0x4a;
2225             vme_state_message[MODE_INTER_8X4] = 0x4a;
2226             vme_state_message[MODE_INTER_4X4] = 0x4a;
2227             vme_state_message[MODE_INTER_BWD] = 0x2a;
2228             return;
2229         }
2230         m_cost = lambda * 10;
2231         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2232         m_cost = lambda * 14;
2233         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2234         m_cost = lambda * 24;
2235         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2236         m_costf = lambda * 3.5;
2237         m_cost = m_costf;
2238         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2239         if (slice_type == HEVC_SLICE_P) {
2240             m_costf = lambda * 2.5;
2241             m_cost = m_costf;
2242             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2243             m_costf = lambda * 4;
2244             m_cost = m_costf;
2245             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2246             m_costf = lambda * 1.5;
2247             m_cost = m_costf;
2248             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2249             m_costf = lambda * 3;
2250             m_cost = m_costf;
2251             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2252             m_costf = lambda * 5;
2253             m_cost = m_costf;
2254             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2255             /* BWD is not used in P-frame */
2256             vme_state_message[MODE_INTER_BWD] = 0;
2257         } else {
2258             m_costf = lambda * 2.5;
2259             m_cost = m_costf;
2260             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2261             m_costf = lambda * 5.5;
2262             m_cost = m_costf;
2263             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2264             m_costf = lambda * 3.5;
2265             m_cost = m_costf;
2266             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2267             m_costf = lambda * 5.0;
2268             m_cost = m_costf;
2269             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2270             m_costf = lambda * 6.5;
2271             m_cost = m_costf;
2272             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2273             m_costf = lambda * 1.5;
2274             m_cost = m_costf;
2275             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
2276         }
2277     }
2278 }