OSDN Git Service

a30ace229fe84c99cf15f14ffaa2eb4a720ce111
[android-x86/hardware-intel-common-vaapi.git] / src / gen6_mfc_common.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "gen9_mfc.h"
45 #include "intel_media.h"
46
47 #ifndef HAVE_LOG2F
48 #define log2f(x) (logf(x)/(float)M_LN2)
49 #endif
50
51 int intel_avc_enc_slice_type_fixup(int slice_type)
52 {
53     if (slice_type == SLICE_TYPE_SP ||
54         slice_type == SLICE_TYPE_P)
55         slice_type = SLICE_TYPE_P;
56     else if (slice_type == SLICE_TYPE_SI ||
57              slice_type == SLICE_TYPE_I)
58         slice_type = SLICE_TYPE_I;
59     else {
60         if (slice_type != SLICE_TYPE_B)
61             WARN_ONCE("Invalid slice type for H.264 encoding!\n");
62
63         slice_type = SLICE_TYPE_B;
64     }
65
66     return slice_type;
67 }
68
69 static void
70 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state, 
71                                         struct intel_encoder_context *encoder_context)
72 {
73     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
74     int i;
75
76     for(i = 0 ; i < 3; i++) {
77         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
78         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
79         mfc_context->bit_rate_control_context[i].GrowInit = 6;
80         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
81         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
82         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
83         
84         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
85         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
86         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
87         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
88         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
89         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
90     }
91 }
92
93 static void intel_mfc_brc_init(struct encode_state *encode_state,
94                                struct intel_encoder_context* encoder_context)
95 {
96     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
97     double bitrate = encoder_context->brc.bits_per_second[0];
98     double framerate = (double)encoder_context->brc.framerate_per_100s[0] / 100.0;
99     int inum = encoder_context->brc.num_iframes_in_gop,
100         pnum = encoder_context->brc.num_pframes_in_gop,
101         bnum = encoder_context->brc.num_bframes_in_gop; /* Gop structure: number of I, P, B frames in the Gop. */
102     int intra_period = encoder_context->brc.gop_size;
103     double qp1_size = 0.1 * 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
104     double qp51_size = 0.001 * 8 * 3 * encoder_context->frame_width_in_pixel * encoder_context->frame_height_in_pixel / 2;
105     double bpf;
106     int i;
107
108     mfc_context->brc.mode = encoder_context->rate_control_mode;
109
110     for (i = 0; i < 3; i++) {
111         mfc_context->brc.qp_prime_y[i] = 26;
112     }
113
114     mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
115                                                              (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
116     mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
117     mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
118
119     mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
120     mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
121     mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
122
123     bpf = mfc_context->brc.bits_per_frame = bitrate/framerate;
124
125     mfc_context->hrd.buffer_size = encoder_context->brc.hrd_buffer_size;
126     mfc_context->hrd.current_buffer_fullness =
127         (double)(encoder_context->brc.hrd_initial_buffer_fullness < mfc_context->hrd.buffer_size) ?
128         encoder_context->brc.hrd_initial_buffer_fullness : mfc_context->hrd.buffer_size / 2.;
129     mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
130     mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
131     mfc_context->hrd.violation_noted = 0;
132
133     if ((bpf > qp51_size) && (bpf < qp1_size)) {
134         mfc_context->brc.qp_prime_y[SLICE_TYPE_P] = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
135     }
136     else if (bpf >= qp1_size)
137         mfc_context->brc.qp_prime_y[SLICE_TYPE_P] = 1;
138     else if (bpf <= qp51_size)
139         mfc_context->brc.qp_prime_y[SLICE_TYPE_P] = 51;
140
141     mfc_context->brc.qp_prime_y[SLICE_TYPE_I] = mfc_context->brc.qp_prime_y[SLICE_TYPE_P];
142     mfc_context->brc.qp_prime_y[SLICE_TYPE_B] = mfc_context->brc.qp_prime_y[SLICE_TYPE_I];
143
144     BRC_CLIP(mfc_context->brc.qp_prime_y[SLICE_TYPE_I], 1, 51);
145     BRC_CLIP(mfc_context->brc.qp_prime_y[SLICE_TYPE_P], 1, 51);
146     BRC_CLIP(mfc_context->brc.qp_prime_y[SLICE_TYPE_B], 1, 51);
147 }
148
149 int intel_mfc_update_hrd(struct encode_state *encode_state,
150                          struct intel_encoder_context *encoder_context,
151                          int frame_bits)
152 {
153     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
154     double prev_bf = mfc_context->hrd.current_buffer_fullness;
155
156     mfc_context->hrd.current_buffer_fullness -= frame_bits;
157
158     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
159         mfc_context->hrd.current_buffer_fullness = prev_bf;
160         return BRC_UNDERFLOW;
161     }
162     
163     mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
164     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
165         if (mfc_context->brc.mode == VA_RC_VBR)
166             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
167         else {
168             mfc_context->hrd.current_buffer_fullness = prev_bf;
169             return BRC_OVERFLOW;
170         }
171     }
172     return BRC_NO_HRD_VIOLATION;
173 }
174
175 int intel_mfc_brc_postpack(struct encode_state *encode_state,
176                            struct intel_encoder_context *encoder_context,
177                            int frame_bits)
178 {
179     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
180     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
181     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; 
182     int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
183     int qpi = mfc_context->brc.qp_prime_y[SLICE_TYPE_I];
184     int qpp = mfc_context->brc.qp_prime_y[SLICE_TYPE_P];
185     int qpb = mfc_context->brc.qp_prime_y[SLICE_TYPE_B];
186     int qp; // quantizer of previously encoded slice of current type
187     int qpn; // predicted quantizer for next frame of current type in integer format
188     double qpf; // predicted quantizer for next frame of current type in float format
189     double delta_qp; // QP correction
190     int target_frame_size, frame_size_next;
191     /* Notes:
192      *  x - how far we are from HRD buffer borders
193      *  y - how far we are from target HRD buffer fullness
194      */
195     double x, y;
196     double frame_size_alpha;
197
198     qp = mfc_context->brc.qp_prime_y[slicetype];
199
200     target_frame_size = mfc_context->brc.target_frame_size[slicetype];
201     if (mfc_context->hrd.buffer_capacity < 5)
202         frame_size_alpha = 0;
203     else
204         frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
205     if (frame_size_alpha > 30) frame_size_alpha = 30;
206     frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
207         (double)(frame_size_alpha + 1.);
208
209     /* frame_size_next: avoiding negative number and too small value */
210     if ((double)frame_size_next < (double)(target_frame_size * 0.25))
211         frame_size_next = (int)((double)target_frame_size * 0.25);
212
213     qpf = (double)qp * target_frame_size / frame_size_next;
214     qpn = (int)(qpf + 0.5);
215
216     if (qpn == qp) {
217         /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
218         mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
219         if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
220             qpn++;
221             mfc_context->brc.qpf_rounding_accumulator = 0.;
222         } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
223             qpn--;
224             mfc_context->brc.qpf_rounding_accumulator = 0.;
225         }
226     }
227     /* making sure that QP is not changing too fast */
228     if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
229     else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
230     /* making sure that with QP predictions we did do not leave QPs range */
231     BRC_CLIP(qpn, 1, 51);
232
233     /* checking wthether HRD compliance is still met */
234     sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
235
236     /* calculating QP delta as some function*/
237     x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
238     if (x > 0) {
239         x /= mfc_context->hrd.target_buffer_fullness;
240         y = mfc_context->hrd.current_buffer_fullness;
241     }
242     else {
243         x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
244         y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
245     }
246     if (y < 0.01) y = 0.01;
247     if (x > 1) x = 1;
248     else if (x < -1) x = -1;
249
250     delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
251     qpn = (int)(qpn + delta_qp + 0.5);
252
253     /* making sure that with QP predictions we did do not leave QPs range */
254     BRC_CLIP(qpn, 1, 51);
255
256     if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
257         /* correcting QPs of slices of other types */
258         if (slicetype == SLICE_TYPE_P) {
259             if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
260                 mfc_context->brc.qp_prime_y[SLICE_TYPE_B] += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
261             if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
262                 mfc_context->brc.qp_prime_y[SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
263         } else if (slicetype == SLICE_TYPE_I) {
264             if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
265                 mfc_context->brc.qp_prime_y[SLICE_TYPE_B] += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
266             if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
267                 mfc_context->brc.qp_prime_y[SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
268         } else { // SLICE_TYPE_B
269             if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
270                 mfc_context->brc.qp_prime_y[SLICE_TYPE_P] += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
271             if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
272                 mfc_context->brc.qp_prime_y[SLICE_TYPE_I] += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
273         }
274         BRC_CLIP(mfc_context->brc.qp_prime_y[SLICE_TYPE_I], 1, 51);
275         BRC_CLIP(mfc_context->brc.qp_prime_y[SLICE_TYPE_P], 1, 51);
276         BRC_CLIP(mfc_context->brc.qp_prime_y[SLICE_TYPE_B], 1, 51);
277     } else if (sts == BRC_UNDERFLOW) { // underflow
278         if (qpn <= qp) qpn = qp + 1;
279         if (qpn > 51) {
280             qpn = 51;
281             sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
282         }
283     } else if (sts == BRC_OVERFLOW) {
284         if (qpn >= qp) qpn = qp - 1;
285         if (qpn < 1) { // < 0 (?) overflow with minQP
286             qpn = 1;
287             sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
288         }
289     }
290
291     mfc_context->brc.qp_prime_y[slicetype] = qpn;
292
293     return sts;
294 }
295
296 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
297                                        struct intel_encoder_context *encoder_context)
298 {
299     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
300     unsigned int rate_control_mode = encoder_context->rate_control_mode;
301     int target_bit_rate = encoder_context->brc.bits_per_second[0];
302     
303     // current we only support CBR mode.
304     if (rate_control_mode == VA_RC_CBR) {
305         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
306         mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
307         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
308         mfc_context->vui_hrd.i_frame_number = 0;
309
310         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24; 
311         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
312         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
313     }
314
315 }
316
317 void 
318 intel_mfc_hrd_context_update(struct encode_state *encode_state, 
319                              struct gen6_mfc_context *mfc_context)
320 {
321     mfc_context->vui_hrd.i_frame_number++;
322 }
323
324 int intel_mfc_interlace_check(VADriverContextP ctx,
325                               struct encode_state *encode_state,
326                               struct intel_encoder_context *encoder_context)
327 {
328     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
329     VAEncSliceParameterBufferH264 *pSliceParameter;
330     int i;
331     int mbCount = 0;
332     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
333     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
334   
335     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
336         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer; 
337         mbCount += pSliceParameter->num_macroblocks; 
338     }
339     
340     if ( mbCount == ( width_in_mbs * height_in_mbs ) )
341         return 0;
342
343     return 1;
344 }
345
346 void intel_mfc_brc_prepare(struct encode_state *encode_state,
347                            struct intel_encoder_context *encoder_context)
348 {
349     unsigned int rate_control_mode = encoder_context->rate_control_mode;
350
351     if (encoder_context->codec != CODEC_H264 &&
352         encoder_context->codec != CODEC_H264_MVC)
353         return;
354
355     if (rate_control_mode == VA_RC_CBR) {
356         /*Programing bit rate control */
357         if (encoder_context->brc.need_reset) {
358             intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
359             intel_mfc_brc_init(encode_state, encoder_context);
360         }
361
362         /*Programing HRD control */
363         if (encoder_context->brc.need_reset)
364             intel_mfc_hrd_context_init(encode_state, encoder_context);    
365     }
366 }
367
368 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
369                                               struct encode_state *encode_state,
370                                               struct intel_encoder_context *encoder_context,
371                                               struct intel_batchbuffer *slice_batch)
372 {
373     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
374     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
375     unsigned int rate_control_mode = encoder_context->rate_control_mode;
376     unsigned int skip_emul_byte_cnt;
377
378     if (encode_state->packed_header_data[idx]) {
379         VAEncPackedHeaderParameterBuffer *param = NULL;
380         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
381         unsigned int length_in_bits;
382
383         assert(encode_state->packed_header_param[idx]);
384         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
385         length_in_bits = param->bit_length;
386
387         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
388         mfc_context->insert_object(ctx,
389                                    encoder_context,
390                                    header_data,
391                                    ALIGN(length_in_bits, 32) >> 5,
392                                    length_in_bits & 0x1f,
393                                    skip_emul_byte_cnt,
394                                    0,
395                                    0,
396                                    !param->has_emulation_bytes,
397                                    slice_batch);
398     }
399
400     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
401
402     if (encode_state->packed_header_data[idx]) {
403         VAEncPackedHeaderParameterBuffer *param = NULL;
404         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
405         unsigned int length_in_bits;
406
407         assert(encode_state->packed_header_param[idx]);
408         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
409         length_in_bits = param->bit_length;
410
411         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
412
413         mfc_context->insert_object(ctx,
414                                    encoder_context,
415                                    header_data,
416                                    ALIGN(length_in_bits, 32) >> 5,
417                                    length_in_bits & 0x1f,
418                                    skip_emul_byte_cnt,
419                                    0,
420                                    0,
421                                    !param->has_emulation_bytes,
422                                    slice_batch);
423     }
424     
425     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
426
427     if (encode_state->packed_header_data[idx]) {
428         VAEncPackedHeaderParameterBuffer *param = NULL;
429         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
430         unsigned int length_in_bits;
431
432         assert(encode_state->packed_header_param[idx]);
433         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
434         length_in_bits = param->bit_length;
435
436         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
437         mfc_context->insert_object(ctx,
438                                    encoder_context,
439                                    header_data,
440                                    ALIGN(length_in_bits, 32) >> 5,
441                                    length_in_bits & 0x1f,
442                                    skip_emul_byte_cnt,
443                                    0,
444                                    0,
445                                    !param->has_emulation_bytes,
446                                    slice_batch);
447     } else if (rate_control_mode == VA_RC_CBR) {
448         // this is frist AU
449         struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
450
451         unsigned char *sei_data = NULL;
452     
453         int length_in_bits = build_avc_sei_buffer_timing(
454             mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
455             mfc_context->vui_hrd.i_initial_cpb_removal_delay,
456             0,
457             mfc_context->vui_hrd.i_cpb_removal_delay_length,                                                       mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
458             mfc_context->vui_hrd.i_dpb_output_delay_length,
459             0,
460             &sei_data);
461         mfc_context->insert_object(ctx,
462                                    encoder_context,
463                                    (unsigned int *)sei_data,
464                                    ALIGN(length_in_bits, 32) >> 5,
465                                    length_in_bits & 0x1f,
466                                    5,
467                                    0,   
468                                    0,   
469                                    1,
470                                    slice_batch);  
471         free(sei_data);
472     }
473 }
474
475 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx, 
476                                struct encode_state *encode_state,
477                                struct intel_encoder_context *encoder_context)
478 {
479     struct i965_driver_data *i965 = i965_driver_data(ctx);
480     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
481     struct object_surface *obj_surface; 
482     struct object_buffer *obj_buffer;
483     GenAvcSurface *gen6_avc_surface;
484     dri_bo *bo;
485     VAStatus vaStatus = VA_STATUS_SUCCESS;
486     int i, j, enable_avc_ildb = 0;
487     VAEncSliceParameterBufferH264 *slice_param;
488     struct i965_coded_buffer_segment *coded_buffer_segment;
489     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
490     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
491     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
492
493     if (IS_GEN6(i965->intel.device_info)) {
494         /* On the SNB it should be fixed to 128 for the DMV buffer */
495         width_in_mbs = 128;
496     }
497
498     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
499         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
500         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
501
502         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
503             assert((slice_param->slice_type == SLICE_TYPE_I) ||
504                    (slice_param->slice_type == SLICE_TYPE_SI) ||
505                    (slice_param->slice_type == SLICE_TYPE_P) ||
506                    (slice_param->slice_type == SLICE_TYPE_SP) ||
507                    (slice_param->slice_type == SLICE_TYPE_B));
508
509             if (slice_param->disable_deblocking_filter_idc != 1) {
510                 enable_avc_ildb = 1;
511                 break;
512             }
513
514             slice_param++;
515         }
516     }
517
518     /*Setup all the input&output object*/
519
520     /* Setup current frame and current direct mv buffer*/
521     obj_surface = encode_state->reconstructed_object;
522     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
523
524     if ( obj_surface->private_data == NULL) {
525         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
526         assert(gen6_avc_surface);
527         gen6_avc_surface->dmv_top = 
528             dri_bo_alloc(i965->intel.bufmgr,
529                          "Buffer",
530                          68 * width_in_mbs * height_in_mbs, 
531                          64);
532         gen6_avc_surface->dmv_bottom = 
533             dri_bo_alloc(i965->intel.bufmgr,
534                          "Buffer",
535                          68 * width_in_mbs * height_in_mbs, 
536                          64);
537         assert(gen6_avc_surface->dmv_top);
538         assert(gen6_avc_surface->dmv_bottom);
539         obj_surface->private_data = (void *)gen6_avc_surface;
540         obj_surface->free_private_data = (void *)gen_free_avc_surface; 
541     }
542     gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
543     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
544     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
545     dri_bo_reference(gen6_avc_surface->dmv_top);
546     dri_bo_reference(gen6_avc_surface->dmv_bottom);
547
548     if (enable_avc_ildb) {
549         mfc_context->post_deblocking_output.bo = obj_surface->bo;
550         dri_bo_reference(mfc_context->post_deblocking_output.bo);
551     } else {
552         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
553         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
554     }
555
556     mfc_context->surface_state.width = obj_surface->orig_width;
557     mfc_context->surface_state.height = obj_surface->orig_height;
558     mfc_context->surface_state.w_pitch = obj_surface->width;
559     mfc_context->surface_state.h_pitch = obj_surface->height;
560     
561     /* Setup reference frames and direct mv buffers*/
562     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
563         obj_surface = encode_state->reference_objects[i];
564         
565         if (obj_surface && obj_surface->bo) {
566             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
567             dri_bo_reference(obj_surface->bo);
568
569             /* Check DMV buffer */
570             if ( obj_surface->private_data == NULL) {
571                 
572                 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
573                 assert(gen6_avc_surface);
574                 gen6_avc_surface->dmv_top = 
575                     dri_bo_alloc(i965->intel.bufmgr,
576                                  "Buffer",
577                                  68 * width_in_mbs * height_in_mbs, 
578                                  64);
579                 gen6_avc_surface->dmv_bottom = 
580                     dri_bo_alloc(i965->intel.bufmgr,
581                                  "Buffer",
582                                  68 * width_in_mbs * height_in_mbs, 
583                                  64);
584                 assert(gen6_avc_surface->dmv_top);
585                 assert(gen6_avc_surface->dmv_bottom);
586                 obj_surface->private_data = gen6_avc_surface;
587                 obj_surface->free_private_data = gen_free_avc_surface; 
588             }
589     
590             gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
591             /* Setup DMV buffer */
592             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
593             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
594             dri_bo_reference(gen6_avc_surface->dmv_top);
595             dri_bo_reference(gen6_avc_surface->dmv_bottom);
596         } else {
597             break;
598         }
599     }
600
601     mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
602     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
603
604     obj_buffer = encode_state->coded_buf_object;
605     bo = obj_buffer->buffer_store->bo;
606     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
607     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
608     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
609     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
610     
611     dri_bo_map(bo, 1);
612     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
613     coded_buffer_segment->mapped = 0;
614     coded_buffer_segment->codec = encoder_context->codec;
615     dri_bo_unmap(bo);
616
617     return vaStatus;
618 }
619 /*
620  * The LUT uses the pair of 4-bit units: (shift, base) structure.
621  * 2^K * X = value . 
622  * So it is necessary to convert one cost into the nearest LUT format.
623  * The derivation is:
624  * 2^K *x = 2^n * (1 + deltaX)
625  *    k + log2(x) = n + log2(1 + deltaX)
626  *    log2(x) = n - k + log2(1 + deltaX)
627  *    As X is in the range of [1, 15]
628  *      4 > n - k + log2(1 + deltaX) >= 0 
629  *      =>    n + log2(1 + deltaX)  >= k > n - 4  + log2(1 + deltaX)
630  *    Then we can derive the corresponding K and get the nearest LUT format.
631  */
632 int intel_format_lutvalue(int value, int max)
633 {
634     int ret;
635     int logvalue, temp1, temp2;
636
637     if (value <= 0)
638         return 0;
639
640     logvalue = (int)(log2f((float)value));
641     if (logvalue < 4) {
642         ret = value;
643     } else {
644         int error, temp_value, base, j, temp_err;
645         error = value;
646         j = logvalue - 4 + 1;
647         ret = -1;
648         for(; j <= logvalue; j++) {
649             if (j == 0) {
650                 base = value >> j;
651             } else {
652                 base = (value + (1 << (j - 1)) - 1) >> j;
653             }
654             if (base >= 16)
655                 continue;
656
657             temp_value = base << j;
658             temp_err = abs(value - temp_value);
659             if (temp_err < error) {
660                 error = temp_err;
661                 ret = (j << 4) | base;
662                 if (temp_err == 0)
663                     break;
664             }
665         }
666     }
667     temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
668     temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
669     if (temp1 > temp2)
670         ret = max;
671     return ret;
672
673 }
674
675
676 #define         QP_MAX                  52
677 #define         VP8_QP_MAX              128
678
679
680 static float intel_lambda_qp(int qp)
681 {
682     float value, lambdaf;
683     value = qp;
684     value = value / 6 - 2;
685     if (value < 0)
686         value = 0;
687     lambdaf = roundf(powf(2, value));
688     return lambdaf;
689 }
690
691 static
692 void intel_h264_calc_mbmvcost_qp(int qp,
693                                  int slice_type,
694                                  uint8_t *vme_state_message)
695 {
696     int m_cost, j, mv_count;
697     float   lambda, m_costf;
698
699     assert(qp <= QP_MAX); 
700     lambda = intel_lambda_qp(qp);
701
702     m_cost = lambda;
703     vme_state_message[MODE_CHROMA_INTRA] = 0;
704     vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f);
705
706     if (slice_type == SLICE_TYPE_I) {
707         vme_state_message[MODE_INTRA_16X16] = 0;
708         m_cost = lambda * 4;
709         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
710         m_cost = lambda * 16; 
711         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
712         m_cost = lambda * 3;
713         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
714     } else {
715         m_cost = 0;
716         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
717         for (j = 1; j < 3; j++) {
718             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
719             m_cost = (int)m_costf;
720             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
721         }
722         mv_count = 3;
723         for (j = 4; j <= 64; j *= 2) {
724             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
725             m_cost = (int)m_costf;
726             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
727             mv_count++;
728         }
729
730         if (qp <= 25) {
731             vme_state_message[MODE_INTRA_16X16] = 0x4a;
732             vme_state_message[MODE_INTRA_8X8] = 0x4a;
733             vme_state_message[MODE_INTRA_4X4] = 0x4a;
734             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
735             vme_state_message[MODE_INTER_16X16] = 0x4a;
736             vme_state_message[MODE_INTER_16X8] = 0x4a;
737             vme_state_message[MODE_INTER_8X8] = 0x4a;
738             vme_state_message[MODE_INTER_8X4] = 0x4a;
739             vme_state_message[MODE_INTER_4X4] = 0x4a;
740             vme_state_message[MODE_INTER_BWD] = 0x2a;
741             return;
742         }
743         m_costf = lambda * 10;
744         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
745         m_cost = lambda * 14;
746         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
747         m_cost = lambda * 24; 
748         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
749         m_costf = lambda * 3.5;
750         m_cost = m_costf;
751         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
752         if (slice_type == SLICE_TYPE_P) {
753             m_costf = lambda * 2.5;
754             m_cost = m_costf;
755             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
756             m_costf = lambda * 4;
757             m_cost = m_costf;
758             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
759             m_costf = lambda * 1.5;
760             m_cost = m_costf;
761             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
762             m_costf = lambda * 3;
763             m_cost = m_costf;
764             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
765             m_costf = lambda * 5;
766             m_cost = m_costf;
767             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
768             /* BWD is not used in P-frame */
769             vme_state_message[MODE_INTER_BWD] = 0;
770         } else {
771             m_costf = lambda * 2.5;
772             m_cost = m_costf;
773             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
774             m_costf = lambda * 5.5;
775             m_cost = m_costf;
776             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
777             m_costf = lambda * 3.5;
778             m_cost = m_costf;
779             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
780             m_costf = lambda * 5.0;
781             m_cost = m_costf;
782             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
783             m_costf = lambda * 6.5;
784             m_cost = m_costf;
785             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
786             m_costf = lambda * 1.5;
787             m_cost = m_costf;
788             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
789         }
790     }
791     return;
792 }
793
794 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
795                                 struct encode_state *encode_state,
796                                 struct intel_encoder_context *encoder_context)
797 {
798     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
799     struct gen6_vme_context *vme_context = encoder_context->vme_context;
800     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
801     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
802     int qp;
803     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
804
805     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
806
807     if (encoder_context->rate_control_mode == VA_RC_CQP)
808         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
809     else
810         qp = mfc_context->brc.qp_prime_y[slice_type];
811
812     if (vme_state_message == NULL)
813         return;
814
815     intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
816 }
817
818 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
819                                 struct encode_state *encode_state,
820                                 struct intel_encoder_context *encoder_context)
821 {
822     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
823     struct gen6_vme_context *vme_context = encoder_context->vme_context;
824     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
825     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
826     int qp, m_cost, j, mv_count;
827     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
828     float   lambda, m_costf;
829
830     int is_key_frame = !pic_param->pic_flags.bits.frame_type;
831     int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
832   
833     if (vme_state_message == NULL)
834         return;
835  
836     if (encoder_context->rate_control_mode == VA_RC_CQP)
837         qp = q_matrix->quantization_index[0];
838     else
839         qp = mfc_context->brc.qp_prime_y[slice_type];
840
841     lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
842
843     m_cost = lambda;
844     vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f);
845
846     if (is_key_frame) {
847         vme_state_message[MODE_INTRA_16X16] = 0;
848         m_cost = lambda * 16; 
849         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
850         m_cost = lambda * 3;
851         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
852     } else {
853         m_cost = 0;
854         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
855         for (j = 1; j < 3; j++) {
856             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
857             m_cost = (int)m_costf;
858             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
859         }
860         mv_count = 3;
861         for (j = 4; j <= 64; j *= 2) {
862             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
863             m_cost = (int)m_costf;
864             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
865             mv_count++;
866         }
867
868         if (qp < 92 ) {
869             vme_state_message[MODE_INTRA_16X16] = 0x4a;
870             vme_state_message[MODE_INTRA_4X4] = 0x4a;
871             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
872             vme_state_message[MODE_INTER_16X16] = 0x4a;
873             vme_state_message[MODE_INTER_16X8] = 0x4a;
874             vme_state_message[MODE_INTER_8X8] = 0x4a;
875             vme_state_message[MODE_INTER_4X4] = 0x4a;
876             vme_state_message[MODE_INTER_BWD] = 0;
877             return;
878         }
879         m_costf = lambda * 10;
880         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
881         m_cost = lambda * 24; 
882         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
883             
884         m_costf = lambda * 3.5;
885         m_cost = m_costf;
886         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
887
888         m_costf = lambda * 2.5;
889         m_cost = m_costf;
890         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
891         m_costf = lambda * 4;
892         m_cost = m_costf;
893         vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
894         m_costf = lambda * 1.5;
895         m_cost = m_costf;
896         vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
897         m_costf = lambda * 5;
898         m_cost = m_costf;
899         vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
900         /* BWD is not used in P-frame */
901         vme_state_message[MODE_INTER_BWD] = 0;
902     }
903 }
904
905 #define         MB_SCOREBOARD_A         (1 << 0)
906 #define         MB_SCOREBOARD_B         (1 << 1)
907 #define         MB_SCOREBOARD_C         (1 << 2)
908 void 
909 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
910 {
911     vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
912     vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
913     vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
914                                                            MB_SCOREBOARD_B |
915                                                            MB_SCOREBOARD_C);
916
917     /* In VME prediction the current mb depends on the neighbour 
918      * A/B/C macroblock. So the left/up/up-right dependency should
919      * be considered.
920      */
921     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
922     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
923     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
924     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
925     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
926     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
927
928     vme_context->gpe_context.vfe_desc7.dword = 0;
929     return;
930 }
931
932 /* check whether the mb of (x_index, y_index) is out of bound */
933 static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
934 {
935     int mb_index;
936     if (x_index < 0 || x_index >= mb_width)
937         return -1;
938     if (y_index < 0 || y_index >= mb_height)
939         return -1;
940
941     mb_index = y_index * mb_width + x_index;
942     if (mb_index < first_mb || mb_index > (first_mb + num_mb))
943         return -1;
944     return 0;
945 }
946
947 void
948 gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
949                                      struct encode_state *encode_state,
950                                      int mb_width, int mb_height,
951                                      int kernel,
952                                      int transform_8x8_mode_flag,
953                                      struct intel_encoder_context *encoder_context)
954 {
955     struct gen6_vme_context *vme_context = encoder_context->vme_context;
956     int mb_row;
957     int s;
958     unsigned int *command_ptr;
959     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
960     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
961     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
962     int qp,qp_mb,qp_index;
963     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
964
965     if (encoder_context->rate_control_mode == VA_RC_CQP)
966         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
967     else
968         qp = mfc_context->brc.qp_prime_y[slice_type];
969
970 #define         USE_SCOREBOARD          (1 << 21)
971  
972     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
973     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
974
975     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
976         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
977         int first_mb = pSliceParameter->macroblock_address;
978         int num_mb = pSliceParameter->num_macroblocks;
979         unsigned int mb_intra_ub, score_dep;
980         int x_outer, y_outer, x_inner, y_inner;
981         int xtemp_outer = 0;
982
983         x_outer = first_mb % mb_width;
984         y_outer = first_mb / mb_width;
985         mb_row = y_outer;
986
987         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
988             x_inner = x_outer;
989             y_inner = y_outer;
990             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
991                 mb_intra_ub = 0;
992                 score_dep = 0;
993                 if (x_inner != 0) {
994                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
995                     score_dep |= MB_SCOREBOARD_A; 
996                 }
997                 if (y_inner != mb_row) {
998                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
999                     score_dep |= MB_SCOREBOARD_B;
1000                     if (x_inner != 0)
1001                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1002                     if (x_inner != (mb_width -1)) {
1003                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1004                         score_dep |= MB_SCOREBOARD_C;
1005                     }
1006                 }
1007
1008                 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1009                 *command_ptr++ = kernel;
1010                 *command_ptr++ = USE_SCOREBOARD;
1011                 /* Indirect data */
1012                 *command_ptr++ = 0;
1013                 /* the (X, Y) term of scoreboard */
1014                 *command_ptr++ = ((y_inner << 16) | x_inner);
1015                 *command_ptr++ = score_dep;
1016                 /*inline data */
1017                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1018                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1019                 /* QP occupies one byte */
1020                 if (vme_context->roi_enabled) {
1021                     qp_index = y_inner * mb_width + x_inner;
1022                     qp_mb = *(vme_context->qp_per_mb + qp_index);
1023                 } else
1024                     qp_mb = qp;
1025                 *command_ptr++ = qp_mb;
1026                 x_inner -= 2;
1027                 y_inner += 1;
1028             }
1029             x_outer += 1;
1030         }
1031
1032         xtemp_outer = mb_width - 2;
1033         if (xtemp_outer < 0)
1034             xtemp_outer = 0;
1035         x_outer = xtemp_outer;
1036         y_outer = first_mb / mb_width;
1037         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
1038             y_inner = y_outer;
1039             x_inner = x_outer;
1040             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1041                 mb_intra_ub = 0;
1042                 score_dep = 0;
1043                 if (x_inner != 0) {
1044                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1045                     score_dep |= MB_SCOREBOARD_A; 
1046                 }
1047                 if (y_inner != mb_row) {
1048                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1049                     score_dep |= MB_SCOREBOARD_B;
1050                     if (x_inner != 0)
1051                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1052
1053                     if (x_inner != (mb_width -1)) {
1054                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1055                         score_dep |= MB_SCOREBOARD_C;
1056                     }
1057                 }
1058
1059                 *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
1060                 *command_ptr++ = kernel;
1061                 *command_ptr++ = USE_SCOREBOARD;
1062                 /* Indirect data */
1063                 *command_ptr++ = 0;
1064                 /* the (X, Y) term of scoreboard */
1065                 *command_ptr++ = ((y_inner << 16) | x_inner);
1066                 *command_ptr++ = score_dep;
1067                 /*inline data */
1068                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1069                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1070                 /* qp occupies one byte */
1071                 if (vme_context->roi_enabled) {
1072                     qp_index = y_inner * mb_width + x_inner;
1073                     qp_mb = *(vme_context->qp_per_mb + qp_index);
1074                 } else
1075                     qp_mb = qp;
1076                 *command_ptr++ = qp_mb;
1077
1078                 x_inner -= 2;
1079                 y_inner += 1;
1080             }
1081             x_outer++;
1082             if (x_outer >= mb_width) {
1083                 y_outer += 1;
1084                 x_outer = xtemp_outer;
1085             }           
1086         }
1087     }
1088
1089     *command_ptr++ = 0;
1090     *command_ptr++ = MI_BATCH_BUFFER_END;
1091
1092     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1093 }
1094
1095 static uint8_t
1096 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1097 {
1098     unsigned int is_long_term =
1099         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1100     unsigned int is_top_field =
1101         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1102     unsigned int is_bottom_field =
1103         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1104
1105     return ((is_long_term                         << 6) |
1106             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1107             (frame_store_id                       << 1) |
1108             ((is_top_field ^ 1) & is_bottom_field));
1109 }
1110
1111 void
1112 intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
1113                             struct encode_state *encode_state,
1114                             struct intel_encoder_context *encoder_context)
1115 {
1116     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1117     struct intel_batchbuffer *batch = encoder_context->base.batch;
1118     int slice_type;
1119     struct object_surface *obj_surface;
1120     unsigned int fref_entry, bref_entry;
1121     int frame_index, i;
1122     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1123
1124     fref_entry = 0x80808080;
1125     bref_entry = 0x80808080;
1126     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1127
1128     if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
1129         int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
1130
1131         if (ref_idx_l0 > 3) {
1132             WARN_ONCE("ref_idx_l0 is out of range\n");
1133             ref_idx_l0 = 0;
1134         }
1135
1136         obj_surface = vme_context->used_reference_objects[0];
1137         frame_index = -1;
1138         for (i = 0; i < 16; i++) {
1139             if (obj_surface &&
1140                 obj_surface == encode_state->reference_objects[i]) {
1141                 frame_index = i;
1142                 break;
1143             }
1144         }
1145         if (frame_index == -1) {
1146             WARN_ONCE("RefPicList0 is not found in DPB!\n");
1147         } else {
1148             int ref_idx_l0_shift = ref_idx_l0 * 8;
1149             fref_entry &= ~(0xFF << ref_idx_l0_shift);
1150             fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
1151         }
1152     }
1153
1154     if (slice_type == SLICE_TYPE_B) {
1155         int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
1156
1157         if (ref_idx_l1 > 3) {
1158             WARN_ONCE("ref_idx_l1 is out of range\n");
1159             ref_idx_l1 = 0;
1160         }
1161
1162         obj_surface = vme_context->used_reference_objects[1];
1163         frame_index = -1;
1164         for (i = 0; i < 16; i++) {
1165             if (obj_surface &&
1166                 obj_surface == encode_state->reference_objects[i]) {
1167                 frame_index = i;
1168                 break;
1169             }
1170         }
1171         if (frame_index == -1) {
1172             WARN_ONCE("RefPicList1 is not found in DPB!\n");
1173         } else {
1174             int ref_idx_l1_shift = ref_idx_l1 * 8;
1175             bref_entry &= ~(0xFF << ref_idx_l1_shift);
1176             bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
1177         }
1178     }
1179
1180     BEGIN_BCS_BATCH(batch, 10);
1181     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1182     OUT_BCS_BATCH(batch, 0);                  //Select L0
1183     OUT_BCS_BATCH(batch, fref_entry);         //Only 1 reference
1184     for(i = 0; i < 7; i++) {
1185         OUT_BCS_BATCH(batch, 0x80808080);
1186     }
1187     ADVANCE_BCS_BATCH(batch);
1188
1189     BEGIN_BCS_BATCH(batch, 10);
1190     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1191     OUT_BCS_BATCH(batch, 1);                  //Select L1
1192     OUT_BCS_BATCH(batch, bref_entry);         //Only 1 reference
1193     for(i = 0; i < 7; i++) {
1194         OUT_BCS_BATCH(batch, 0x80808080);
1195     }
1196     ADVANCE_BCS_BATCH(batch);
1197 }
1198
1199
1200 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1201                                  struct encode_state *encode_state,
1202                                  struct intel_encoder_context *encoder_context)
1203 {
1204     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1205     uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1206     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1207     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1208     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1209     uint32_t mv_x, mv_y;
1210     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1211     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1212     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1213
1214     if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1215         mv_x = 512;
1216         mv_y = 64;
1217     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1218         mv_x = 1024;
1219         mv_y = 128;
1220     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1221         mv_x = 2048;
1222         mv_y = 128;
1223     } else {
1224         WARN_ONCE("Incorrect Mpeg2 level setting!\n");
1225         mv_x = 512;
1226         mv_y = 64;
1227     }
1228
1229     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1230     if (pic_param->picture_type != VAEncPictureTypeIntra) {
1231         int qp, m_cost, j, mv_count;
1232         float   lambda, m_costf;
1233         slice_param = (VAEncSliceParameterBufferMPEG2 *)
1234             encode_state->slice_params_ext[0]->buffer;
1235         qp = slice_param->quantiser_scale_code;
1236         lambda = intel_lambda_qp(qp);
1237         /* No Intra prediction. So it is zero */
1238         vme_state_message[MODE_INTRA_8X8] = 0;
1239         vme_state_message[MODE_INTRA_4X4] = 0;
1240         vme_state_message[MODE_INTER_MV0] = 0;
1241         for (j = 1; j < 3; j++) {
1242             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1243             m_cost = (int)m_costf;
1244             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
1245         }
1246         mv_count = 3;
1247         for (j = 4; j <= 64; j *= 2) {
1248             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1249             m_cost = (int)m_costf;
1250             vme_state_message[MODE_INTER_MV0 + mv_count] =
1251                 intel_format_lutvalue(m_cost, 0x6f);
1252             mv_count++;
1253         }
1254         m_cost = lambda;
1255         /* Only the 16x16 search is performed, so the mode costs for the
1256          * other partitions (for example 16x8/8x8) can be ignored.
1257          */
1258         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1259         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1260
1261         vme_state_message[MODE_INTER_16X8] = 0;
1262         vme_state_message[MODE_INTER_8X8] = 0;
1263         vme_state_message[MODE_INTER_8X4] = 0;
1264         vme_state_message[MODE_INTER_4X4] = 0;
1265         vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
1266
1267     }
1268     vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1269
1270     vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
1271         width_in_mbs;
1272 }
1273
1274 void
1275 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx, 
1276                                            struct encode_state *encode_state,
1277                                            int mb_width, int mb_height,
1278                                            int kernel,
1279                                            struct intel_encoder_context *encoder_context)
1280 {
1281     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1282     unsigned int *command_ptr;
1283
1284 #define         MPEG2_SCOREBOARD                (1 << 21)
1285
1286     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1287     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1288
1289     {
1290         unsigned int mb_intra_ub, score_dep;
1291         int x_outer, y_outer, x_inner, y_inner;
1292         int xtemp_outer = 0;
1293         int first_mb = 0;
1294         int num_mb = mb_width * mb_height;
1295
1296         x_outer = 0;
1297         y_outer = 0;
1298
1299
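        /* Dispatch the macroblocks in a wavefront order: each inner pass steps
         * by (-2, +1) (two columns left, one row down), and every MEDIA_OBJECT
         * carries its (x, y) scoreboard position plus the dependency mask on
         * its A/B/C neighbours, so the hardware scoreboard can enforce the
         * intra-prediction dependencies.  The first outer loop covers the
         * upper-left triangle, the second one the remaining columns.
         */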
1300         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1301             x_inner = x_outer;
1302             y_inner = y_outer;
1303             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1304                 mb_intra_ub = 0;
1305                 score_dep = 0;
1306                 if (x_inner != 0) {
1307                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1308                     score_dep |= MB_SCOREBOARD_A; 
1309                 }
1310                 if (y_inner != 0) {
1311                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1312                     score_dep |= MB_SCOREBOARD_B;
1313
1314                     if (x_inner != 0)
1315                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1316
1317                     if (x_inner != (mb_width -1)) {
1318                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1319                         score_dep |= MB_SCOREBOARD_C;
1320                     }
1321                 }
1322
1323                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1324                 *command_ptr++ = kernel;
1325                 *command_ptr++ = MPEG2_SCOREBOARD;
1326                 /* Indirect data */
1327                 *command_ptr++ = 0;
1328                 /* the (X, Y) term of scoreboard */
1329                 *command_ptr++ = ((y_inner << 16) | x_inner);
1330                 *command_ptr++ = score_dep;
1331                 /*inline data */
1332                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1333                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1334                 x_inner -= 2;
1335                 y_inner += 1;
1336             }
1337             x_outer += 1;
1338         }
1339
1340         xtemp_outer = mb_width - 2;
1341         if (xtemp_outer < 0)
1342             xtemp_outer = 0;
1343         x_outer = xtemp_outer;
1344         y_outer = 0;
1345         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
1346             y_inner = y_outer;
1347             x_inner = x_outer;
1348             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1349                 mb_intra_ub = 0;
1350                 score_dep = 0;
1351                 if (x_inner != 0) {
1352                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1353                     score_dep |= MB_SCOREBOARD_A; 
1354                 }
1355                 if (y_inner != 0) {
1356                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1357                     score_dep |= MB_SCOREBOARD_B;
1358
1359                     if (x_inner != 0)
1360                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1361
1362                     if (x_inner != (mb_width -1)) {
1363                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1364                         score_dep |= MB_SCOREBOARD_C;
1365                     }
1366                 }
1367
1368                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1369                 *command_ptr++ = kernel;
1370                 *command_ptr++ = MPEG2_SCOREBOARD;
1371                 /* Indirect data */
1372                 *command_ptr++ = 0;
1373                 /* the (X, Y) term of scoreboard */
1374                 *command_ptr++ = ((y_inner << 16) | x_inner);
1375                 *command_ptr++ = score_dep;
1376                 /*inline data */
1377                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1378                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1379
1380                 x_inner -= 2;
1381                 y_inner += 1;
1382             }
1383             x_outer++;
1384             if (x_outer >= mb_width) {
1385                 y_outer += 1;
1386                 x_outer = xtemp_outer;
1387             }           
1388         }
1389     }
1390
1391     *command_ptr++ = 0;
1392     *command_ptr++ = MI_BATCH_BUFFER_END;
1393
1394     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1395     return;
1396 }
1397
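/* Pick the temporally closest valid reference: the entry with the smallest
 * positive POC distance from the current picture.  dir == 0 searches backwards
 * in display order (list 0), dir == 1 searches forwards (list 1).
 */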
1398 static int
1399 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1400                           VAPictureH264 *ref_list,
1401                           int num_pictures,
1402                           int dir)
1403 {
1404     int i, found = -1, min = 0x7FFFFFFF;
1405
1406     for (i = 0; i < num_pictures; i++) {
1407         int tmp;
1408
1409         if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1410             (ref_list[i].picture_id == VA_INVALID_SURFACE))
1411             break;
1412
1413         tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1414
1415         if (dir)
1416             tmp = -tmp;
1417
1418         if (tmp > 0 && tmp < min) {
1419             min = tmp;
1420             found = i;
1421         }
1422     }
1423
1424     return found;
1425 }
1426
1427 void
1428 intel_avc_vme_reference_state(VADriverContextP ctx,
1429                               struct encode_state *encode_state,
1430                               struct intel_encoder_context *encoder_context,
1431                               int list_index,
1432                               int surface_index,
1433                               void (* vme_source_surface_state)(
1434                                   VADriverContextP ctx,
1435                                   int index,
1436                                   struct object_surface *obj_surface,
1437                                   struct intel_encoder_context *encoder_context))
1438 {
1439     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1440     struct object_surface *obj_surface = NULL;
1441     struct i965_driver_data *i965 = i965_driver_data(ctx);
1442     VASurfaceID ref_surface_id;
1443     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1444     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1445     int max_num_references;
1446     VAPictureH264 *curr_pic;
1447     VAPictureH264 *ref_list;
1448     int ref_idx;
1449
1450     if (list_index == 0) {
1451         max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1452         ref_list = slice_param->RefPicList0;
1453     } else {
1454         max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1455         ref_list = slice_param->RefPicList1;
1456     }
1457
1458     if (max_num_references == 1) {
1459         if (list_index == 0) {
1460             ref_surface_id = slice_param->RefPicList0[0].picture_id;
1461             vme_context->used_references[0] = &slice_param->RefPicList0[0];
1462         } else {
1463             ref_surface_id = slice_param->RefPicList1[0].picture_id;
1464             vme_context->used_references[1] = &slice_param->RefPicList1[0];
1465         }
1466
1467         if (ref_surface_id != VA_INVALID_SURFACE)
1468             obj_surface = SURFACE(ref_surface_id);
1469
1470         if (!obj_surface ||
1471             !obj_surface->bo) {
1472             obj_surface = encode_state->reference_objects[list_index];
1473             vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1474         }
1475
1476         ref_idx = 0;
1477     } else {
1478         curr_pic = &pic_param->CurrPic;
1479
1480         /* select the reference frame in temporal space */
1481         ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1482         ref_surface_id = ref_list[ref_idx].picture_id;
1483
1484         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1485             obj_surface = SURFACE(ref_surface_id);
1486
1487         vme_context->used_reference_objects[list_index] = obj_surface;
1488         vme_context->used_references[list_index] = &ref_list[ref_idx];
1489     }
1490
1491     if (obj_surface &&
1492         obj_surface->bo) {
1493         assert(ref_idx >= 0);
1494         vme_context->used_reference_objects[list_index] = obj_surface;
1495         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
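        /* Replicate the chosen reference index into all four bytes so that
         * each 8x8 sub-block of a macroblock refers to the same picture.
         */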
1496         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
1497                                                     ref_idx << 16 |
1498                                                     ref_idx <<  8 |
1499                                                     ref_idx);
1500     } else {
1501         vme_context->used_reference_objects[list_index] = NULL;
1502         vme_context->used_references[list_index] = NULL;
1503         vme_context->ref_index_in_mb[list_index] = 0;
1504     }
1505 }
1506
1507 void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
1508                                         struct encode_state *encode_state,
1509                                         struct intel_encoder_context *encoder_context,
1510                                         int slice_index,
1511                                         struct intel_batchbuffer *slice_batch)
1512 {
1513     int count, i, start_index;
1514     unsigned int length_in_bits;
1515     VAEncPackedHeaderParameterBuffer *param = NULL;
1516     unsigned int *header_data = NULL;
1517     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1518     int slice_header_index;
1519
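    /* Insert all packed raw data buffers attached to this slice first, and
     * the packed slice header (user-supplied or generated by the driver) last.
     */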
1520     if (encode_state->slice_header_index[slice_index] == 0)
1521         slice_header_index = -1;
1522     else
1523         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1524
1525     count = encode_state->slice_rawdata_count[slice_index];
1526     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1527
1528     for (i = 0; i < count; i++) {
1529         unsigned int skip_emul_byte_cnt;
1530
1531         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1532
1533         param = (VAEncPackedHeaderParameterBuffer *)
1534                     (encode_state->packed_header_params_ext[start_index + i]->buffer);
1535
1536         /* skip the packed slice header here, as it is inserted last */
1537         if (param->type == VAEncPackedHeaderSlice)
1538             continue;
1539
1540         length_in_bits = param->bit_length;
1541
1542         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1543
1544         /* as the slice header is still required, the last header flag is set to
1545          * zero.
1546          */
1547         mfc_context->insert_object(ctx,
1548                                    encoder_context,
1549                                    header_data,
1550                                    ALIGN(length_in_bits, 32) >> 5,
1551                                    length_in_bits & 0x1f,
1552                                    skip_emul_byte_cnt,
1553                                    0,
1554                                    0,
1555                                    !param->has_emulation_bytes,
1556                                    slice_batch);
1557     }
1558
1559     if (slice_header_index == -1) {
1560         unsigned char *slice_header = NULL;
1561         int slice_header_length_in_bits = 0;
1562         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1563         VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1564         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1565
1566         /* No packed slice header data was passed, so the driver needs to
1567          * generate the slice header itself (the normal H.264 case). */
1568         slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
1569                                                              pPicParameter,
1570                                                              pSliceParameter,
1571                                                              &slice_header);
1572         mfc_context->insert_object(ctx, encoder_context,
1573                                    (unsigned int *)slice_header,
1574                                    ALIGN(slice_header_length_in_bits, 32) >> 5,
1575                                    slice_header_length_in_bits & 0x1f,
1576                                    5,  /* first 5 bytes are start code + nal unit type */
1577                                    1, 0, 1, slice_batch);
1578
1579         free(slice_header);
1580     } else {
1581         unsigned int skip_emul_byte_cnt;
1582
1583         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1584
1585         param = (VAEncPackedHeaderParameterBuffer *)
1586                     (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1587         length_in_bits = param->bit_length;
1588
1589         /* as the slice header is the last header data for one slice,
1590          * the last header flag is set to one.
1591          */
1592         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1593
1594         mfc_context->insert_object(ctx,
1595                                    encoder_context,
1596                                    header_data,
1597                                    ALIGN(length_in_bits, 32) >> 5,
1598                                    length_in_bits & 0x1f,
1599                                    skip_emul_byte_cnt,
1600                                    1,
1601                                    0,
1602                                    !param->has_emulation_bytes,
1603                                    slice_batch);
1604     }
1605
1606     return;
1607 }
1608
1609 void
1610 intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
1611                                 struct encode_state *encode_state,
1612                                 struct intel_encoder_context *encoder_context)
1613 {
1614     struct i965_driver_data *i965 = i965_driver_data(ctx);
1615     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1616     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1617     int qp;
1618     dri_bo *bo;
1619     uint8_t *cost_table;
1620
1621     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1622
1623
1624     if (slice_type == SLICE_TYPE_I) {
1625         if (vme_context->i_qp_cost_table)
1626             return;
1627     } else if (slice_type == SLICE_TYPE_P) {
1628         if (vme_context->p_qp_cost_table)
1629             return;
1630     } else {
1631         if (vme_context->b_qp_cost_table)
1632             return;
1633     }
1634
1635     /* It is enough to allocate 32 bytes for each qp. */
1636     bo = dri_bo_alloc(i965->intel.bufmgr,
1637                       "cost_table ",
1638                       QP_MAX * 32,
1639                       64);
1640
1641     dri_bo_map(bo, 1);
1642     assert(bo->virtual);
1643     cost_table = (uint8_t *)(bo->virtual);
1644     for (qp = 0; qp < QP_MAX; qp++) {
1645         intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
1646         cost_table += 32;
1647     }
1648
1649     dri_bo_unmap(bo);
1650
1651     if (slice_type == SLICE_TYPE_I) {
1652         vme_context->i_qp_cost_table = bo;
1653     } else if (slice_type == SLICE_TYPE_P) {
1654         vme_context->p_qp_cost_table = bo;
1655     } else {
1656         vme_context->b_qp_cost_table = bo;
1657     }
1658
1659     vme_context->cost_table_size = QP_MAX * 32;
1660     return;
1661 }
1662
1663 extern void
1664 intel_h264_setup_cost_surface(VADriverContextP ctx,
1665                               struct encode_state *encode_state,
1666                               struct intel_encoder_context *encoder_context,
1667                               unsigned long binding_table_offset,
1668                               unsigned long surface_state_offset)
1669 {
1670     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1671     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1672     dri_bo *bo;
1673
1674
1675     struct i965_buffer_surface cost_table;
1676
1677     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1678
1679
1680     if (slice_type == SLICE_TYPE_I) {
1681         bo = vme_context->i_qp_cost_table;
1682     } else if (slice_type == SLICE_TYPE_P) {
1683         bo = vme_context->p_qp_cost_table;
1684     } else {
1685         bo = vme_context->b_qp_cost_table;
1686     }
1687
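    /* Expose the per-QP cost table to the VME kernel as a buffer surface:
     * QP_MAX blocks of 32 bytes each, matching the 32-byte stride written by
     * intel_h264_initialize_mbmv_cost().
     */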
1688     cost_table.bo = bo;
1689     cost_table.num_blocks = QP_MAX;
1690     cost_table.pitch = 16;
1691     cost_table.size_block = 32;
1692
1693     vme_context->vme_buffer_suface_setup(ctx,
1694                                          &vme_context->gpe_context,
1695                                          &cost_table,
1696                                          binding_table_offset,
1697                                          surface_state_offset);
1698 }
1699
1700 /*
1701  * The conversion between qp and qstep follows the scaling process of the
1702  * transform coefficients for the luma component in the H.264 spec:
1703  *   qstep = 2^(Qpy / 6 - 6)
1704  * To avoid an overly small qstep, the value is additionally scaled by 16.
1705  */
1706 static float intel_h264_qp_qstep(int qp)
1707 {
1708     float value, qstep;
1709     value = qp;
1710     value = value / 6 - 2;
1711     qstep = powf(2, value);
1712     return qstep;
1713 }
1714
1715 static int intel_h264_qstep_qp(float qstep)
1716 {
1717     float qp;
1718
1719     qp = 12.0f + 6.0f * log2f(qstep);
1720
1721     return floorf(qp);
1722 }
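/*
 * Worked example for the two helpers above (illustrative only): for qp = 24
 * the scaled qstep is 16 * 2^(24/6 - 6) = 2^(24/6 - 2) = 4.0, and the inverse
 * mapping gives 12 + 6 * log2(4.0) = 24 (up to floating-point rounding in
 * floorf()).
 */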
1723
1724 /*
1725  * Currently the ROI QP distribution is based on the following assumption:
1726  * SUM(roi_area * 1 / roi_qstep) + nonroi_area * 1 / nonroi_qstep =
1727  *                                 total_area * 1 / baseqp_qstep
1728  *
1729  * where qstep is the linearized form of the H.264 quantizer
1730  */
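/*
 * Solving that assumption for the non-ROI quantizer gives the formula used in
 * intel_h264_enc_roi_cbr() below:
 *
 *   sum_roi      = SUM(mbs_in_roi / qstep_roi)
 *   qstep_nonroi = nonroi_area / (total_area / qstep_base - sum_roi)
 *
 * If the term (total_area / qstep_base - sum_roi) becomes negative, the ROI
 * regions already exceed the overall budget and the non-ROI QP is simply
 * clamped to 51.
 */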
1731 typedef struct {
1732     int row_start_in_mb;
1733     int row_end_in_mb;
1734     int col_start_in_mb;
1735     int col_end_in_mb;
1736
1737     int width_mbs;
1738     int height_mbs;
1739
1740     int roi_qp;
1741 } ROIRegionParam;
1742
1743 static VAStatus
1744 intel_h264_enc_roi_cbr(VADriverContextP ctx,
1745                        int base_qp,
1746                        VAEncMiscParameterBufferROI *pMiscParamROI,
1747                        struct encode_state *encode_state,
1748                        struct intel_encoder_context *encoder_context)
1749 {
1750     int nonroi_qp;
1751     VAEncROI *region_roi;
1752     bool quickfill = 0;
1753
1754     ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS];
1755     int num_roi = 0;
1756     int i,j;
1757
1758     float temp;
1759     float qstep_nonroi, qstep_base;
1760     float roi_area, total_area, nonroi_area;
1761     float sum_roi;
1762
1763     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1764     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1765     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1766     int mbs_in_picture = width_in_mbs * height_in_mbs;
1767
1768     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1769     VAStatus vaStatus = VA_STATUS_SUCCESS;
1770
1771     if(pMiscParamROI != NULL)
1772     {
1773         num_roi = (pMiscParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pMiscParamROI->num_roi;
1774
1775         /* Currently roi_value_is_qp_delta is the only supported priority mode.
1776          *
1777          * The qp_delta set by the user is added to base_qp, and the result is
1778          * then clamped to [base_qp - min_delta, base_qp + max_delta].
1779          */
1780         ASSERT_RET(pMiscParamROI->roi_flags.bits.roi_value_is_qp_delta,VA_STATUS_ERROR_INVALID_PARAMETER);
1781     }
1782
1783     /* When base_qp is 12 or lower, the quality is already quite good based
1784      * on H.264 testing experience, so it is unnecessary to adjust the QP
1785      * for the ROI regions.
1786      */
1787     if (base_qp <= 12) {
1788         nonroi_qp = base_qp;
1789         quickfill = 1;
1790         goto qp_fill;
1791     }
1792
1793     sum_roi = 0.0f;
1794     roi_area = 0;
1795     for (i = 0; i < num_roi; i++) {
1796         int row_start, row_end, col_start, col_end;
1797         int roi_width_mbs, roi_height_mbs;
1798         int mbs_in_roi;
1799         int roi_qp;
1800         float qstep_roi;
1801
1802         region_roi =  (VAEncROI *)pMiscParamROI->roi + i;
1803
1804         col_start = region_roi->roi_rectangle.x;
1805         col_end = col_start + region_roi->roi_rectangle.width;
1806         row_start = region_roi->roi_rectangle.y;
1807         row_end = row_start + region_roi->roi_rectangle.height;
1808         col_start = col_start / 16;
1809         col_end = (col_end + 15) / 16;
1810         row_start = row_start / 16;
1811         row_end = (row_end + 15) / 16;
1812
1813         roi_width_mbs = col_end - col_start;
1814         roi_height_mbs = row_end - row_start;
1815         mbs_in_roi = roi_width_mbs * roi_height_mbs;
1816
1817         param_regions[i].row_start_in_mb = row_start;
1818         param_regions[i].row_end_in_mb = row_end;
1819         param_regions[i].col_start_in_mb = col_start;
1820         param_regions[i].col_end_in_mb = col_end;
1821         param_regions[i].width_mbs = roi_width_mbs;
1822         param_regions[i].height_mbs = roi_height_mbs;
1823
1824         roi_qp = base_qp + region_roi->roi_value;
1825         BRC_CLIP(roi_qp, 1, 51);
1826
1827         param_regions[i].roi_qp = roi_qp;
1828         qstep_roi = intel_h264_qp_qstep(roi_qp);
1829
1830         roi_area += mbs_in_roi;
1831         sum_roi += mbs_in_roi / qstep_roi;
1832     }
1833
1834     total_area = mbs_in_picture;
1835     nonroi_area = total_area - roi_area;
1836
1837     qstep_base = intel_h264_qp_qstep(base_qp);
1838     temp = (total_area / qstep_base - sum_roi);
1839
1840     if (temp < 0) {
1841         nonroi_qp = 51;
1842     } else {
1843         qstep_nonroi = nonroi_area / temp;
1844         nonroi_qp = intel_h264_qstep_qp(qstep_nonroi);
1845     }
1846
1847     BRC_CLIP(nonroi_qp, 1, 51);
1848
1849 qp_fill:
1850     memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture);
1851     if (!quickfill) {
1852         char *qp_ptr;
1853
1854         for (i = 0; i < num_roi; i++) {
1855             for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) {
1856                 qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb;
1857                 memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs);
1858             }
1859         }
1860     }
1861     return vaStatus;
1862 }
1863
1864 extern void
1865 intel_h264_enc_roi_config(VADriverContextP ctx,
1866                           struct encode_state *encode_state,
1867                           struct intel_encoder_context *encoder_context)
1868 {
1869     char *qp_ptr;
1870     int i, j;
1871     VAEncROI *region_roi;
1872     struct i965_driver_data *i965 = i965_driver_data(ctx);
1873     VAEncMiscParameterBuffer* pMiscParamROI;
1874     VAEncMiscParameterBufferROI *pParamROI = NULL;
1875     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1876     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1877     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1878     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1879     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1880
1881     int row_start, row_end, col_start, col_end;
1882     int num_roi = 0;
1883
1884     vme_context->roi_enabled = 0;
1885     /* Restriction: Disable ROI when multi-slice is enabled */
1886     if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1))
1887         return;
1888
1889     if (encode_state->misc_param[VAEncMiscParameterTypeROI][0] != NULL) {
1890         pMiscParamROI = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeROI][0]->buffer;
1891         pParamROI = (VAEncMiscParameterBufferROI *)pMiscParamROI->data;
1892
1893         /* clamp the number of ROI regions to the supported maximum */
1894         num_roi = (pParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pParamROI->num_roi;
1895     }
1896
1897     if (num_roi > 0)
1898         vme_context->roi_enabled = 1;
1899
1900     if (!vme_context->roi_enabled)
1901         return;
1902
1903     if ((vme_context->saved_width_mbs !=  width_in_mbs) ||
1904         (vme_context->saved_height_mbs != height_in_mbs)) {
1905         free(vme_context->qp_per_mb);
1906         vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs);
1907
1908         vme_context->saved_width_mbs = width_in_mbs;
1909         vme_context->saved_height_mbs = height_in_mbs;
1910         assert(vme_context->qp_per_mb);
1911     }
1912     if (encoder_context->rate_control_mode == VA_RC_CBR) {
1913         /*
1914          * TODO: a more sophisticated QP adjustment still needs to be added.
1915          * Currently the map is initialized from the BRC slice QP.
1916          */
1917         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1918         int qp;
1919         int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1920
1921         qp = mfc_context->brc.qp_prime_y[slice_type];
1922         intel_h264_enc_roi_cbr(ctx, qp, pParamROI,encode_state, encoder_context);
1923
1924     } else if (encoder_context->rate_control_mode == VA_RC_CQP){
1925         VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1926         VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1927         int qp;
1928
1929         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1930         memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs);
1931
1932
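        /* Walk the ROI regions in reverse order so that, where regions
         * overlap, the lower-indexed (presumably higher-priority) ROI is
         * written last and therefore wins.
         */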
1933         for (j = num_roi; j ; j--) {
1934             int qp_delta, qp_clip;
1935
1936             region_roi =  (VAEncROI *)pParamROI->roi + j - 1;
1937
1938             col_start = region_roi->roi_rectangle.x;
1939             col_end = col_start + region_roi->roi_rectangle.width;
1940             row_start = region_roi->roi_rectangle.y;
1941             row_end = row_start + region_roi->roi_rectangle.height;
1942
1943             col_start = col_start / 16;
1944             col_end = (col_end + 15) / 16;
1945             row_start = row_start / 16;
1946             row_end = (row_end + 15) / 16;
1947
1948             qp_delta = region_roi->roi_value;
1949             qp_clip = qp + qp_delta;
1950
1951             BRC_CLIP(qp_clip, 1, 51);
1952
1953             for (i = row_start; i < row_end; i++) {
1954                 qp_ptr = vme_context->qp_per_mb + (i * width_in_mbs) + col_start;
1955                 memset(qp_ptr, qp_clip, (col_end - col_start));
1956             }
1957         }
1958     } else {
1959         /*
1960          * TODO: support ROI for rate-control modes other than CBR/CQP; it is disabled for now.
1961          */
1962         vme_context->roi_enabled = 0;
1963     }
1964
1965     if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info))
1966         encoder_context->soft_batch_force = 1;
1967
1968     return;
1969 }
1970
1971 /* HEVC */
1972 static int
1973 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
1974                            VAPictureHEVC *ref_list,
1975                            int num_pictures,
1976                            int dir)
1977 {
1978     int i, found = -1, min = 0x7FFFFFFF;
1979
1980     for (i = 0; i < num_pictures; i++) {
1981         int tmp;
1982
1983         if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
1984             (ref_list[i].picture_id == VA_INVALID_SURFACE))
1985             break;
1986
1987         tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
1988
1989         if (dir)
1990             tmp = -tmp;
1991
1992         if (tmp > 0 && tmp < min) {
1993             min = tmp;
1994             found = i;
1995         }
1996     }
1997
1998     return found;
1999 }
2000 void
2001 intel_hevc_vme_reference_state(VADriverContextP ctx,
2002                                struct encode_state *encode_state,
2003                                struct intel_encoder_context *encoder_context,
2004                                int list_index,
2005                                int surface_index,
2006                                void (* vme_source_surface_state)(
2007                                    VADriverContextP ctx,
2008                                    int index,
2009                                    struct object_surface *obj_surface,
2010                                    struct intel_encoder_context *encoder_context))
2011 {
2012     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2013     struct object_surface *obj_surface = NULL;
2014     struct i965_driver_data *i965 = i965_driver_data(ctx);
2015     VASurfaceID ref_surface_id;
2016     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2017     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2018     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2019     int max_num_references;
2020     VAPictureHEVC *curr_pic;
2021     VAPictureHEVC *ref_list;
2022     int ref_idx;
2023     unsigned int is_hevc10 = 0;
2024     GenHevcSurface *hevc_encoder_surface = NULL;
2025
2026     if((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
2027         || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
2028         is_hevc10 = 1;
2029
2030     if (list_index == 0) {
2031         max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
2032         ref_list = slice_param->ref_pic_list0;
2033     } else {
2034         max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
2035         ref_list = slice_param->ref_pic_list1;
2036     }
2037
2038     if (max_num_references == 1) {
2039         if (list_index == 0) {
2040             ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
2041             vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
2042         } else {
2043             ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
2044             vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
2045         }
2046
2047         if (ref_surface_id != VA_INVALID_SURFACE)
2048             obj_surface = SURFACE(ref_surface_id);
2049
2050         if (!obj_surface ||
2051             !obj_surface->bo) {
2052             obj_surface = encode_state->reference_objects[list_index];
2053             vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
2054         }
2055
2056         ref_idx = 0;
2057     } else {
2058         curr_pic = &pic_param->decoded_curr_pic;
2059
2060         /* select the reference frame in temporal space */
2061         ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
2062         ref_surface_id = ref_list[ref_idx].picture_id;
2063
2064         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
2065             obj_surface = SURFACE(ref_surface_id);
2066
2067         vme_context->used_reference_objects[list_index] = obj_surface;
2068         vme_context->used_references[list_index] = &ref_list[ref_idx];
2069     }
2070
2071     if (obj_surface &&
2072         obj_surface->bo) {
2073         assert(ref_idx >= 0);
2074         vme_context->used_reference_objects[list_index] = obj_surface;
2075
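        /* For 10-bit HEVC the VME kernels operate on the 8-bit NV12 companion
         * surface stored in GenHevcSurface, so switch to that object here.
         */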
2076         if(is_hevc10){
2077             hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2078             assert(hevc_encoder_surface);
2079             obj_surface = hevc_encoder_surface->nv12_surface_obj;
2080         }
2081         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
2082         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
2083                 ref_idx << 16 |
2084                 ref_idx <<  8 |
2085                 ref_idx);
2086     } else {
2087         vme_context->used_reference_objects[list_index] = NULL;
2088         vme_context->used_references[list_index] = NULL;
2089         vme_context->ref_index_in_mb[list_index] = 0;
2090     }
2091 }
2092
2093 void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
2094                                      struct encode_state *encode_state,
2095                                      struct intel_encoder_context *encoder_context)
2096 {
2097     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2098     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2099     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
2100     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
2101     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2102     int qp, m_cost, j, mv_count;
2103     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
2104     float   lambda, m_costf;
2105
2106     /* HEVC has no SI/SP slices, so no slice-type fixup is needed */
2107     int slice_type = slice_param->slice_type;
2108
2109
2110     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
2111
2112     if(encoder_context->rate_control_mode == VA_RC_CBR)
2113     {
2114         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
2115         if(slice_type == HEVC_SLICE_B) {
2116             if(pSequenceParameter->ip_period == 1)
2117             {
2118                 slice_type = HEVC_SLICE_P;
2119                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2120
2121             }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
2122                 slice_type = HEVC_SLICE_P;
2123                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
2124             }
2125         }
2126
2127     }
2128
2129     if (vme_state_message == NULL)
2130         return;
2131
2132     assert(qp <= QP_MAX);
2133     lambda = intel_lambda_qp(qp);
2134     if (slice_type == HEVC_SLICE_I) {
2135         vme_state_message[MODE_INTRA_16X16] = 0;
2136         m_cost = lambda * 4;
2137         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2138         m_cost = lambda * 16;
2139         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2140         m_cost = lambda * 3;
2141         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2142     } else {
2143         m_cost = 0;
2144         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
2145         for (j = 1; j < 3; j++) {
2146             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2147             m_cost = (int)m_costf;
2148             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
2149         }
2150         mv_count = 3;
2151         for (j = 4; j <= 64; j *= 2) {
2152             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
2153             m_cost = (int)m_costf;
2154             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
2155             mv_count++;
2156         }
2157
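        /* For low QPs a flat, small mode-cost table is used; the per-mode
         * lambda-scaled biases below are only applied for qp > 25.
         */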
2158         if (qp <= 25) {
2159             vme_state_message[MODE_INTRA_16X16] = 0x4a;
2160             vme_state_message[MODE_INTRA_8X8] = 0x4a;
2161             vme_state_message[MODE_INTRA_4X4] = 0x4a;
2162             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
2163             vme_state_message[MODE_INTER_16X16] = 0x4a;
2164             vme_state_message[MODE_INTER_16X8] = 0x4a;
2165             vme_state_message[MODE_INTER_8X8] = 0x4a;
2166             vme_state_message[MODE_INTER_8X4] = 0x4a;
2167             vme_state_message[MODE_INTER_4X4] = 0x4a;
2168             vme_state_message[MODE_INTER_BWD] = 0x2a;
2169             return;
2170         }
2171         m_cost = lambda * 10;
2172         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2173         m_cost = lambda * 14;
2174         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
2175         m_cost = lambda * 24;
2176         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
2177         m_costf = lambda * 3.5;
2178         m_cost = m_costf;
2179         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
2180         if (slice_type == HEVC_SLICE_P) {
2181             m_costf = lambda * 2.5;
2182             m_cost = m_costf;
2183             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2184             m_costf = lambda * 4;
2185             m_cost = m_costf;
2186             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2187             m_costf = lambda * 1.5;
2188             m_cost = m_costf;
2189             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2190             m_costf = lambda * 3;
2191             m_cost = m_costf;
2192             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2193             m_costf = lambda * 5;
2194             m_cost = m_costf;
2195             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2196             /* BWD is not used in P-frame */
2197             vme_state_message[MODE_INTER_BWD] = 0;
2198         } else {
2199             m_costf = lambda * 2.5;
2200             m_cost = m_costf;
2201             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
2202             m_costf = lambda * 5.5;
2203             m_cost = m_costf;
2204             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
2205             m_costf = lambda * 3.5;
2206             m_cost = m_costf;
2207             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
2208             m_costf = lambda * 5.0;
2209             m_cost = m_costf;
2210             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
2211             m_costf = lambda * 6.5;
2212             m_cost = m_costf;
2213             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
2214             m_costf = lambda * 1.5;
2215             m_cost = m_costf;
2216             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
2217         }
2218     }
2219 }