OSDN Git Service

intel-vaapi-driver 1.8.1.pre1
[android-x86/hardware-intel-common-vaapi.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2010-2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/*
 * AVC RDO / inter-message constants.
 * NOTE(review): their users are outside this part of the file; the names
 * suggest offsets and a mask applied to VME output records - confirm at the
 * use sites before relying on that reading.
 */
#define AVC_INTRA_RDO_OFFSET    4
#define AVC_INTER_RDO_OFFSET    10
#define AVC_INTER_MSG_OFFSET    8
#define AVC_INTER_MV_OFFSET     48
#define AVC_RDO_MASK            0xFFFF

/* Each surface-state slot is padded to the larger of the GEN6/GEN7 sizes. */
#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
/*
 * Byte offset of surface-state slot `index`, and of binding-table entry
 * `index` (the binding table starts after MAX_MEDIA_SURFACES_GEN6 slots).
 * The argument is parenthesized so that expression arguments such as
 * (i + 1) expand to the intended product.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

/* Revision threshold selecting the "bplus" command layouts in this file. */
#define B0_STEP_REV             2
/* True when the device revision is at least B0_STEP_REV; accepts any pointer expression. */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
58
59 static const uint32_t gen75_mfc_batchbuffer_avc[][4] = {
60 #include "shaders/utils/mfc_batchbuffer_hsw.g75b"
61 };
62
63 static struct i965_kernel gen75_mfc_kernels[] = {
64     {
65         "MFC AVC INTRA BATCHBUFFER ",
66         MFC_BATCHBUFFER_AVC_INTRA,
67         gen75_mfc_batchbuffer_avc,
68         sizeof(gen75_mfc_batchbuffer_avc),
69         NULL
70     },
71 };
72
/*
 * Inter macroblock mode encoding.
 * NOTE(review): consumers are outside this part of the file; presumably these
 * decode the inter-mode bits of a VME output message - confirm at use sites.
 */
#define INTER_MODE_MASK     0x03
#define INTER_8X8           0x03
#define INTER_16X8          0x01
#define INTER_8X16          0x02
#define SUBMB_SHAPE_MASK    0x00FF00

/* Motion-vector count field values, positioned at bit 20. */
#define INTER_MV8           (4 << 20)
#define INTER_MV32          (6 << 20)
81
82
83 static void
84 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
85                            int standard_select,
86                            struct intel_encoder_context *encoder_context)
87 {
88     struct intel_batchbuffer *batch = encoder_context->base.batch;
89     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
90     assert(standard_select == MFX_FORMAT_MPEG2 ||
91            standard_select == MFX_FORMAT_AVC);
92
93     BEGIN_BCS_BATCH(batch, 5);
94
95     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
96     OUT_BCS_BATCH(batch,
97                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
98                   (MFD_MODE_VLD << 15) | /* VLD mode */
99                   (0 << 10) | /* Stream-Out Enable */
100                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
101                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
102                   (0 << 5)  | /* not in stitch mode */
103                   (1 << 4)  | /* encoding mode */
104                   (standard_select << 0));  /* standard select: avc or mpeg2 */
105     OUT_BCS_BATCH(batch,
106                   (0 << 7)  | /* expand NOA bus flag */
107                   (0 << 6)  | /* disable slice-level clock gating */
108                   (0 << 5)  | /* disable clock gating for NOA */
109                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
110                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
111                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
112                   (0 << 1)  |
113                   (0 << 0));
114     OUT_BCS_BATCH(batch, 0);
115     OUT_BCS_BATCH(batch, 0);
116
117     ADVANCE_BCS_BATCH(batch);
118 }
119
120 static void
121 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
122 {
123     struct intel_batchbuffer *batch = encoder_context->base.batch;
124     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
125
126     BEGIN_BCS_BATCH(batch, 6);
127
128     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
129     OUT_BCS_BATCH(batch, 0);
130     OUT_BCS_BATCH(batch,
131                   ((mfc_context->surface_state.height - 1) << 18) |
132                   ((mfc_context->surface_state.width - 1) << 4));
133     OUT_BCS_BATCH(batch,
134                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
135                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
136                   (0 << 22) | /* surface object control state, FIXME??? */
137                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
138                   (0 << 2)  | /* must be 0 for interleave U/V */
139                   (1 << 1)  | /* must be tiled */
140                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
141     OUT_BCS_BATCH(batch,
142                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
143                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
144     OUT_BCS_BATCH(batch, 0);
145
146     ADVANCE_BCS_BATCH(batch);
147 }
148
149 static void
150 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
151                                         struct intel_encoder_context *encoder_context)
152 {
153     struct intel_batchbuffer *batch = encoder_context->base.batch;
154     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
155     struct gen6_vme_context *vme_context = encoder_context->vme_context;
156
157     BEGIN_BCS_BATCH(batch, 26);
158
159     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
160     /* the DW1-3 is for the MFX indirect bistream offset */
161     OUT_BCS_BATCH(batch, 0);
162     OUT_BCS_BATCH(batch, 0);
163     OUT_BCS_BATCH(batch, 0);
164     /* the DW4-5 is the MFX upper bound */
165     OUT_BCS_BATCH(batch, 0);
166     OUT_BCS_BATCH(batch, 0);
167
168     /* the DW6-10 is for MFX Indirect MV Object Base Address */
169     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
170     OUT_BCS_BATCH(batch, 0);
171     OUT_BCS_BATCH(batch, 0);
172     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
173     OUT_BCS_BATCH(batch, 0);
174
175     /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
176     OUT_BCS_BATCH(batch, 0);
177     OUT_BCS_BATCH(batch, 0);
178     OUT_BCS_BATCH(batch, 0);
179     OUT_BCS_BATCH(batch, 0);
180     OUT_BCS_BATCH(batch, 0);
181
182     /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
183     OUT_BCS_BATCH(batch, 0);
184     OUT_BCS_BATCH(batch, 0);
185     OUT_BCS_BATCH(batch, 0);
186     OUT_BCS_BATCH(batch, 0);
187     OUT_BCS_BATCH(batch, 0);
188
189     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
190     OUT_BCS_RELOC(batch,
191                   mfc_context->mfc_indirect_pak_bse_object.bo,
192                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
193                   0);
194     OUT_BCS_BATCH(batch, 0);
195     OUT_BCS_BATCH(batch, 0);
196         
197     OUT_BCS_RELOC(batch,
198                   mfc_context->mfc_indirect_pak_bse_object.bo,
199                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
200                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
201     OUT_BCS_BATCH(batch, 0);
202
203     ADVANCE_BCS_BATCH(batch);
204 }
205
206 static void
207 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
208 {
209     struct intel_batchbuffer *batch = encoder_context->base.batch;
210     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
211     struct gen6_vme_context *vme_context = encoder_context->vme_context;
212     struct i965_driver_data *i965 = i965_driver_data(ctx);
213
214     if (IS_STEPPING_BPLUS(i965)) {
215         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
216         return;
217     }
218
219     BEGIN_BCS_BATCH(batch, 11);
220
221     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
222     OUT_BCS_BATCH(batch, 0);
223     OUT_BCS_BATCH(batch, 0);
224     /* MFX Indirect MV Object Base Address */
225     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
226     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
227     OUT_BCS_BATCH(batch, 0);
228     OUT_BCS_BATCH(batch, 0);
229     OUT_BCS_BATCH(batch, 0);
230     OUT_BCS_BATCH(batch, 0);
231     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
232     OUT_BCS_RELOC(batch,
233                   mfc_context->mfc_indirect_pak_bse_object.bo,
234                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
235                   0);
236     OUT_BCS_RELOC(batch,
237                   mfc_context->mfc_indirect_pak_bse_object.bo,
238                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
240
241     ADVANCE_BCS_BATCH(batch);
242 }
243
244 static void
245 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
246                         struct intel_encoder_context *encoder_context)
247 {
248     struct intel_batchbuffer *batch = encoder_context->base.batch;
249     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
250     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
251
252     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
253     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
254
255     BEGIN_BCS_BATCH(batch, 16);
256
257     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
258     /*DW1. MB setting of frame */
259     OUT_BCS_BATCH(batch,
260                   ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
261     OUT_BCS_BATCH(batch, 
262                   ((height_in_mbs - 1) << 16) | 
263                   ((width_in_mbs - 1) << 0));
264     /* DW3 QP setting */
265     OUT_BCS_BATCH(batch, 
266                   (0 << 24) |   /* Second Chroma QP Offset */
267                   (0 << 16) |   /* Chroma QP Offset */
268                   (0 << 14) |   /* Max-bit conformance Intra flag */
269                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
270                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
271                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
272                   (0 << 8)  |   /* FIXME: Image Structure */
273                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
274     OUT_BCS_BATCH(batch,
275                   (0 << 16) |   /* Mininum Frame size */
276                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
277                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
278                   (0 << 13) |   /* CABAC 0 word insertion test enable */
279                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
280                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
281                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
282                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
283                   (0 << 6)  |   /* Only valid for VLD decoding mode */
284                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
285                   (0 << 4)  |   /* Direct 8x8 inference flag */
286                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
287                   (1 << 2)  |   /* Frame MB only flag */
288                   (0 << 1)  |   /* MBAFF mode is in active */
289                   (0 << 0));    /* Field picture flag */
290     /* DW5 Trellis quantization */
291     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
292     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
293                   (0xBB8 << 16) |       /* InterMbMaxSz */
294                   (0xEE8) );            /* IntraMbMaxSz */
295     OUT_BCS_BATCH(batch, 0);            /* Reserved */
296     /* DW8. QP delta */
297     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
298     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
299     /* DW10. Bit setting for MB */
300     OUT_BCS_BATCH(batch, 0x8C000000);
301     OUT_BCS_BATCH(batch, 0x00010000);
302     /* DW12. */
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0x02010100);
305     /* DW14. For short format */
306     OUT_BCS_BATCH(batch, 0);
307     OUT_BCS_BATCH(batch, 0);
308
309     ADVANCE_BCS_BATCH(batch);
310 }
311
312 static void
313 gen75_mfc_qm_state(VADriverContextP ctx,
314                    int qm_type,
315                    unsigned int *qm,
316                    int qm_length,
317                    struct intel_encoder_context *encoder_context)
318 {
319     struct intel_batchbuffer *batch = encoder_context->base.batch;
320     unsigned int qm_buffer[16];
321
322     assert(qm_length <= 16);
323     assert(sizeof(*qm) == 4);
324     memcpy(qm_buffer, qm, qm_length * 4);
325
326     BEGIN_BCS_BATCH(batch, 18);
327     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
328     OUT_BCS_BATCH(batch, qm_type << 0);
329     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
330     ADVANCE_BCS_BATCH(batch);
331 }
332
333 static void
334 gen75_mfc_avc_qm_state(VADriverContextP ctx,
335                        struct encode_state *encode_state,
336                        struct intel_encoder_context *encoder_context)
337 {
338     unsigned int qm[16] = {
339         0x10101010, 0x10101010, 0x10101010, 0x10101010,
340         0x10101010, 0x10101010, 0x10101010, 0x10101010,
341         0x10101010, 0x10101010, 0x10101010, 0x10101010,
342         0x10101010, 0x10101010, 0x10101010, 0x10101010
343     };
344
345     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
346     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
347     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
348     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
349 }
350
351 static void
352 gen75_mfc_fqm_state(VADriverContextP ctx,
353                     int fqm_type,
354                     unsigned int *fqm,
355                     int fqm_length,
356                     struct intel_encoder_context *encoder_context)
357 {
358     struct intel_batchbuffer *batch = encoder_context->base.batch;
359     unsigned int fqm_buffer[32];
360
361     assert(fqm_length <= 32);
362     assert(sizeof(*fqm) == 4);
363     memcpy(fqm_buffer, fqm, fqm_length * 4);
364
365     BEGIN_BCS_BATCH(batch, 34);
366     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
367     OUT_BCS_BATCH(batch, fqm_type << 0);
368     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
369     ADVANCE_BCS_BATCH(batch);
370 }
371
372 static void
373 gen75_mfc_avc_fqm_state(VADriverContextP ctx,
374                         struct encode_state *encode_state,
375                         struct intel_encoder_context *encoder_context)
376 {
377     unsigned int qm[32] = {
378         0x10001000, 0x10001000, 0x10001000, 0x10001000,
379         0x10001000, 0x10001000, 0x10001000, 0x10001000,
380         0x10001000, 0x10001000, 0x10001000, 0x10001000,
381         0x10001000, 0x10001000, 0x10001000, 0x10001000,
382         0x10001000, 0x10001000, 0x10001000, 0x10001000,
383         0x10001000, 0x10001000, 0x10001000, 0x10001000,
384         0x10001000, 0x10001000, 0x10001000, 0x10001000,
385         0x10001000, 0x10001000, 0x10001000, 0x10001000
386     };
387
388     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
389     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
390     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
391     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
392 }
393
394 static void
395 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
396                             unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
397                             int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
398                             struct intel_batchbuffer *batch)
399 {
400     if (batch == NULL)
401         batch = encoder_context->base.batch;
402
403     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
404
405     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
406     OUT_BCS_BATCH(batch,
407                   (0 << 16) |   /* always start at offset 0 */
408                   (data_bits_in_last_dw << 8) |
409                   (skip_emul_byte_count << 4) |
410                   (!!emulation_flag << 3) |
411                   ((!!is_last_header) << 2) |
412                   ((!!is_end_of_slice) << 1) |
413                   (0 << 0));    /* FIXME: ??? */
414     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
415
416     ADVANCE_BCS_BATCH(batch);
417 }
418
419
420 static void gen75_mfc_init(VADriverContextP ctx,
421                            struct encode_state *encode_state,
422                            struct intel_encoder_context *encoder_context)
423 {
424     struct i965_driver_data *i965 = i965_driver_data(ctx);
425     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
426     dri_bo *bo;
427     int i;
428     int width_in_mbs = 0;
429     int height_in_mbs = 0;
430     int slice_batchbuffer_size;
431
432     if (encoder_context->codec == CODEC_H264 ||
433         encoder_context->codec == CODEC_H264_MVC) {
434         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
435         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
436         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
437     } else {
438         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
439
440         assert(encoder_context->codec == CODEC_MPEG2);
441
442         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
443         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
444     }
445
446     slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
447                 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
448
449     /*Encode common setup for MFC*/
450     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
451     mfc_context->post_deblocking_output.bo = NULL;
452
453     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
454     mfc_context->pre_deblocking_output.bo = NULL;
455
456     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
457     mfc_context->uncompressed_picture_source.bo = NULL;
458
459     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
460     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
461
462     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
463         if (mfc_context->direct_mv_buffers[i].bo != NULL)
464             dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
465         mfc_context->direct_mv_buffers[i].bo = NULL;
466     }
467
468     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
469         if (mfc_context->reference_surfaces[i].bo != NULL)
470             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
471         mfc_context->reference_surfaces[i].bo = NULL;  
472     }
473
474     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
475     bo = dri_bo_alloc(i965->intel.bufmgr,
476                       "Buffer",
477                       width_in_mbs * 64,
478                       64);
479     assert(bo);
480     mfc_context->intra_row_store_scratch_buffer.bo = bo;
481
482     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
483     bo = dri_bo_alloc(i965->intel.bufmgr,
484                       "Buffer",
485                       width_in_mbs * height_in_mbs * 16,
486                       64);
487     assert(bo);
488     mfc_context->macroblock_status_buffer.bo = bo;
489
490     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
491     bo = dri_bo_alloc(i965->intel.bufmgr,
492                       "Buffer",
493                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
494                       64);
495     assert(bo);
496     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
497
498     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
499     bo = dri_bo_alloc(i965->intel.bufmgr,
500                       "Buffer",
501                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
502                       0x1000);
503     assert(bo);
504     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
505
506     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
507     mfc_context->mfc_batchbuffer_surface.bo = NULL;
508
509     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
510     mfc_context->aux_batchbuffer_surface.bo = NULL;
511
512     if (mfc_context->aux_batchbuffer)
513         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
514
515     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
516                                                         slice_batchbuffer_size);
517     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
518     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
519     mfc_context->aux_batchbuffer_surface.pitch = 16;
520     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
521     mfc_context->aux_batchbuffer_surface.size_block = 16;
522
523     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
524 }
525
526 static void
527 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
528                                     struct intel_encoder_context *encoder_context)
529 {
530     struct intel_batchbuffer *batch = encoder_context->base.batch;
531     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
532     int i;
533
534     BEGIN_BCS_BATCH(batch, 61);
535
536     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
537
538     /* the DW1-3 is for pre_deblocking */
539     if (mfc_context->pre_deblocking_output.bo)
540         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
541                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
542                       0);
543     else
544         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
545
546     OUT_BCS_BATCH(batch, 0);
547     OUT_BCS_BATCH(batch, 0);
548     /* the DW4-6 is for the post_deblocking */
549
550     if (mfc_context->post_deblocking_output.bo)
551         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
552                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
553                       0);                                                                                       /* post output addr  */ 
554     else
555         OUT_BCS_BATCH(batch, 0);
556     OUT_BCS_BATCH(batch, 0);
557     OUT_BCS_BATCH(batch, 0);
558
559     /* the DW7-9 is for the uncompressed_picture */
560     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
561                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
562                   0); /* uncompressed data */
563
564     OUT_BCS_BATCH(batch, 0);
565     OUT_BCS_BATCH(batch, 0);
566
567     /* the DW10-12 is for the mb status */
568     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
569                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
570                   0); /* StreamOut data*/
571     OUT_BCS_BATCH(batch, 0);
572     OUT_BCS_BATCH(batch, 0);
573
574     /* the DW13-15 is for the intra_row_store_scratch */
575     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
576                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
577                   0);   
578     OUT_BCS_BATCH(batch, 0);
579     OUT_BCS_BATCH(batch, 0);
580
581     /* the DW16-18 is for the deblocking filter */
582     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
583                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
584                   0);
585     OUT_BCS_BATCH(batch, 0);
586     OUT_BCS_BATCH(batch, 0);
587
588     /* the DW 19-50 is for Reference pictures*/
589     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
590         if ( mfc_context->reference_surfaces[i].bo != NULL) {
591             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
592                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
593                           0);                   
594         } else {
595             OUT_BCS_BATCH(batch, 0);
596         }
597         OUT_BCS_BATCH(batch, 0);
598     }
599     OUT_BCS_BATCH(batch, 0);
600
601     /* The DW 52-54 is for the MB status buffer */
602     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
603                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
604                   0);                                                                                   /* Macroblock status buffer*/
605         
606     OUT_BCS_BATCH(batch, 0);
607     OUT_BCS_BATCH(batch, 0);
608
609     /* the DW 55-57 is the ILDB buffer */
610     OUT_BCS_BATCH(batch, 0);
611     OUT_BCS_BATCH(batch, 0);
612     OUT_BCS_BATCH(batch, 0);
613
614     /* the DW 58-60 is the second ILDB buffer */
615     OUT_BCS_BATCH(batch, 0);
616     OUT_BCS_BATCH(batch, 0);
617     OUT_BCS_BATCH(batch, 0);
618     ADVANCE_BCS_BATCH(batch);
619 }
620
621 static void
622 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
623 {
624     struct intel_batchbuffer *batch = encoder_context->base.batch;
625     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
626     struct i965_driver_data *i965 = i965_driver_data(ctx);
627     int i;
628
629     if (IS_STEPPING_BPLUS(i965)) {
630         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
631         return;
632     }
633
634     BEGIN_BCS_BATCH(batch, 25);
635
636     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
637
638     if (mfc_context->pre_deblocking_output.bo)
639         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
640                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
641                       0);
642     else
643         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
644
645     if (mfc_context->post_deblocking_output.bo)
646         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
647                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
648                       0);                                                                                       /* post output addr  */ 
649     else
650         OUT_BCS_BATCH(batch, 0);
651
652     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
653                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
654                   0);                                                                                   /* uncompressed data */
655     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
656                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
657                   0);                                                                                   /* StreamOut data*/
658     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
659                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
660                   0);   
661     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
662                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
663                   0);
664     /* 7..22 Reference pictures*/
665     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
666         if ( mfc_context->reference_surfaces[i].bo != NULL) {
667             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
668                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
669                           0);                   
670         } else {
671             OUT_BCS_BATCH(batch, 0);
672         }
673     }
674     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
675                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
676                   0);                                                                                   /* Macroblock status buffer*/
677
678     OUT_BCS_BATCH(batch, 0);
679
680     ADVANCE_BCS_BATCH(batch);
681 }
682
683 static void
684 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
685                                      struct intel_encoder_context *encoder_context)
686 {
687     struct intel_batchbuffer *batch = encoder_context->base.batch;
688     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
689
690     int i;
691
692     BEGIN_BCS_BATCH(batch, 71);
693
694     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
695
696     /* Reference frames and Current frames */
697     /* the DW1-32 is for the direct MV for reference */
698     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
699         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
700             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
701                           I915_GEM_DOMAIN_INSTRUCTION, 0,
702                           0);
703             OUT_BCS_BATCH(batch, 0);
704         } else {
705             OUT_BCS_BATCH(batch, 0);
706             OUT_BCS_BATCH(batch, 0);
707         }
708     }
709     OUT_BCS_BATCH(batch, 0);
710
711     /* the DW34-36 is the MV for the current reference */
712     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
713                   I915_GEM_DOMAIN_INSTRUCTION, 0,
714                   0);
715
716     OUT_BCS_BATCH(batch, 0);
717     OUT_BCS_BATCH(batch, 0);
718
719     /* POL list */
720     for(i = 0; i < 32; i++) {
721         OUT_BCS_BATCH(batch, i/2);
722     }
723     OUT_BCS_BATCH(batch, 0);
724     OUT_BCS_BATCH(batch, 0);
725
726     ADVANCE_BCS_BATCH(batch);
727 }
728
729 static void
730 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
731 {
732     struct intel_batchbuffer *batch = encoder_context->base.batch;
733     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
734     struct i965_driver_data *i965 = i965_driver_data(ctx);
735     int i;
736
737     if (IS_STEPPING_BPLUS(i965)) {
738         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
739         return;
740     }
741
742     BEGIN_BCS_BATCH(batch, 69);
743
744     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
745
746     /* Reference frames and Current frames */
747     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
748         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
749             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
750                           I915_GEM_DOMAIN_INSTRUCTION, 0,
751                           0);
752         } else {
753             OUT_BCS_BATCH(batch, 0);
754         }
755     }
756
757     /* POL list */
758     for(i = 0; i < 32; i++) {
759         OUT_BCS_BATCH(batch, i/2);
760     }
761     OUT_BCS_BATCH(batch, 0);
762     OUT_BCS_BATCH(batch, 0);
763
764     ADVANCE_BCS_BATCH(batch);
765 }
766
767
768 static void
769 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
770                                         struct intel_encoder_context *encoder_context)
771 {
772     struct intel_batchbuffer *batch = encoder_context->base.batch;
773     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
774
775     BEGIN_BCS_BATCH(batch, 10);
776
777     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
778     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
779                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
780                   0);
781     OUT_BCS_BATCH(batch, 0);
782     OUT_BCS_BATCH(batch, 0);
783         
784     /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
785     OUT_BCS_BATCH(batch, 0);
786     OUT_BCS_BATCH(batch, 0);
787     OUT_BCS_BATCH(batch, 0);
788
789     /* the DW7-9 is for Bitplane Read Buffer Base Address */
790     OUT_BCS_BATCH(batch, 0);
791     OUT_BCS_BATCH(batch, 0);
792     OUT_BCS_BATCH(batch, 0);
793
794     ADVANCE_BCS_BATCH(batch);
795 }
796
797 static void
798 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
799 {
800     struct intel_batchbuffer *batch = encoder_context->base.batch;
801     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
802     struct i965_driver_data *i965 = i965_driver_data(ctx);
803
804     if (IS_STEPPING_BPLUS(i965)) {
805         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
806         return;
807     }
808
809     BEGIN_BCS_BATCH(batch, 4);
810
811     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
812     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
813                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
814                   0);
815     OUT_BCS_BATCH(batch, 0);
816     OUT_BCS_BATCH(batch, 0);
817
818     ADVANCE_BCS_BATCH(batch);
819 }
820
821
822 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
823                                                        struct encode_state *encode_state,
824                                                        struct intel_encoder_context *encoder_context)
825 {
826     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
827
828     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
829     mfc_context->set_surface_state(ctx, encoder_context);
830     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
831     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
832     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
833     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
834     mfc_context->avc_qm_state(ctx, encode_state, encoder_context);
835     mfc_context->avc_fqm_state(ctx, encode_state, encoder_context);
836     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
837     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
838 }
839
840
841 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
842                               struct encode_state *encode_state,
843                               struct intel_encoder_context *encoder_context)
844 {
845     struct intel_batchbuffer *batch = encoder_context->base.batch;
846
847     intel_batchbuffer_flush(batch);             //run the pipeline
848
849     return VA_STATUS_SUCCESS;
850 }
851
852
853 static VAStatus
854 gen75_mfc_stop(VADriverContextP ctx, 
855                struct encode_state *encode_state,
856                struct intel_encoder_context *encoder_context,
857                int *encoded_bits_size)
858 {
859     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
860     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
861     VACodedBufferSegment *coded_buffer_segment;
862     
863     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
864     assert(vaStatus == VA_STATUS_SUCCESS);
865     *encoded_bits_size = coded_buffer_segment->size * 8;
866     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
867
868     return VA_STATUS_SUCCESS;
869 }
870
871
872 static void
873 gen75_mfc_avc_slice_state(VADriverContextP ctx,
874                           VAEncPictureParameterBufferH264 *pic_param,
875                           VAEncSliceParameterBufferH264 *slice_param,
876                           struct encode_state *encode_state,
877                           struct intel_encoder_context *encoder_context,
878                           int rate_control_enable,
879                           int qp,
880                           struct intel_batchbuffer *batch)
881 {
882     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
883     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
884     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
885     int beginmb = slice_param->macroblock_address;
886     int endmb = beginmb + slice_param->num_macroblocks;
887     int beginx = beginmb % width_in_mbs;
888     int beginy = beginmb / width_in_mbs;
889     int nextx =  endmb % width_in_mbs;
890     int nexty = endmb / width_in_mbs;
891     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
892     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
893     int maxQpN, maxQpP;
894     unsigned char correct[6], grow, shrink;
895     int i;
896     int weighted_pred_idc = 0;
897     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
898     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
899     int num_ref_l0 = 0, num_ref_l1 = 0;
900
901     if (batch == NULL)
902         batch = encoder_context->base.batch;
903
904     if (slice_type == SLICE_TYPE_I) {
905         luma_log2_weight_denom = 0;
906         chroma_log2_weight_denom = 0;
907     } else if (slice_type == SLICE_TYPE_P) {
908         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
909         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
910
911         if (slice_param->num_ref_idx_active_override_flag)
912             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
913     } else if (slice_type == SLICE_TYPE_B) {
914         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
915         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
916         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
917
918         if (slice_param->num_ref_idx_active_override_flag) {
919             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
920             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
921         }
922
923         if (weighted_pred_idc == 2) {
924             /* 8.4.3 - Derivation process for prediction weights (8-279) */
925             luma_log2_weight_denom = 5;
926             chroma_log2_weight_denom = 5;
927         }
928     }
929
930     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
931     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
932
933     for (i = 0; i < 6; i++)
934         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
935
936     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + 
937         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
938     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + 
939         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
940
941     BEGIN_BCS_BATCH(batch, 11);;
942
943     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
944     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
945
946     OUT_BCS_BATCH(batch,
947                   (num_ref_l0 << 16) |
948                   (num_ref_l1 << 24) |
949                   (chroma_log2_weight_denom << 8) |
950                   (luma_log2_weight_denom << 0));
951
952     OUT_BCS_BATCH(batch, 
953                   (weighted_pred_idc << 30) |
954                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
955                   (slice_param->disable_deblocking_filter_idc << 27) |
956                   (slice_param->cabac_init_idc << 24) |
957                   (qp<<16) |                    /*Slice Quantization Parameter*/
958                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
959                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
960     OUT_BCS_BATCH(batch,
961                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
962                   (beginx << 16) |
963                   slice_param->macroblock_address );
964     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
965     OUT_BCS_BATCH(batch, 
966                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
967                   (1 << 30) |           /*ResetRateControlCounter*/
968                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
969                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
970                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
971                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
972                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
973                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
974                   (last_slice << 19) |     /*IsLastSlice*/
975                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
976                   (1 << 17) |       /*HeaderPresentFlag*/       
977                   (1 << 16) |       /*SliceData PresentFlag*/
978                   (1 << 15) |       /*TailPresentFlag*/
979                   (1 << 13) |       /*RBSP NAL TYPE*/   
980                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
981     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
982     OUT_BCS_BATCH(batch,
983                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
984                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
985                   (shrink << 8)  |
986                   (grow << 0));   
987     OUT_BCS_BATCH(batch,
988                   (correct[5] << 20) |
989                   (correct[4] << 16) |
990                   (correct[3] << 12) |
991                   (correct[2] << 8) |
992                   (correct[1] << 4) |
993                   (correct[0] << 0));
994     OUT_BCS_BATCH(batch, 0);
995
996     ADVANCE_BCS_BATCH(batch);
997 }
998
999
1000
1001 static int
1002 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
1003                                int qp,unsigned int *msg,
1004                                struct intel_encoder_context *encoder_context,
1005                                unsigned char target_mb_size, unsigned char max_mb_size,
1006                                struct intel_batchbuffer *batch)
1007 {
1008     int len_in_dwords = 12;
1009     unsigned int intra_msg;
1010 #define         INTRA_MSG_FLAG          (1 << 13)
1011 #define         INTRA_MBTYPE_MASK       (0x1F0000)
1012     if (batch == NULL)
1013         batch = encoder_context->base.batch;
1014
1015     BEGIN_BCS_BATCH(batch, len_in_dwords);
1016
1017     intra_msg = msg[0] & 0xC0FF;
1018     intra_msg |= INTRA_MSG_FLAG;
1019     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1020     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1021     OUT_BCS_BATCH(batch, 0);
1022     OUT_BCS_BATCH(batch, 0);
1023     OUT_BCS_BATCH(batch, 
1024                   (0 << 24) |           /* PackedMvNum, Debug*/
1025                   (0 << 20) |           /* No motion vector */
1026                   (1 << 19) |           /* CbpDcY */
1027                   (1 << 18) |           /* CbpDcU */
1028                   (1 << 17) |           /* CbpDcV */
1029                   intra_msg);
1030
1031     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1032     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1033     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1034
1035     /*Stuff for Intra MB*/
1036     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1037     OUT_BCS_BATCH(batch, msg[2]);       
1038     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
1039     
1040     /*MaxSizeInWord and TargetSzieInWord*/
1041     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1042                   (target_mb_size << 16) );
1043
1044     OUT_BCS_BATCH(batch, 0);
1045
1046     ADVANCE_BCS_BATCH(batch);
1047
1048     return len_in_dwords;
1049 }
1050
1051 static int
1052 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1053                                unsigned int *msg, unsigned int offset,
1054                                struct intel_encoder_context *encoder_context,
1055                                unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1056                                struct intel_batchbuffer *batch)
1057 {
1058     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1059     int len_in_dwords = 12;
1060     unsigned int inter_msg = 0;
1061     if (batch == NULL)
1062         batch = encoder_context->base.batch;
1063     {
1064 #define MSG_MV_OFFSET   4
1065         unsigned int *mv_ptr;
1066         mv_ptr = msg + MSG_MV_OFFSET;
1067         /* MV of VME output is based on 16 sub-blocks. So it is necessary
1068          * to convert them to be compatible with the format of AVC_PAK
1069          * command.
1070          */
1071         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1072             /* MV[0] and MV[2] are replicated */
1073             mv_ptr[4] = mv_ptr[0];
1074             mv_ptr[5] = mv_ptr[1];
1075             mv_ptr[2] = mv_ptr[8];
1076             mv_ptr[3] = mv_ptr[9];
1077             mv_ptr[6] = mv_ptr[8];
1078             mv_ptr[7] = mv_ptr[9];
1079         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1080             /* MV[0] and MV[1] are replicated */
1081             mv_ptr[2] = mv_ptr[0];
1082             mv_ptr[3] = mv_ptr[1];
1083             mv_ptr[4] = mv_ptr[16];
1084             mv_ptr[5] = mv_ptr[17];
1085             mv_ptr[6] = mv_ptr[24];
1086             mv_ptr[7] = mv_ptr[25];
1087         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1088                    !(msg[1] & SUBMB_SHAPE_MASK)) {
1089             /* Don't touch MV[0] or MV[1] */
1090             mv_ptr[2] = mv_ptr[8];
1091             mv_ptr[3] = mv_ptr[9];
1092             mv_ptr[4] = mv_ptr[16];
1093             mv_ptr[5] = mv_ptr[17];
1094             mv_ptr[6] = mv_ptr[24];
1095             mv_ptr[7] = mv_ptr[25];
1096         }
1097     }
1098
1099     BEGIN_BCS_BATCH(batch, len_in_dwords);
1100
1101     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1102
1103     inter_msg = 32;
1104     /* MV quantity */
1105     if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1106         if (msg[1] & SUBMB_SHAPE_MASK)
1107             inter_msg = 128;
1108     }
1109     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1110     OUT_BCS_BATCH(batch, offset);
1111     inter_msg = msg[0] & (0x1F00FFFF);
1112     inter_msg |= INTER_MV8;
1113     inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1114     if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1115         (msg[1] & SUBMB_SHAPE_MASK)) {
1116         inter_msg |= INTER_MV32;
1117     }
1118
1119     OUT_BCS_BATCH(batch, inter_msg);
1120
1121     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1122     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1123 #if 0 
1124     if ( slice_type == SLICE_TYPE_B) {
1125         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1126     } else {
1127         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1128     }
1129 #else
1130     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1131 #endif
1132
1133     inter_msg = msg[1] >> 8;
1134     /*Stuff for Inter MB*/
1135     OUT_BCS_BATCH(batch, inter_msg);        
1136     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
1137     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
1138
1139     /*MaxSizeInWord and TargetSzieInWord*/
1140     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1141                   (target_mb_size << 16) );
1142
1143     OUT_BCS_BATCH(batch, 0x0);    
1144
1145     ADVANCE_BCS_BATCH(batch);
1146
1147     return len_in_dwords;
1148 }
1149
1150 static void 
1151 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1152                                         struct encode_state *encode_state,
1153                                         struct intel_encoder_context *encoder_context,
1154                                         int slice_index,
1155                                         struct intel_batchbuffer *slice_batch)
1156 {
1157     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1158     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1159     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1160     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1161     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1162     unsigned int *msg = NULL, offset = 0;
1163     unsigned char *msg_ptr = NULL;
1164     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1165     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1166     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1167     int i,x,y;
1168     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1169     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1170     unsigned int tail_data[] = { 0x0, 0x0 };
1171     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1172     int is_intra = slice_type == SLICE_TYPE_I;
1173     int qp_slice;
1174     int qp_mb;
1175
1176     qp_slice = qp;
1177     if (rate_control_mode != VA_RC_CQP) {
1178         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1179         if (encode_state->slice_header_index[slice_index] == 0) {
1180             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1181             qp_slice = qp;
1182         }
1183     }
1184
1185     /* only support for 8-bit pixel bit-depth */
1186     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1187     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1188     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1189     assert(qp >= 0 && qp < 52);
1190
1191     gen75_mfc_avc_slice_state(ctx,
1192                               pPicParameter,
1193                               pSliceParameter,
1194                               encode_state, encoder_context,
1195                               (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);
1196
1197     if ( slice_index == 0)
1198         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1199
1200     intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1201
1202     dri_bo_map(vme_context->vme_output.bo , 1);
1203     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1204
1205     if (is_intra) {
1206         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1207     } else {
1208         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1209     }
1210    
1211     for (i = pSliceParameter->macroblock_address; 
1212          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1213         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1214         x = i % width_in_mbs;
1215         y = i / width_in_mbs;
1216         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1217
1218         if (vme_context->roi_enabled) {
1219             qp_mb = *(vme_context->qp_per_mb + i);
1220         } else
1221             qp_mb = qp;
1222
1223         if (is_intra) {
1224             assert(msg);
1225             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch);
1226         } else {
1227             int inter_rdo, intra_rdo;
1228             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1229             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1230             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1231             if (intra_rdo < inter_rdo) { 
1232                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch);
1233             } else {
1234                 msg += AVC_INTER_MSG_OFFSET;
1235                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp_mb,
1236                                                msg, offset, encoder_context,
1237                                                0, 0, slice_type, slice_batch);
1238             }
1239         }
1240     }
1241    
1242     dri_bo_unmap(vme_context->vme_output.bo);
1243
1244     if ( last_slice ) {    
1245         mfc_context->insert_object(ctx, encoder_context,
1246                                    tail_data, 2, 8,
1247                                    2, 1, 1, 0, slice_batch);
1248     } else {
1249         mfc_context->insert_object(ctx, encoder_context,
1250                                    tail_data, 1, 8,
1251                                    1, 1, 1, 0, slice_batch);
1252     }
1253 }
1254
1255 static dri_bo *
1256 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1257                                    struct encode_state *encode_state,
1258                                    struct intel_encoder_context *encoder_context)
1259 {
1260     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1261     struct intel_batchbuffer *batch;
1262     dri_bo *batch_bo;
1263     int i;
1264
1265     batch = mfc_context->aux_batchbuffer;
1266     batch_bo = batch->buffer;
1267     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1268         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1269     }
1270
1271     intel_batchbuffer_align(batch, 8);
1272     
1273     BEGIN_BCS_BATCH(batch, 2);
1274     OUT_BCS_BATCH(batch, 0);
1275     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1276     ADVANCE_BCS_BATCH(batch);
1277
1278     dri_bo_reference(batch_bo);
1279
1280     intel_batchbuffer_free(batch);
1281     mfc_context->aux_batchbuffer = NULL;
1282
1283     return batch_bo;
1284 }
1285
1286
1287 static void
1288 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1289                                      struct encode_state *encode_state,
1290                                      struct intel_encoder_context *encoder_context)
1291
1292 {
1293     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1294     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1295
1296     assert(vme_context->vme_output.bo);
1297     mfc_context->buffer_suface_setup(ctx,
1298                                      &mfc_context->gpe_context,
1299                                      &vme_context->vme_output,
1300                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1301                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1302 }
1303
1304 static void
1305 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1306                                       struct encode_state *encode_state,
1307                                       struct intel_encoder_context *encoder_context)
1308
1309 {
1310     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1311     assert(mfc_context->aux_batchbuffer_surface.bo);
1312     mfc_context->buffer_suface_setup(ctx,
1313                                      &mfc_context->gpe_context,
1314                                      &mfc_context->aux_batchbuffer_surface,
1315                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1316                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1317 }
1318
1319 static void
1320 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1321                                      struct encode_state *encode_state,
1322                                      struct intel_encoder_context *encoder_context)
1323 {
1324     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1325     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1326 }
1327
1328 static void
1329 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1330                                  struct encode_state *encode_state,
1331                                  struct intel_encoder_context *encoder_context)
1332 {
1333     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1334     struct gen6_interface_descriptor_data *desc;   
1335     int i;
1336     dri_bo *bo;
1337
1338     bo = mfc_context->gpe_context.idrt.bo;
1339     dri_bo_map(bo, 1);
1340     assert(bo->virtual);
1341     desc = bo->virtual;
1342
1343     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1344         struct i965_kernel *kernel;
1345
1346         kernel = &mfc_context->gpe_context.kernels[i];
1347         assert(sizeof(*desc) == 32);
1348
1349         /*Setup the descritor table*/
1350         memset(desc, 0, sizeof(*desc));
1351         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1352         desc->desc2.sampler_count = 0;
1353         desc->desc2.sampler_state_pointer = 0;
1354         desc->desc3.binding_table_entry_count = 2;
1355         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1356         desc->desc4.constant_urb_entry_read_offset = 0;
1357         desc->desc4.constant_urb_entry_read_length = 4;
1358                 
1359         /*kernel start*/
1360         dri_bo_emit_reloc(bo,   
1361                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1362                           0,
1363                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1364                           kernel->bo);
1365         desc++;
1366     }
1367
1368     dri_bo_unmap(bo);
1369 }
1370
1371 static void
1372 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1373                                      struct encode_state *encode_state,
1374                                      struct intel_encoder_context *encoder_context)
1375 {
1376     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1377     
1378     (void)mfc_context;
1379 }
1380
1381 #define AVC_PAK_LEN_IN_BYTE     48
1382 #define AVC_PAK_LEN_IN_OWORD    3
1383
1384 static void
1385 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1386                                           uint32_t intra_flag,
1387                                           int head_offset,
1388                                           int number_mb_cmds,
1389                                           int slice_end_x,
1390                                           int slice_end_y,
1391                                           int mb_x,
1392                                           int mb_y,
1393                                           int width_in_mbs,
1394                                           int qp,
1395                                           uint32_t fwd_ref,
1396                                           uint32_t bwd_ref)
1397 {
1398     uint32_t temp_value;
1399     BEGIN_BATCH(batch, 14);
1400     
1401     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2));
1402     OUT_BATCH(batch, 0);
1403     OUT_BATCH(batch, 0);
1404     OUT_BATCH(batch, 0);
1405     OUT_BATCH(batch, 0);
1406     OUT_BATCH(batch, 0);
1407    
1408     /*inline data */
1409     OUT_BATCH(batch, head_offset / 16);
1410     OUT_BATCH(batch, (intra_flag) | (qp << 16));
1411     temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16));
1412     OUT_BATCH(batch, temp_value);
1413
1414     OUT_BATCH(batch, number_mb_cmds);
1415
1416     OUT_BATCH(batch,
1417               ((slice_end_y << 8) | (slice_end_x)));
1418     OUT_BATCH(batch, fwd_ref);
1419     OUT_BATCH(batch, bwd_ref);
1420
1421     OUT_BATCH(batch, MI_NOOP);
1422
1423     ADVANCE_BATCH(batch);
1424 }
1425
1426 static void
1427 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1428                                         struct intel_encoder_context *encoder_context,
1429                                         VAEncSliceParameterBufferH264 *slice_param,
1430                                         int head_offset,
1431                                         int qp,
1432                                         int last_slice)
1433 {
1434     struct intel_batchbuffer *batch = encoder_context->base.batch;
1435     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1436     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1437     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1438     int total_mbs = slice_param->num_macroblocks;
1439     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1440     int number_mb_cmds = 128;
1441     int starting_offset = 0;
1442     int mb_x, mb_y;
1443     int last_mb, slice_end_x, slice_end_y;
1444     int remaining_mb = total_mbs;
1445     uint32_t fwd_ref , bwd_ref, mb_flag;
1446
1447     last_mb = slice_param->macroblock_address + total_mbs - 1;
1448     slice_end_x = last_mb % width_in_mbs;
1449     slice_end_y = last_mb / width_in_mbs;
1450
1451     if (slice_type == SLICE_TYPE_I) {
1452         fwd_ref = 0;
1453         bwd_ref = 0;
1454         mb_flag = 1;
1455     } else {
1456         fwd_ref = vme_context->ref_index_in_mb[0];
1457         bwd_ref = vme_context->ref_index_in_mb[1];
1458         mb_flag = 0;
1459     }
1460
1461     if (width_in_mbs >= 100) {
1462         number_mb_cmds = width_in_mbs / 5;
1463     } else if (width_in_mbs >= 80) {
1464         number_mb_cmds = width_in_mbs / 4;
1465     } else if (width_in_mbs >= 60) {
1466         number_mb_cmds = width_in_mbs / 3;
1467     } else if (width_in_mbs >= 40) {
1468         number_mb_cmds = width_in_mbs / 2;
1469     } else {
1470         number_mb_cmds = width_in_mbs;
1471     }
1472
1473     do {
1474         if (number_mb_cmds >= remaining_mb) {
1475                 number_mb_cmds = remaining_mb;
1476         }
1477         mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs;
1478         mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs;
1479
1480         gen75_mfc_batchbuffer_emit_object_command(batch,
1481                                                   mb_flag,
1482                                                   head_offset,
1483                                                   number_mb_cmds,
1484                                                   slice_end_x,
1485                                                   slice_end_y,
1486                                                   mb_x,
1487                                                   mb_y,
1488                                                   width_in_mbs,
1489                                                   qp,
1490                                                   fwd_ref,
1491                                                   bwd_ref);
1492
1493         head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE);
1494         remaining_mb -= number_mb_cmds;
1495         starting_offset += number_mb_cmds;
1496     } while (remaining_mb > 0);
1497 }
1498                           
1499 /*
1500  * return size in Owords (16bytes)
1501  */         
1502 static void
1503 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1504                                 struct encode_state *encode_state,
1505                                 struct intel_encoder_context *encoder_context,
1506                                 int slice_index)
1507 {
1508     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1509     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1510     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1511     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1512     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1513     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1514     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1515     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1516     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1517     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1518     unsigned int tail_data[] = { 0x0, 0x0 };
1519     long head_offset;
1520     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1521     int qp_slice;
1522
1523     qp_slice = qp;
1524     if (rate_control_mode != VA_RC_CQP) {
1525         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1526         if (encode_state->slice_header_index[slice_index] == 0) {
1527             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1528             qp_slice = qp;
1529         }
1530     }
1531
1532     /* only support for 8-bit pixel bit-depth */
1533     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1534     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1535     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1536     assert(qp >= 0 && qp < 52);
1537
1538     gen75_mfc_avc_slice_state(ctx,
1539                               pPicParameter,
1540                               pSliceParameter,
1541                               encode_state,
1542                               encoder_context,
1543                               (rate_control_mode != VA_RC_CQP),
1544                               qp_slice,
1545                               slice_batch);
1546
1547     if (slice_index == 0)
1548         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1549
1550     intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1551
1552     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1553     head_offset = intel_batchbuffer_used_size(slice_batch);
1554
1555     slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE;
1556
1557     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1558                                             encoder_context,
1559                                             pSliceParameter,
1560                                             head_offset,
1561                                             qp,
1562                                             last_slice);
1563
1564
1565     /* Aligned for tail */
1566     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1567     if (last_slice) {    
1568         mfc_context->insert_object(ctx,
1569                                    encoder_context,
1570                                    tail_data,
1571                                    2,
1572                                    8,
1573                                    2,
1574                                    1,
1575                                    1,
1576                                    0,
1577                                    slice_batch);
1578     } else {
1579         mfc_context->insert_object(ctx,
1580                                    encoder_context,
1581                                    tail_data,
1582                                    1,
1583                                    8,
1584                                    1,
1585                                    1,
1586                                    1,
1587                                    0,
1588                                    slice_batch);
1589     }
1590
1591     return;
1592 }
1593
1594 static void
1595 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1596                                    struct encode_state *encode_state,
1597                                    struct intel_encoder_context *encoder_context)
1598 {
1599     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1600     struct intel_batchbuffer *batch = encoder_context->base.batch;
1601     int i;
1602     intel_batchbuffer_start_atomic(batch, 0x4000); 
1603     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1604
1605     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1606         gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i);
1607     }
1608     {
1609         struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1610         intel_batchbuffer_align(slice_batch, 8);
1611         BEGIN_BCS_BATCH(slice_batch, 2);
1612         OUT_BCS_BATCH(slice_batch, 0);
1613         OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
1614         ADVANCE_BCS_BATCH(slice_batch);
1615         mfc_context->aux_batchbuffer = NULL;
1616         intel_batchbuffer_free(slice_batch);
1617     }
1618     intel_batchbuffer_end_atomic(batch);
1619     intel_batchbuffer_flush(batch);
1620 }
1621
1622 static void
1623 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1624                                 struct encode_state *encode_state,
1625                                 struct intel_encoder_context *encoder_context)
1626 {
1627     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1628     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1629     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1630     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1631 }
1632
1633 static dri_bo *
1634 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1635                                    struct encode_state *encode_state,
1636                                    struct intel_encoder_context *encoder_context)
1637 {
1638     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1639
1640     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
1641     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1642
1643     return mfc_context->aux_batchbuffer_surface.bo;
1644 }
1645
1646
1647 static void
1648 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1649                                   struct encode_state *encode_state,
1650                                   struct intel_encoder_context *encoder_context)
1651 {
1652     struct intel_batchbuffer *batch = encoder_context->base.batch;
1653     dri_bo *slice_batch_bo;
1654
1655     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1656         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1657         assert(0);
1658         return; 
1659     }
1660
1661
1662     if (encoder_context->soft_batch_force)
1663         slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1664     else
1665         slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1666
1667     // begin programing
1668     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1669     intel_batchbuffer_emit_mi_flush(batch);
1670     
1671     // picture level programing
1672     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1673
1674     BEGIN_BCS_BATCH(batch, 2);
1675     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1676     OUT_BCS_RELOC(batch,
1677                   slice_batch_bo,
1678                   I915_GEM_DOMAIN_COMMAND, 0, 
1679                   0);
1680     ADVANCE_BCS_BATCH(batch);
1681
1682     // end programing
1683     intel_batchbuffer_end_atomic(batch);
1684
1685     dri_bo_unreference(slice_batch_bo);
1686 }
1687
1688
1689 static VAStatus
1690 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1691                              struct encode_state *encode_state,
1692                              struct intel_encoder_context *encoder_context)
1693 {
1694     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1695     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1696     int current_frame_bits_size;
1697     int sts;
1698  
1699     for (;;) {
1700         gen75_mfc_init(ctx, encode_state, encoder_context);
1701         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1702         /*Programing bcs pipeline*/
1703         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1704         gen75_mfc_run(ctx, encode_state, encoder_context);
1705         if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
1706             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1707             sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
1708             if (sts == BRC_NO_HRD_VIOLATION) {
1709                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1710                 break;
1711             }
1712             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1713                 if (!mfc_context->hrd.violation_noted) {
1714                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1715                     mfc_context->hrd.violation_noted = 1;
1716                 }
1717                 return VA_STATUS_SUCCESS;
1718             }
1719         } else {
1720             break;
1721         }
1722     }
1723
1724     return VA_STATUS_SUCCESS;
1725 }
1726
1727 /*
1728  * MPEG-2
1729  */
1730
/*
 * Value written into the picture-type field of MFX_MPEG2_PIC_STATE,
 * indexed by the picture_type of the VA picture parameter buffer.
 */
static const int
va_to_gen75_mpeg2_picture_type[3] = {
    [0] = 1,    /* I */
    [1] = 2,    /* P */
    [2] = 3,    /* B */
};
1737
1738 static void
1739 gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
1740                           struct intel_encoder_context *encoder_context,
1741                           struct encode_state *encode_state)
1742 {
1743     struct intel_batchbuffer *batch = encoder_context->base.batch;
1744     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1745     VAEncPictureParameterBufferMPEG2 *pic_param;
1746     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1747     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1748     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1749
1750     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1751     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1752     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1753
1754     BEGIN_BCS_BATCH(batch, 13);
1755     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1756     OUT_BCS_BATCH(batch,
1757                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1758                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1759                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1760                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1761                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1762                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1763                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1764                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1765                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1766                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1767                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1768                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1769     OUT_BCS_BATCH(batch,
1770                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1771                   va_to_gen75_mpeg2_picture_type[pic_param->picture_type] << 9 |
1772                   0);
1773     OUT_BCS_BATCH(batch,
1774                   1 << 31 |     /* slice concealment */
1775                   (height_in_mbs - 1) << 16 |
1776                   (width_in_mbs - 1));
1777     if (slice_param && slice_param->quantiser_scale_code >= 14) 
1778         OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1779     else
1780         OUT_BCS_BATCH(batch, 0);
1781
1782     OUT_BCS_BATCH(batch, 0);
1783     OUT_BCS_BATCH(batch,
1784                   0xFFF << 16 | /* InterMBMaxSize */
1785                   0xFFF << 0 |  /* IntraMBMaxSize */
1786                   0);
1787     OUT_BCS_BATCH(batch, 0);
1788     OUT_BCS_BATCH(batch, 0);
1789     OUT_BCS_BATCH(batch, 0);
1790     OUT_BCS_BATCH(batch, 0);
1791     OUT_BCS_BATCH(batch, 0);
1792     OUT_BCS_BATCH(batch, 0);
1793     ADVANCE_BCS_BATCH(batch);
1794 }
1795
1796 static void
1797 gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1798 {
1799     unsigned char intra_qm[64] = {
1800         8, 16, 19, 22, 26, 27, 29, 34,
1801         16, 16, 22, 24, 27, 29, 34, 37,
1802         19, 22, 26, 27, 29, 34, 34, 38,
1803         22, 22, 26, 27, 29, 34, 37, 40,
1804         22, 26, 27, 29, 32, 35, 40, 48,
1805         26, 27, 29, 32, 35, 40, 48, 58,
1806         26, 27, 29, 34, 38, 46, 56, 69,
1807         27, 29, 35, 38, 46, 56, 69, 83
1808     };
1809
1810     unsigned char non_intra_qm[64] = {
1811         16, 16, 16, 16, 16, 16, 16, 16,
1812         16, 16, 16, 16, 16, 16, 16, 16,
1813         16, 16, 16, 16, 16, 16, 16, 16,
1814         16, 16, 16, 16, 16, 16, 16, 16,
1815         16, 16, 16, 16, 16, 16, 16, 16,
1816         16, 16, 16, 16, 16, 16, 16, 16,
1817         16, 16, 16, 16, 16, 16, 16, 16,
1818         16, 16, 16, 16, 16, 16, 16, 16
1819     };
1820
1821     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1822     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1823 }
1824
1825 static void
1826 gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1827 {
1828     unsigned short intra_fqm[64] = {
1829         65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1830         65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1831         65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1832         65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1833         65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1834         65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1835         65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1836         65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1837     };
1838
1839     unsigned short non_intra_fqm[64] = {
1840         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1841         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1842         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1843         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1844         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1845         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1846         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1847         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1848     };
1849
1850     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1851     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1852 }
1853
1854 static void
1855 gen75_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1856                                  struct intel_encoder_context *encoder_context,
1857                                  int x, int y,
1858                                  int next_x, int next_y,
1859                                  int is_fisrt_slice_group,
1860                                  int is_last_slice_group,
1861                                  int intra_slice,
1862                                  int qp,
1863                                  struct intel_batchbuffer *batch)
1864 {
1865     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1866
1867     if (batch == NULL)
1868         batch = encoder_context->base.batch;
1869
1870     BEGIN_BCS_BATCH(batch, 8);
1871
1872     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1873     OUT_BCS_BATCH(batch,
1874                   0 << 31 |                             /* MbRateCtrlFlag */
1875                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1876                   1 << 17 |                             /* Insert Header before the first slice group data */
1877                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1878                   1 << 15 |                             /* TailPresentFlag: always 1 */
1879                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1880                   !!intra_slice << 13 |                 /* IntraSlice */
1881                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1882                   0);
1883     OUT_BCS_BATCH(batch,
1884                   next_y << 24 |
1885                   next_x << 16 |
1886                   y << 8 |
1887                   x << 0 |
1888                   0);
1889     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1890     /* bitstream pointer is only loaded once for the first slice of a frame when 
1891      * LoadSlicePointerFlag is 0
1892      */
1893     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1894     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1895     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1896     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1897
1898     ADVANCE_BCS_BATCH(batch);
1899 }
1900
1901 static int
1902 gen75_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1903                                  struct intel_encoder_context *encoder_context,
1904                                  int x, int y,
1905                                  int first_mb_in_slice,
1906                                  int last_mb_in_slice,
1907                                  int first_mb_in_slice_group,
1908                                  int last_mb_in_slice_group,
1909                                  int mb_type,
1910                                  int qp_scale_code,
1911                                  int coded_block_pattern,
1912                                  unsigned char target_size_in_word,
1913                                  unsigned char max_size_in_word,
1914                                  struct intel_batchbuffer *batch)
1915 {
1916     int len_in_dwords = 9;
1917
1918     if (batch == NULL)
1919         batch = encoder_context->base.batch;
1920
1921     BEGIN_BCS_BATCH(batch, len_in_dwords);
1922
1923     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1924     OUT_BCS_BATCH(batch,
1925                   0 << 24 |     /* PackedMvNum */
1926                   0 << 20 |     /* MvFormat */
1927                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1928                   0 << 15 |     /* TransformFlag: frame DCT */
1929                   0 << 14 |     /* FieldMbFlag */
1930                   1 << 13 |     /* IntraMbFlag */
1931                   mb_type << 8 |   /* MbType: Intra */
1932                   0 << 2 |      /* SkipMbFlag */
1933                   0 << 0 |      /* InterMbMode */
1934                   0);
1935     OUT_BCS_BATCH(batch, y << 16 | x);
1936     OUT_BCS_BATCH(batch,
1937                   max_size_in_word << 24 |
1938                   target_size_in_word << 16 |
1939                   coded_block_pattern << 6 |      /* CBP */
1940                   0);
1941     OUT_BCS_BATCH(batch,
1942                   last_mb_in_slice << 31 |
1943                   first_mb_in_slice << 30 |
1944                   0 << 27 |     /* EnableCoeffClamp */
1945                   last_mb_in_slice_group << 26 |
1946                   0 << 25 |     /* MbSkipConvDisable */
1947                   first_mb_in_slice_group << 24 |
1948                   0 << 16 |     /* MvFieldSelect */
1949                   qp_scale_code << 0 |
1950                   0);
1951     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1952     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1953     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1954     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1955
1956     ADVANCE_BCS_BATCH(batch);
1957
1958     return len_in_dwords;
1959 }
1960
/* Offset, in dwords, of the motion vectors inside a VME output message. */
#define MPEG2_INTER_MV_OFFSET   12

/*
 * Motion vector range for each f_code, indexed by f_code.
 * Entry 0 is a placeholder; mpeg2_motion_vector() never clamps with it.
 */
static const struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {0, 0},
    {-16, 15},
    {-32, 31},
    {-64, 63},
    {-128, 127},
    {-256, 255},
    {-512, 511},
    {-1024, 1023},
    {-2048, 2047},
    {-4096, 4095}
};

/*
 * Sanitise one motion vector component.
 *
 * mv          - vector component in half-pixel units
 * pos         - macroblock index along the same axis
 * display_max - picture size along that axis in pixels
 * f_code      - f_code selecting the permitted range
 *
 * The vector is reset to 0 when the 16-pixel macroblock displaced by it
 * would start before 0 or end beyond display_max.  It is then clamped to the
 * range of f_code, provided f_code is a valid non-zero index of mv_ranges;
 * any other f_code leaves the value unclamped.
 *
 * Returns the sanitised component.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    const int num_ranges = (int)(sizeof(mv_ranges) / sizeof(mv_ranges[0]));

    if (mv + pos * 16 * 2 < 0 ||
        mv + (pos + 1) * 16 * 2 > display_max * 2)
        mv = 0;

    if (f_code > 0 && f_code < num_ranges) {
        if (mv < mv_ranges[f_code].low)
            mv = mv_ranges[f_code].low;

        if (mv > mv_ranges[f_code].high)
            mv = mv_ranges[f_code].high;
    }

    return mv;
}
1997
1998 static int
1999 gen75_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2000                                  struct encode_state *encode_state,
2001                                  struct intel_encoder_context *encoder_context,
2002                                  unsigned int *msg,
2003                                  int width_in_mbs, int height_in_mbs,
2004                                  int x, int y,
2005                                  int first_mb_in_slice,
2006                                  int last_mb_in_slice,
2007                                  int first_mb_in_slice_group,
2008                                  int last_mb_in_slice_group,
2009                                  int qp_scale_code,
2010                                  unsigned char target_size_in_word,
2011                                  unsigned char max_size_in_word,
2012                                  struct intel_batchbuffer *batch)
2013 {
2014     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2015     int len_in_dwords = 9;
2016     short *mvptr, mvx0, mvy0, mvx1, mvy1;
2017     
2018     if (batch == NULL)
2019         batch = encoder_context->base.batch;
2020
2021     mvptr = (short *)(msg + MPEG2_INTER_MV_OFFSET);
2022     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2023     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2024     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2025     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2026
2027     BEGIN_BCS_BATCH(batch, len_in_dwords);
2028
2029     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2030     OUT_BCS_BATCH(batch,
2031                   2 << 24 |     /* PackedMvNum */
2032                   7 << 20 |     /* MvFormat */
2033                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
2034                   0 << 15 |     /* TransformFlag: frame DCT */
2035                   0 << 14 |     /* FieldMbFlag */
2036                   0 << 13 |     /* IntraMbFlag */
2037                   1 << 8 |      /* MbType: Frame-based */
2038                   0 << 2 |      /* SkipMbFlag */
2039                   0 << 0 |      /* InterMbMode */
2040                   0);
2041     OUT_BCS_BATCH(batch, y << 16 | x);
2042     OUT_BCS_BATCH(batch,
2043                   max_size_in_word << 24 |
2044                   target_size_in_word << 16 |
2045                   0x3f << 6 |   /* CBP */
2046                   0);
2047     OUT_BCS_BATCH(batch,
2048                   last_mb_in_slice << 31 |
2049                   first_mb_in_slice << 30 |
2050                   0 << 27 |     /* EnableCoeffClamp */
2051                   last_mb_in_slice_group << 26 |
2052                   0 << 25 |     /* MbSkipConvDisable */
2053                   first_mb_in_slice_group << 24 |
2054                   0 << 16 |     /* MvFieldSelect */
2055                   qp_scale_code << 0 |
2056                   0);
2057
2058     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
2059     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
2060     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2061     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2062
2063     ADVANCE_BCS_BATCH(batch);
2064
2065     return len_in_dwords;
2066 }
2067
2068 static void
2069 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2070                                            struct encode_state *encode_state,
2071                                            struct intel_encoder_context *encoder_context,
2072                                            struct intel_batchbuffer *slice_batch)
2073 {
2074     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2075     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2076
2077     if (encode_state->packed_header_data[idx]) {
2078         VAEncPackedHeaderParameterBuffer *param = NULL;
2079         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2080         unsigned int length_in_bits;
2081
2082         assert(encode_state->packed_header_param[idx]);
2083         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2084         length_in_bits = param->bit_length;
2085
2086         mfc_context->insert_object(ctx,
2087                                    encoder_context,
2088                                    header_data,
2089                                    ALIGN(length_in_bits, 32) >> 5,
2090                                    length_in_bits & 0x1f,
2091                                    5,   /* FIXME: check it */
2092                                    0,
2093                                    0,
2094                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2095                                    slice_batch);
2096     }
2097
2098     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2099
2100     if (encode_state->packed_header_data[idx]) {
2101         VAEncPackedHeaderParameterBuffer *param = NULL;
2102         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2103         unsigned int length_in_bits;
2104
2105         assert(encode_state->packed_header_param[idx]);
2106         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2107         length_in_bits = param->bit_length;
2108
2109         mfc_context->insert_object(ctx,
2110                                    encoder_context,
2111                                    header_data,
2112                                    ALIGN(length_in_bits, 32) >> 5,
2113                                    length_in_bits & 0x1f,
2114                                    5,   /* FIXME: check it */
2115                                    0,
2116                                    0,
2117                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2118                                    slice_batch);
2119     }
2120 }
2121
2122 static void 
2123 gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2124                                      struct encode_state *encode_state,
2125                                      struct intel_encoder_context *encoder_context,
2126                                      int slice_index,
2127                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2128                                      struct intel_batchbuffer *slice_batch)
2129 {
2130     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2131     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2132     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2133     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2134     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2135     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2136     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2137     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2138     int i, j;
2139     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2140     unsigned int *msg = NULL;
2141     unsigned char *msg_ptr = NULL;
2142
2143     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2144     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2145     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2146     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2147
2148     dri_bo_map(vme_context->vme_output.bo , 0);
2149     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2150
2151     if (next_slice_group_param) {
2152         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2153         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2154     } else {
2155         h_next_start_pos = 0;
2156         v_next_start_pos = height_in_mbs;
2157     }
2158
2159     gen75_mfc_mpeg2_slicegroup_state(ctx,
2160                                      encoder_context,
2161                                      h_start_pos,
2162                                      v_start_pos,
2163                                      h_next_start_pos,
2164                                      v_next_start_pos,
2165                                      slice_index == 0,
2166                                      next_slice_group_param == NULL,
2167                                      slice_param->is_intra_slice,
2168                                      slice_param->quantiser_scale_code,
2169                                      slice_batch);
2170
2171     if (slice_index == 0) 
2172         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2173
2174     /* Insert '00' to make sure the header is valid */
2175     mfc_context->insert_object(ctx,
2176                                encoder_context,
2177                                (unsigned int*)section_delimiter,
2178                                1,
2179                                8,   /* 8bits in the last DWORD */
2180                                1,   /* 1 byte */
2181                                1,
2182                                0,
2183                                0,
2184                                slice_batch);
2185
2186     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2187         /* PAK for each macroblocks */
2188         for (j = 0; j < slice_param->num_macroblocks; j++) {
2189             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2190             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2191             int first_mb_in_slice = (j == 0);
2192             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2193             int first_mb_in_slice_group = (i == 0 && j == 0);
2194             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2195                                           j == slice_param->num_macroblocks - 1);
2196
2197             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2198
2199             if (slice_param->is_intra_slice) {
2200                 gen75_mfc_mpeg2_pak_object_intra(ctx,
2201                                                  encoder_context,
2202                                                  h_pos, v_pos,
2203                                                  first_mb_in_slice,
2204                                                  last_mb_in_slice,
2205                                                  first_mb_in_slice_group,
2206                                                  last_mb_in_slice_group,
2207                                                  0x1a,
2208                                                  slice_param->quantiser_scale_code,
2209                                                  0x3f,
2210                                                  0,
2211                                                  0xff,
2212                                                  slice_batch);
2213             } else {
2214                 int inter_rdo, intra_rdo;
2215                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2216                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2217
2218                 if (intra_rdo < inter_rdo) 
2219                     gen75_mfc_mpeg2_pak_object_intra(ctx,
2220                                                      encoder_context,
2221                                                      h_pos, v_pos,
2222                                                      first_mb_in_slice,
2223                                                      last_mb_in_slice,
2224                                                      first_mb_in_slice_group,
2225                                                      last_mb_in_slice_group,
2226                                                      0x1a,
2227                                                      slice_param->quantiser_scale_code,
2228                                                      0x3f,
2229                                                      0,
2230                                                      0xff,
2231                                                      slice_batch);
2232                 else
2233                     gen75_mfc_mpeg2_pak_object_inter(ctx,
2234                                                      encode_state,
2235                                                      encoder_context,
2236                                                      msg,
2237                                                      width_in_mbs, height_in_mbs,
2238                                                      h_pos, v_pos,
2239                                                      first_mb_in_slice,
2240                                                      last_mb_in_slice,
2241                                                      first_mb_in_slice_group,
2242                                                      last_mb_in_slice_group,
2243                                                      slice_param->quantiser_scale_code,
2244                                                      0,
2245                                                      0xff,
2246                                                      slice_batch);
2247             }
2248         }
2249
2250         slice_param++;
2251     }
2252
2253     dri_bo_unmap(vme_context->vme_output.bo);
2254
2255     /* tail data */
2256     if (next_slice_group_param == NULL) { /* end of a picture */
2257         mfc_context->insert_object(ctx,
2258                                    encoder_context,
2259                                    (unsigned int *)tail_delimiter,
2260                                    2,
2261                                    8,   /* 8bits in the last DWORD */
2262                                    5,   /* 5 bytes */
2263                                    1,
2264                                    1,
2265                                    0,
2266                                    slice_batch);
2267     } else {        /* end of a lsice group */
2268         mfc_context->insert_object(ctx,
2269                                    encoder_context,
2270                                    (unsigned int *)section_delimiter,
2271                                    1,
2272                                    8,   /* 8bits in the last DWORD */
2273                                    1,   /* 1 byte */
2274                                    1,
2275                                    1,
2276                                    0,
2277                                    slice_batch);
2278     }
2279 }
2280
2281 /* 
2282  * A batch buffer for all slices, including slice state, 
2283  * slice insert object and slice pak object commands
2284  *
2285  */
2286 static dri_bo *
2287 gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2288                                            struct encode_state *encode_state,
2289                                            struct intel_encoder_context *encoder_context)
2290 {
2291     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2292     struct intel_batchbuffer *batch;
2293     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2294     dri_bo *batch_bo;
2295     int i;
2296
2297     batch = mfc_context->aux_batchbuffer;
2298     batch_bo = batch->buffer;
2299
2300     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2301         if (i == encode_state->num_slice_params_ext - 1)
2302             next_slice_group_param = NULL;
2303         else
2304             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2305
2306         gen75_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2307     }
2308
2309     intel_batchbuffer_align(batch, 8);
2310     
2311     BEGIN_BCS_BATCH(batch, 2);
2312     OUT_BCS_BATCH(batch, 0);
2313     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2314     ADVANCE_BCS_BATCH(batch);
2315
2316     dri_bo_reference(batch_bo);
2317     intel_batchbuffer_free(batch);
2318     mfc_context->aux_batchbuffer = NULL;
2319
2320     return batch_bo;
2321 }
2322
2323 static void
2324 gen75_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2325                                             struct encode_state *encode_state,
2326                                             struct intel_encoder_context *encoder_context)
2327 {
2328     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2329
2330     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2331     mfc_context->set_surface_state(ctx, encoder_context);
2332     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2333     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
2334     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2335     gen75_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2336     gen75_mfc_mpeg2_qm_state(ctx, encoder_context);
2337     gen75_mfc_mpeg2_fqm_state(ctx, encoder_context);
2338 }
2339
2340 static void
2341 gen75_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2342                                     struct encode_state *encode_state,
2343                                     struct intel_encoder_context *encoder_context)
2344 {
2345     struct intel_batchbuffer *batch = encoder_context->base.batch;
2346     dri_bo *slice_batch_bo;
2347
2348     slice_batch_bo = gen75_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2349
2350     // begin programing
2351     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2352     intel_batchbuffer_emit_mi_flush(batch);
2353     
2354     // picture level programing
2355     gen75_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2356
2357     BEGIN_BCS_BATCH(batch, 2);
2358     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2359     OUT_BCS_RELOC(batch,
2360                   slice_batch_bo,
2361                   I915_GEM_DOMAIN_COMMAND, 0, 
2362                   0);
2363     ADVANCE_BCS_BATCH(batch);
2364
2365     // end programing
2366     intel_batchbuffer_end_atomic(batch);
2367
2368     dri_bo_unreference(slice_batch_bo);
2369 }
2370
2371 static VAStatus
2372 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2373                         struct encode_state *encode_state,
2374                         struct intel_encoder_context *encoder_context)
2375 {
2376     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2377     struct object_surface *obj_surface; 
2378     struct object_buffer *obj_buffer;
2379     struct i965_coded_buffer_segment *coded_buffer_segment;
2380     VAStatus vaStatus = VA_STATUS_SUCCESS;
2381     dri_bo *bo;
2382     int i;
2383
2384     /* reconstructed surface */
2385     obj_surface = encode_state->reconstructed_object;
2386     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2387     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2388     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2389     mfc_context->surface_state.width = obj_surface->orig_width;
2390     mfc_context->surface_state.height = obj_surface->orig_height;
2391     mfc_context->surface_state.w_pitch = obj_surface->width;
2392     mfc_context->surface_state.h_pitch = obj_surface->height;
2393
2394     /* forward reference */
2395     obj_surface = encode_state->reference_objects[0];
2396
2397     if (obj_surface && obj_surface->bo) {
2398         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2399         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2400     } else
2401         mfc_context->reference_surfaces[0].bo = NULL;
2402
2403     /* backward reference */
2404     obj_surface = encode_state->reference_objects[1];
2405
2406     if (obj_surface && obj_surface->bo) {
2407         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2408         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2409     } else {
2410         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2411
2412         if (mfc_context->reference_surfaces[1].bo)
2413             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2414     }
2415
2416     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2417         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2418
2419         if (mfc_context->reference_surfaces[i].bo)
2420             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2421     }
2422     
2423     /* input YUV surface */
2424     obj_surface = encode_state->input_yuv_object;
2425     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2426     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2427
2428     /* coded buffer */
2429     obj_buffer = encode_state->coded_buf_object;
2430     bo = obj_buffer->buffer_store->bo;
2431     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2432     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2433     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2434     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2435
2436     /* set the internal flag to 0 to indicate the coded size is unknown */
2437     dri_bo_map(bo, 1);
2438     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2439     coded_buffer_segment->mapped = 0;
2440     coded_buffer_segment->codec = encoder_context->codec;
2441     dri_bo_unmap(bo);
2442
2443     return vaStatus;
2444 }
2445
2446 static VAStatus
2447 gen75_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2448                                struct encode_state *encode_state,
2449                                struct intel_encoder_context *encoder_context)
2450 {
2451     gen75_mfc_init(ctx, encode_state, encoder_context);
2452     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2453     /*Programing bcs pipeline*/
2454     gen75_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2455     gen75_mfc_run(ctx, encode_state, encoder_context);
2456
2457     return VA_STATUS_SUCCESS;
2458 }
2459
2460 static void
2461 gen75_mfc_context_destroy(void *context)
2462 {
2463     struct gen6_mfc_context *mfc_context = context;
2464     int i;
2465
2466     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2467     mfc_context->post_deblocking_output.bo = NULL;
2468
2469     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2470     mfc_context->pre_deblocking_output.bo = NULL;
2471
2472     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2473     mfc_context->uncompressed_picture_source.bo = NULL;
2474
2475     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2476     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2477
2478     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2479         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2480         mfc_context->direct_mv_buffers[i].bo = NULL;
2481     }
2482
2483     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2484     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2485
2486     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2487     mfc_context->macroblock_status_buffer.bo = NULL;
2488
2489     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2490     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2491
2492     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2493     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2494
2495     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2496         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2497         mfc_context->reference_surfaces[i].bo = NULL;  
2498     }
2499
2500     i965_gpe_context_destroy(&mfc_context->gpe_context);
2501
2502     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2503     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2504
2505     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2506     mfc_context->aux_batchbuffer_surface.bo = NULL;
2507
2508     if (mfc_context->aux_batchbuffer)
2509         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2510
2511     mfc_context->aux_batchbuffer = NULL;
2512
2513     free(mfc_context);
2514 }
2515
2516 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2517                                    VAProfile profile,
2518                                    struct encode_state *encode_state,
2519                                    struct intel_encoder_context *encoder_context)
2520 {
2521     VAStatus vaStatus;
2522
2523     switch (profile) {
2524     case VAProfileH264ConstrainedBaseline:
2525     case VAProfileH264Main:
2526     case VAProfileH264High:
2527     case VAProfileH264MultiviewHigh:
2528     case VAProfileH264StereoHigh:
2529         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2530         break;
2531
2532         /* FIXME: add for other profile */
2533     case VAProfileMPEG2Simple:
2534     case VAProfileMPEG2Main:
2535         vaStatus = gen75_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2536         break;
2537
2538     default:
2539         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2540         break;
2541     }
2542
2543     return vaStatus;
2544 }
2545
2546 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2547 {
2548     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2549
2550     if (!mfc_context)
2551         return False;
2552
2553     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2554
2555     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2556     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2557
2558     mfc_context->gpe_context.curbe.length = 32 * 4;
2559
2560     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2561     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2562     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2563     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2564     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2565
2566     i965_gpe_load_kernels(ctx,
2567                           &mfc_context->gpe_context,
2568                           gen75_mfc_kernels,
2569                           1);
2570
2571     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2572     mfc_context->set_surface_state = gen75_mfc_surface_state;
2573     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2574     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2575     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2576     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2577     mfc_context->insert_object = gen75_mfc_avc_insert_object;
2578     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2579
2580     encoder_context->mfc_context = mfc_context;
2581     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2582     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2583     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2584
2585     return True;
2586 }