OSDN Git Service

BDW encoding reuses aux_batchbuffer instead of allocating another new buffer
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfc.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/*
 * Surface-state / binding-table layout helpers.
 *
 * SURFACE_STATE_OFFSET(index) is the byte offset of the index-th padded
 * surface state; BINDING_TABLE_OFFSET(index) is the byte offset of the
 * index-th binding-table entry, which starts right after
 * MAX_MEDIA_SURFACES_GEN6 padded surface states.
 *
 * The macro argument is parenthesized so that expression arguments such as
 * "a + b" expand correctly.
 */
#define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
49
#define MFC_SOFTWARE_HASWELL    1

/* Revision number compared against by IS_STEPPING_BPLUS(). */
#define B0_STEP_REV             2
/*
 * Non-zero when the driver data's intel.revision is at least B0_STEP_REV.
 * The argument is parenthesized so pointer expressions (e.g. "p + 1")
 * can be passed safely.
 */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
54
55 static const uint32_t gen8_mfc_batchbuffer_avc_intra[][4] = {
56 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
57 };
58
59 static const uint32_t gen8_mfc_batchbuffer_avc_inter[][4] = {
60 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
61 };
62
63 static struct i965_kernel gen8_mfc_kernels[] = {
64     {
65         "MFC AVC INTRA BATCHBUFFER ",
66         MFC_BATCHBUFFER_AVC_INTRA,
67         gen8_mfc_batchbuffer_avc_intra,
68         sizeof(gen8_mfc_batchbuffer_avc_intra),
69         NULL
70     },
71
72     {
73         "MFC AVC INTER BATCHBUFFER ",
74         MFC_BATCHBUFFER_AVC_INTER,
75         gen8_mfc_batchbuffer_avc_inter,
76         sizeof(gen8_mfc_batchbuffer_avc_inter),
77         NULL
78     },
79 };
80
/* Inter prediction mode field: mask and the values tested against it. */
#define INTER_MODE_MASK     0x03
#define INTER_8X8           0x03
#define INTER_16X8          0x01
#define INTER_8X16          0x02
/* Mask covering bits 8..15 (sub-macroblock shape, per the name). */
#define SUBMB_SHAPE_MASK    0x00FF00

/* Values placed at bit 20 and above; NOTE(review): presumably MV-count selectors, confirm against users. */
#define INTER_MV8           (4 << 20)
#define INTER_MV32          (6 << 20)
89
90
91 static void
92 gen8_mfc_pipe_mode_select(VADriverContextP ctx,
93                           int standard_select,
94                           struct intel_encoder_context *encoder_context)
95 {
96     struct intel_batchbuffer *batch = encoder_context->base.batch;
97     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
98
99     assert(standard_select == MFX_FORMAT_MPEG2 ||
100            standard_select == MFX_FORMAT_AVC);
101
102     BEGIN_BCS_BATCH(batch, 5);
103
104     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
105     OUT_BCS_BATCH(batch,
106                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
107                   (MFD_MODE_VLD << 15) | /* VLD mode */
108                   (0 << 10) | /* Stream-Out Enable */
109                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
110                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
111                   (0 << 5)  | /* not in stitch mode */
112                   (1 << 4)  | /* encoding mode */
113                   (standard_select << 0));  /* standard select: avc or mpeg2 */
114     OUT_BCS_BATCH(batch,
115                   (0 << 7)  | /* expand NOA bus flag */
116                   (0 << 6)  | /* disable slice-level clock gating */
117                   (0 << 5)  | /* disable clock gating for NOA */
118                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
119                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
120                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
121                   (0 << 1)  |
122                   (0 << 0));
123     OUT_BCS_BATCH(batch, 0);
124     OUT_BCS_BATCH(batch, 0);
125
126     ADVANCE_BCS_BATCH(batch);
127 }
128
129 static void
130 gen8_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
131 {
132     struct intel_batchbuffer *batch = encoder_context->base.batch;
133     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
134
135     BEGIN_BCS_BATCH(batch, 6);
136
137     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
138     OUT_BCS_BATCH(batch, 0);
139     OUT_BCS_BATCH(batch,
140                   ((mfc_context->surface_state.height - 1) << 18) |
141                   ((mfc_context->surface_state.width - 1) << 4));
142     OUT_BCS_BATCH(batch,
143                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
144                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
145                   (0 << 22) | /* surface object control state, FIXME??? */
146                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
147                   (0 << 2)  | /* must be 0 for interleave U/V */
148                   (1 << 1)  | /* must be tiled */
149                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
150     OUT_BCS_BATCH(batch,
151                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
152                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
153     OUT_BCS_BATCH(batch, 0);
154
155     ADVANCE_BCS_BATCH(batch);
156 }
157
158 static void
159 gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
160                                  struct intel_encoder_context *encoder_context)
161 {
162     struct intel_batchbuffer *batch = encoder_context->base.batch;
163     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
164     struct gen6_vme_context *vme_context = encoder_context->vme_context;
165
166     BEGIN_BCS_BATCH(batch, 26);
167
168     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
169     /* the DW1-3 is for the MFX indirect bistream offset */
170     OUT_BCS_BATCH(batch, 0);
171     OUT_BCS_BATCH(batch, 0);
172     OUT_BCS_BATCH(batch, 0);
173     /* the DW4-5 is the MFX upper bound */
174     OUT_BCS_BATCH(batch, 0);
175     OUT_BCS_BATCH(batch, 0);
176
177     /* the DW6-10 is for MFX Indirect MV Object Base Address */
178     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
179     OUT_BCS_BATCH(batch, 0);
180     OUT_BCS_BATCH(batch, 0);
181     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
182     OUT_BCS_BATCH(batch, 0);
183
184     /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
185     OUT_BCS_BATCH(batch, 0);
186     OUT_BCS_BATCH(batch, 0);
187     OUT_BCS_BATCH(batch, 0);
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190
191     /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
192     OUT_BCS_BATCH(batch, 0);
193     OUT_BCS_BATCH(batch, 0);
194     OUT_BCS_BATCH(batch, 0);
195     OUT_BCS_BATCH(batch, 0);
196     OUT_BCS_BATCH(batch, 0);
197
198     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
199     OUT_BCS_RELOC(batch,
200                   mfc_context->mfc_indirect_pak_bse_object.bo,
201                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
202                   0);
203     OUT_BCS_BATCH(batch, 0);
204     OUT_BCS_BATCH(batch, 0);
205         
206     OUT_BCS_RELOC(batch,
207                   mfc_context->mfc_indirect_pak_bse_object.bo,
208                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
209                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
210     OUT_BCS_BATCH(batch, 0);
211
212     ADVANCE_BCS_BATCH(batch);
213 }
214
215 static void
216 gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
217                        struct intel_encoder_context *encoder_context)
218 {
219     struct intel_batchbuffer *batch = encoder_context->base.batch;
220     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
221     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
222
223     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
224     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
225
226     BEGIN_BCS_BATCH(batch, 16);
227
228     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
229     /*DW1. MB setting of frame */
230     OUT_BCS_BATCH(batch,
231                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
232     OUT_BCS_BATCH(batch, 
233                   ((height_in_mbs - 1) << 16) | 
234                   ((width_in_mbs - 1) << 0));
235     /* DW3 QP setting */
236     OUT_BCS_BATCH(batch, 
237                   (0 << 24) |   /* Second Chroma QP Offset */
238                   (0 << 16) |   /* Chroma QP Offset */
239                   (0 << 14) |   /* Max-bit conformance Intra flag */
240                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
241                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
242                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
243                   (0 << 8)  |   /* FIXME: Image Structure */
244                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
245     OUT_BCS_BATCH(batch,
246                   (0 << 16) |   /* Mininum Frame size */
247                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
248                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
249                   (0 << 13) |   /* CABAC 0 word insertion test enable */
250                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
251                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
252                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
253                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
254                   (0 << 6)  |   /* Only valid for VLD decoding mode */
255                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
256                   (0 << 4)  |   /* Direct 8x8 inference flag */
257                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
258                   (1 << 2)  |   /* Frame MB only flag */
259                   (0 << 1)  |   /* MBAFF mode is in active */
260                   (0 << 0));    /* Field picture flag */
261     /* DW5 Trellis quantization */
262     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
263     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
264                   (0xBB8 << 16) |       /* InterMbMaxSz */
265                   (0xEE8) );            /* IntraMbMaxSz */
266     OUT_BCS_BATCH(batch, 0);            /* Reserved */
267     /* DW8. QP delta */
268     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
269     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
270     /* DW10. Bit setting for MB */
271     OUT_BCS_BATCH(batch, 0x8C000000);
272     OUT_BCS_BATCH(batch, 0x00010000);
273     /* DW12. */
274     OUT_BCS_BATCH(batch, 0);
275     OUT_BCS_BATCH(batch, 0x02010100);
276     /* DW14. For short format */
277     OUT_BCS_BATCH(batch, 0);
278     OUT_BCS_BATCH(batch, 0);
279
280     ADVANCE_BCS_BATCH(batch);
281 }
282
283 static void
284 gen8_mfc_qm_state(VADriverContextP ctx,
285                   int qm_type,
286                   unsigned int *qm,
287                   int qm_length,
288                   struct intel_encoder_context *encoder_context)
289 {
290     struct intel_batchbuffer *batch = encoder_context->base.batch;
291     unsigned int qm_buffer[16];
292
293     assert(qm_length <= 16);
294     assert(sizeof(*qm) == 4);
295     memcpy(qm_buffer, qm, qm_length * 4);
296
297     BEGIN_BCS_BATCH(batch, 18);
298     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
299     OUT_BCS_BATCH(batch, qm_type << 0);
300     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
301     ADVANCE_BCS_BATCH(batch);
302 }
303
304 static void
305 gen8_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
306 {
307     unsigned int qm[16] = {
308         0x10101010, 0x10101010, 0x10101010, 0x10101010,
309         0x10101010, 0x10101010, 0x10101010, 0x10101010,
310         0x10101010, 0x10101010, 0x10101010, 0x10101010,
311         0x10101010, 0x10101010, 0x10101010, 0x10101010
312     };
313
314     gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
315     gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
316     gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
317     gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
318 }
319
320 static void
321 gen8_mfc_fqm_state(VADriverContextP ctx,
322                    int fqm_type,
323                    unsigned int *fqm,
324                    int fqm_length,
325                    struct intel_encoder_context *encoder_context)
326 {
327     struct intel_batchbuffer *batch = encoder_context->base.batch;
328     unsigned int fqm_buffer[32];
329
330     assert(fqm_length <= 32);
331     assert(sizeof(*fqm) == 4);
332     memcpy(fqm_buffer, fqm, fqm_length * 4);
333
334     BEGIN_BCS_BATCH(batch, 34);
335     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
336     OUT_BCS_BATCH(batch, fqm_type << 0);
337     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
338     ADVANCE_BCS_BATCH(batch);
339 }
340
341 static void
342 gen8_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
343 {
344     unsigned int qm[32] = {
345         0x10001000, 0x10001000, 0x10001000, 0x10001000,
346         0x10001000, 0x10001000, 0x10001000, 0x10001000,
347         0x10001000, 0x10001000, 0x10001000, 0x10001000,
348         0x10001000, 0x10001000, 0x10001000, 0x10001000,
349         0x10001000, 0x10001000, 0x10001000, 0x10001000,
350         0x10001000, 0x10001000, 0x10001000, 0x10001000,
351         0x10001000, 0x10001000, 0x10001000, 0x10001000,
352         0x10001000, 0x10001000, 0x10001000, 0x10001000
353     };
354
355     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
356     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
357     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
358     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
359 }
360
361 static void
362 gen8_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
363                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
364                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
365                            struct intel_batchbuffer *batch)
366 {
367     if (batch == NULL)
368         batch = encoder_context->base.batch;
369
370     if (data_bits_in_last_dw == 0)
371         data_bits_in_last_dw = 32;
372
373     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
374
375     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
376     OUT_BCS_BATCH(batch,
377                   (0 << 16) |   /* always start at offset 0 */
378                   (data_bits_in_last_dw << 8) |
379                   (skip_emul_byte_count << 4) |
380                   (!!emulation_flag << 3) |
381                   ((!!is_last_header) << 2) |
382                   ((!!is_end_of_slice) << 1) |
383                   (0 << 0));    /* FIXME: ??? */
384     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
385
386     ADVANCE_BCS_BATCH(batch);
387 }
388
389
390 static void gen8_mfc_init(VADriverContextP ctx,
391                           struct encode_state *encode_state,
392                           struct intel_encoder_context *encoder_context)
393 {
394     struct i965_driver_data *i965 = i965_driver_data(ctx);
395     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
396     dri_bo *bo;
397     int i;
398     int width_in_mbs = 0;
399     int height_in_mbs = 0;
400     int slice_batchbuffer_size;
401
402     if (encoder_context->codec == CODEC_H264) {
403         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
404         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
405         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
406     } else {
407         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
408
409         assert(encoder_context->codec == CODEC_MPEG2);
410
411         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
412         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
413     }
414
415     slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
416                 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
417
418     /*Encode common setup for MFC*/
419     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
420     mfc_context->post_deblocking_output.bo = NULL;
421
422     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
423     mfc_context->pre_deblocking_output.bo = NULL;
424
425     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
426     mfc_context->uncompressed_picture_source.bo = NULL;
427
428     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
429     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
430
431     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
432         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
433         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
434         mfc_context->direct_mv_buffers[i].bo = NULL;
435     }
436
437     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
438         if (mfc_context->reference_surfaces[i].bo != NULL)
439             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
440         mfc_context->reference_surfaces[i].bo = NULL;  
441     }
442
443     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
444     bo = dri_bo_alloc(i965->intel.bufmgr,
445                       "Buffer",
446                       width_in_mbs * 64,
447                       64);
448     assert(bo);
449     mfc_context->intra_row_store_scratch_buffer.bo = bo;
450
451     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
452     bo = dri_bo_alloc(i965->intel.bufmgr,
453                       "Buffer",
454                       width_in_mbs * height_in_mbs * 16,
455                       64);
456     assert(bo);
457     mfc_context->macroblock_status_buffer.bo = bo;
458
459     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
460     bo = dri_bo_alloc(i965->intel.bufmgr,
461                       "Buffer",
462                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
463                       64);
464     assert(bo);
465     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
466
467     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
468     bo = dri_bo_alloc(i965->intel.bufmgr,
469                       "Buffer",
470                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
471                       0x1000);
472     assert(bo);
473     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
474
475     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
476     mfc_context->mfc_batchbuffer_surface.bo = NULL;
477
478     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
479     mfc_context->aux_batchbuffer_surface.bo = NULL;
480
481     if (mfc_context->aux_batchbuffer)
482         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
483
484     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
485     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
486     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
487     mfc_context->aux_batchbuffer_surface.pitch = 16;
488     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
489     mfc_context->aux_batchbuffer_surface.size_block = 16;
490
491     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
492 }
493
494 static void
495 gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx,
496                              struct intel_encoder_context *encoder_context)
497 {
498     struct intel_batchbuffer *batch = encoder_context->base.batch;
499     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
500     int i;
501
502     BEGIN_BCS_BATCH(batch, 61);
503
504     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
505
506     /* the DW1-3 is for pre_deblocking */
507     if (mfc_context->pre_deblocking_output.bo)
508         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
509                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
510                       0);
511     else
512         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
513
514     OUT_BCS_BATCH(batch, 0);
515     OUT_BCS_BATCH(batch, 0);
516     /* the DW4-6 is for the post_deblocking */
517
518     if (mfc_context->post_deblocking_output.bo)
519         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
520                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
521                       0);                                                                                       /* post output addr  */ 
522     else
523         OUT_BCS_BATCH(batch, 0);
524     
525     OUT_BCS_BATCH(batch, 0);
526     OUT_BCS_BATCH(batch, 0);
527
528     /* the DW7-9 is for the uncompressed_picture */
529     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
530                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
531                   0); /* uncompressed data */
532
533     OUT_BCS_BATCH(batch, 0);
534     OUT_BCS_BATCH(batch, 0);
535
536     /* the DW10-12 is for the mb status */
537     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
538                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
539                   0); /* StreamOut data*/
540     
541     OUT_BCS_BATCH(batch, 0);
542     OUT_BCS_BATCH(batch, 0);
543
544     /* the DW13-15 is for the intra_row_store_scratch */
545     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
546                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
547                   0);   
548
549     OUT_BCS_BATCH(batch, 0);
550     OUT_BCS_BATCH(batch, 0);
551
552     /* the DW16-18 is for the deblocking filter */
553     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
554                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
555                   0);
556
557     OUT_BCS_BATCH(batch, 0);
558     OUT_BCS_BATCH(batch, 0);
559
560     /* the DW 19-50 is for Reference pictures*/
561     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
562         if ( mfc_context->reference_surfaces[i].bo != NULL) {
563             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
564                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
565                           0);                   
566         } else {
567             OUT_BCS_BATCH(batch, 0);
568         }
569
570         OUT_BCS_BATCH(batch, 0);
571     }
572
573     OUT_BCS_BATCH(batch, 0);
574
575     /* The DW 52-54 is for the MB status buffer */
576     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
577                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
578                   0);                                                                                   /* Macroblock status buffer*/
579         
580     OUT_BCS_BATCH(batch, 0);
581     OUT_BCS_BATCH(batch, 0);
582
583     /* the DW 55-57 is the ILDB buffer */
584     OUT_BCS_BATCH(batch, 0);
585     OUT_BCS_BATCH(batch, 0);
586     OUT_BCS_BATCH(batch, 0);
587
588     /* the DW 58-60 is the second ILDB buffer */
589     OUT_BCS_BATCH(batch, 0);
590     OUT_BCS_BATCH(batch, 0);
591     OUT_BCS_BATCH(batch, 0);
592
593     ADVANCE_BCS_BATCH(batch);
594 }
595
596 static void
597 gen8_mfc_avc_directmode_state(VADriverContextP ctx,
598                               struct intel_encoder_context *encoder_context)
599 {
600     struct intel_batchbuffer *batch = encoder_context->base.batch;
601     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
602
603     int i;
604
605     BEGIN_BCS_BATCH(batch, 71);
606
607     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
608
609     /* Reference frames and Current frames */
610     /* the DW1-32 is for the direct MV for reference */
611     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
612         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
613             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
614                           I915_GEM_DOMAIN_INSTRUCTION, 0,
615                           0);
616             OUT_BCS_BATCH(batch, 0);
617         } else {
618             OUT_BCS_BATCH(batch, 0);
619             OUT_BCS_BATCH(batch, 0);
620         }
621     }
622     
623     OUT_BCS_BATCH(batch, 0);
624
625     /* the DW34-36 is the MV for the current reference */
626     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
627                   I915_GEM_DOMAIN_INSTRUCTION, 0,
628                   0);
629
630     OUT_BCS_BATCH(batch, 0);
631     OUT_BCS_BATCH(batch, 0);
632
633     /* POL list */
634     for(i = 0; i < 32; i++) {
635         OUT_BCS_BATCH(batch, i/2);
636     }
637     OUT_BCS_BATCH(batch, 0);
638     OUT_BCS_BATCH(batch, 0);
639
640     ADVANCE_BCS_BATCH(batch);
641 }
642
643
644 static void
645 gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
646                                  struct intel_encoder_context *encoder_context)
647 {
648     struct intel_batchbuffer *batch = encoder_context->base.batch;
649     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
650
651     BEGIN_BCS_BATCH(batch, 10);
652
653     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
654     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
655                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
656                   0);
657     OUT_BCS_BATCH(batch, 0);
658     OUT_BCS_BATCH(batch, 0);
659         
660     /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
661     OUT_BCS_BATCH(batch, 0);
662     OUT_BCS_BATCH(batch, 0);
663     OUT_BCS_BATCH(batch, 0);
664
665     /* the DW7-9 is for Bitplane Read Buffer Base Address */
666     OUT_BCS_BATCH(batch, 0);
667     OUT_BCS_BATCH(batch, 0);
668     OUT_BCS_BATCH(batch, 0);
669
670     ADVANCE_BCS_BATCH(batch);
671 }
672
673
674 static void gen8_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
675                                                       struct encode_state *encode_state,
676                                                       struct intel_encoder_context *encoder_context)
677 {
678     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
679
680     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
681     mfc_context->set_surface_state(ctx, encoder_context);
682     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
683     gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
684     gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
685     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
686     mfc_context->avc_qm_state(ctx, encoder_context);
687     mfc_context->avc_fqm_state(ctx, encoder_context);
688     gen8_mfc_avc_directmode_state(ctx, encoder_context); 
689     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
690 }
691
692
693 static VAStatus gen8_mfc_run(VADriverContextP ctx, 
694                              struct encode_state *encode_state,
695                              struct intel_encoder_context *encoder_context)
696 {
697     struct intel_batchbuffer *batch = encoder_context->base.batch;
698
699     intel_batchbuffer_flush(batch);             //run the pipeline
700
701     return VA_STATUS_SUCCESS;
702 }
703
704
705 static VAStatus
706 gen8_mfc_stop(VADriverContextP ctx, 
707               struct encode_state *encode_state,
708               struct intel_encoder_context *encoder_context,
709               int *encoded_bits_size)
710 {
711     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
712     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
713     VACodedBufferSegment *coded_buffer_segment;
714     
715     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
716     assert(vaStatus == VA_STATUS_SUCCESS);
717     *encoded_bits_size = coded_buffer_segment->size * 8;
718     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
719
720     return VA_STATUS_SUCCESS;
721 }
722
723
724 static void
725 gen8_mfc_avc_slice_state(VADriverContextP ctx,
726                          VAEncPictureParameterBufferH264 *pic_param,
727                          VAEncSliceParameterBufferH264 *slice_param,
728                          struct encode_state *encode_state,
729                          struct intel_encoder_context *encoder_context,
730                          int rate_control_enable,
731                          int qp,
732                          struct intel_batchbuffer *batch)
733 {
734     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
735     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
736     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
737     int beginmb = slice_param->macroblock_address;
738     int endmb = beginmb + slice_param->num_macroblocks;
739     int beginx = beginmb % width_in_mbs;
740     int beginy = beginmb / width_in_mbs;
741     int nextx =  endmb % width_in_mbs;
742     int nexty = endmb / width_in_mbs;
743     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
744     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
745     int maxQpN, maxQpP;
746     unsigned char correct[6], grow, shrink;
747     int i;
748     int weighted_pred_idc = 0;
749     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
750     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
751     int num_ref_l0 = 0, num_ref_l1 = 0;
752
753     if (batch == NULL)
754         batch = encoder_context->base.batch;
755
756     if (slice_type == SLICE_TYPE_I) {
757         luma_log2_weight_denom = 0;
758         chroma_log2_weight_denom = 0;
759     } else if (slice_type == SLICE_TYPE_P) {
760         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
761         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
762
763         if (slice_param->num_ref_idx_active_override_flag)
764             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
765     } else if (slice_type == SLICE_TYPE_B) {
766         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
767         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
768         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
769
770         if (slice_param->num_ref_idx_active_override_flag) {
771             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
772             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
773         }
774
775         if (weighted_pred_idc == 2) {
776             /* 8.4.3 - Derivation process for prediction weights (8-279) */
777             luma_log2_weight_denom = 5;
778             chroma_log2_weight_denom = 5;
779         }
780     }
781
782     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
783     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
784
785     for (i = 0; i < 6; i++)
786         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
787
788     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + 
789         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
790     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + 
791         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
792
793     BEGIN_BCS_BATCH(batch, 11);;
794
795     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
796     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
797
798     OUT_BCS_BATCH(batch,
799                   (num_ref_l0 << 16) |
800                   (num_ref_l1 << 24) |
801                   (chroma_log2_weight_denom << 8) |
802                   (luma_log2_weight_denom << 0));
803
804     OUT_BCS_BATCH(batch, 
805                   (weighted_pred_idc << 30) |
806                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
807                   (slice_param->disable_deblocking_filter_idc << 27) |
808                   (slice_param->cabac_init_idc << 24) |
809                   (qp<<16) |                    /*Slice Quantization Parameter*/
810                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
811                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
812     OUT_BCS_BATCH(batch,
813                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
814                   (beginx << 16) |
815                   slice_param->macroblock_address );
816     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
817     OUT_BCS_BATCH(batch, 
818                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
819                   (1 << 30) |           /*ResetRateControlCounter*/
820                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
821                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
822                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
823                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
824                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
825                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
826                   (last_slice << 19) |     /*IsLastSlice*/
827                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
828                   (1 << 17) |       /*HeaderPresentFlag*/       
829                   (1 << 16) |       /*SliceData PresentFlag*/
830                   (1 << 15) |       /*TailPresentFlag*/
831                   (1 << 13) |       /*RBSP NAL TYPE*/   
832                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
833     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
834     OUT_BCS_BATCH(batch,
835                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
836                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
837                   (shrink << 8)  |
838                   (grow << 0));   
839     OUT_BCS_BATCH(batch,
840                   (correct[5] << 20) |
841                   (correct[4] << 16) |
842                   (correct[3] << 12) |
843                   (correct[2] << 8) |
844                   (correct[1] << 4) |
845                   (correct[0] << 0));
846     OUT_BCS_BATCH(batch, 0);
847
848     ADVANCE_BCS_BATCH(batch);
849 }
850
851
852 #ifdef MFC_SOFTWARE_HASWELL
853
854 static int
855 gen8_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
856                               int qp,unsigned int *msg,
857                               struct intel_encoder_context *encoder_context,
858                               unsigned char target_mb_size, unsigned char max_mb_size,
859                               struct intel_batchbuffer *batch)
860 {
861     int len_in_dwords = 12;
862     unsigned int intra_msg;
863 #define         INTRA_MSG_FLAG          (1 << 13)
864 #define         INTRA_MBTYPE_MASK       (0x1F0000)
865     if (batch == NULL)
866         batch = encoder_context->base.batch;
867
868     BEGIN_BCS_BATCH(batch, len_in_dwords);
869
870     intra_msg = msg[0] & 0xC0FF;
871     intra_msg |= INTRA_MSG_FLAG;
872     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
873     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
874     OUT_BCS_BATCH(batch, 0);
875     OUT_BCS_BATCH(batch, 0);
876     OUT_BCS_BATCH(batch, 
877                   (0 << 24) |           /* PackedMvNum, Debug*/
878                   (0 << 20) |           /* No motion vector */
879                   (1 << 19) |           /* CbpDcY */
880                   (1 << 18) |           /* CbpDcU */
881                   (1 << 17) |           /* CbpDcV */
882                   intra_msg);
883
884     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
885     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
886     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
887
888     /*Stuff for Intra MB*/
889     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
890     OUT_BCS_BATCH(batch, msg[2]);       
891     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
892     
893     /*MaxSizeInWord and TargetSzieInWord*/
894     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
895                   (target_mb_size << 16) );
896
897     OUT_BCS_BATCH(batch, 0);
898
899     ADVANCE_BCS_BATCH(batch);
900
901     return len_in_dwords;
902 }
903
904 static int
905 gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
906                               unsigned int *msg, unsigned int offset,
907                               struct intel_encoder_context *encoder_context,
908                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
909                               struct intel_batchbuffer *batch)
910 {
911     struct gen6_vme_context *vme_context = encoder_context->vme_context;
912     int len_in_dwords = 12;
913     unsigned int inter_msg = 0;
914     if (batch == NULL)
915         batch = encoder_context->base.batch;
916     {
917 #define MSG_MV_OFFSET   4
918         unsigned int *mv_ptr;
919         mv_ptr = msg + MSG_MV_OFFSET;
920         /* MV of VME output is based on 16 sub-blocks. So it is necessary
921          * to convert them to be compatible with the format of AVC_PAK
922          * command.
923          */
924         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
925             /* MV[0] and MV[2] are replicated */
926             mv_ptr[4] = mv_ptr[0];
927             mv_ptr[5] = mv_ptr[1];
928             mv_ptr[2] = mv_ptr[8];
929             mv_ptr[3] = mv_ptr[9];
930             mv_ptr[6] = mv_ptr[8];
931             mv_ptr[7] = mv_ptr[9];
932         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
933             /* MV[0] and MV[1] are replicated */
934             mv_ptr[2] = mv_ptr[0];
935             mv_ptr[3] = mv_ptr[1];
936             mv_ptr[4] = mv_ptr[16];
937             mv_ptr[5] = mv_ptr[17];
938             mv_ptr[6] = mv_ptr[24];
939             mv_ptr[7] = mv_ptr[25];
940         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
941                    !(msg[1] & SUBMB_SHAPE_MASK)) {
942             /* Don't touch MV[0] or MV[1] */
943             mv_ptr[2] = mv_ptr[8];
944             mv_ptr[3] = mv_ptr[9];
945             mv_ptr[4] = mv_ptr[16];
946             mv_ptr[5] = mv_ptr[17];
947             mv_ptr[6] = mv_ptr[24];
948             mv_ptr[7] = mv_ptr[25];
949         }
950     }
951
952     BEGIN_BCS_BATCH(batch, len_in_dwords);
953
954     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
955
956     inter_msg = 32;
957     /* MV quantity */
958     if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
959         if (msg[1] & SUBMB_SHAPE_MASK)
960             inter_msg = 128;
961     }
962     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
963     OUT_BCS_BATCH(batch, offset);
964     inter_msg = msg[0] & (0x1F00FFFF);
965     inter_msg |= INTER_MV8;
966     inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
967     if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
968         (msg[1] & SUBMB_SHAPE_MASK)) {
969         inter_msg |= INTER_MV32;
970     }
971
972     OUT_BCS_BATCH(batch, inter_msg);
973
974     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
975     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
976 #if 0 
977     if ( slice_type == SLICE_TYPE_B) {
978         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
979     } else {
980         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
981     }
982 #else
983     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
984 #endif
985
986     inter_msg = msg[1] >> 8;
987     /*Stuff for Inter MB*/
988     OUT_BCS_BATCH(batch, inter_msg);        
989     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
990     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
991
992     /*MaxSizeInWord and TargetSzieInWord*/
993     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
994                   (target_mb_size << 16) );
995
996     OUT_BCS_BATCH(batch, 0x0);    
997
998     ADVANCE_BCS_BATCH(batch);
999
1000     return len_in_dwords;
1001 }
1002
1003 #define         AVC_INTRA_RDO_OFFSET    4
1004 #define         AVC_INTER_RDO_OFFSET    10
1005 #define         AVC_INTER_MSG_OFFSET    8       
1006 #define         AVC_INTER_MV_OFFSET             48
1007 #define         AVC_RDO_MASK            0xFFFF
1008
1009 static void 
1010 gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1011                                        struct encode_state *encode_state,
1012                                        struct intel_encoder_context *encoder_context,
1013                                        int slice_index,
1014                                        struct intel_batchbuffer *slice_batch)
1015 {
1016     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1017     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1018     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1019     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1020     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1021     unsigned int *msg = NULL, offset = 0;
1022     unsigned char *msg_ptr = NULL;
1023     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1024     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1025     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1026     int i,x,y;
1027     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1028     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1029     unsigned char *slice_header = NULL;
1030     int slice_header_length_in_bits = 0;
1031     unsigned int tail_data[] = { 0x0, 0x0 };
1032     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1033     int is_intra = slice_type == SLICE_TYPE_I;
1034
1035
1036     if (rate_control_mode == VA_RC_CBR) {
1037         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1038         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1039     }
1040
1041     /* only support for 8-bit pixel bit-depth */
1042     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1043     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1044     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1045     assert(qp >= 0 && qp < 52);
1046
1047     gen8_mfc_avc_slice_state(ctx, 
1048                              pPicParameter,
1049                              pSliceParameter,
1050                              encode_state, encoder_context,
1051                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1052
1053     if ( slice_index == 0) 
1054         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1055
1056     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1057
1058     // slice hander
1059     mfc_context->insert_object(ctx, encoder_context,
1060                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1061                                5,  /* first 5 bytes are start code + nal unit type */
1062                                1, 0, 1, slice_batch);
1063
1064     dri_bo_map(vme_context->vme_output.bo , 1);
1065     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1066
1067     if (is_intra) {
1068         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1069     } else {
1070         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1071     }
1072    
1073     for (i = pSliceParameter->macroblock_address; 
1074          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1075         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1076         x = i % width_in_mbs;
1077         y = i / width_in_mbs;
1078         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1079
1080         if (is_intra) {
1081             assert(msg);
1082             gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1083         } else {
1084             int inter_rdo, intra_rdo;
1085             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1086             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1087             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1088             if (intra_rdo < inter_rdo) { 
1089                 gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1090             } else {
1091                 msg += AVC_INTER_MSG_OFFSET;
1092                 gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1093             }
1094         }
1095     }
1096    
1097     dri_bo_unmap(vme_context->vme_output.bo);
1098
1099     if ( last_slice ) {    
1100         mfc_context->insert_object(ctx, encoder_context,
1101                                    tail_data, 2, 8,
1102                                    2, 1, 1, 0, slice_batch);
1103     } else {
1104         mfc_context->insert_object(ctx, encoder_context,
1105                                    tail_data, 1, 8,
1106                                    1, 1, 1, 0, slice_batch);
1107     }
1108
1109     free(slice_header);
1110
1111 }
1112
1113 static dri_bo *
1114 gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1115                                   struct encode_state *encode_state,
1116                                   struct intel_encoder_context *encoder_context)
1117 {
1118     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1119     struct i965_driver_data *i965 = i965_driver_data(ctx);
1120     struct intel_batchbuffer *batch;
1121     dri_bo *batch_bo;
1122     int i;
1123
1124     batch = mfc_context->aux_batchbuffer;
1125     batch_bo = batch->buffer;
1126     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1127         gen8_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1128     }
1129
1130     intel_batchbuffer_align(batch, 8);
1131     
1132     BEGIN_BCS_BATCH(batch, 2);
1133     OUT_BCS_BATCH(batch, 0);
1134     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1135     ADVANCE_BCS_BATCH(batch);
1136
1137     dri_bo_reference(batch_bo);
1138     intel_batchbuffer_free(batch);
1139     mfc_context->aux_batchbuffer = NULL;
1140
1141     return batch_bo;
1142 }
1143
1144 #else
1145
1146 static void
1147 gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1148                                     struct encode_state *encode_state,
1149                                     struct intel_encoder_context *encoder_context)
1150
1151 {
1152     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1153     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1154
1155     assert(vme_context->vme_output.bo);
1156     mfc_context->buffer_suface_setup(ctx,
1157                                      &mfc_context->gpe_context,
1158                                      &vme_context->vme_output,
1159                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1160                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1161     assert(mfc_context->aux_batchbuffer_surface.bo);
1162     mfc_context->buffer_suface_setup(ctx,
1163                                      &mfc_context->gpe_context,
1164                                      &mfc_context->aux_batchbuffer_surface,
1165                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1166                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1167 }
1168
1169 static void
1170 gen8_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1171                                      struct encode_state *encode_state,
1172                                      struct intel_encoder_context *encoder_context)
1173
1174 {
1175     struct i965_driver_data *i965 = i965_driver_data(ctx);
1176     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1177     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1178     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1179     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1180     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1181     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1182     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1183     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1184                                                            "MFC batchbuffer",
1185                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1186                                                            0x1000);
1187     mfc_context->buffer_suface_setup(ctx,
1188                                      &mfc_context->gpe_context,
1189                                      &mfc_context->mfc_batchbuffer_surface,
1190                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1191                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1192 }
1193
1194 static void
1195 gen8_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1196                                     struct encode_state *encode_state,
1197                                     struct intel_encoder_context *encoder_context)
1198 {
1199     gen8_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1200     gen8_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1201 }
1202
1203 static void
1204 gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1205                                 struct encode_state *encode_state,
1206                                 struct intel_encoder_context *encoder_context)
1207 {
1208     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1209     struct gen6_interface_descriptor_data *desc;   
1210     int i;
1211     dri_bo *bo;
1212
1213     bo = mfc_context->gpe_context.idrt.bo;
1214     dri_bo_map(bo, 1);
1215     assert(bo->virtual);
1216     desc = bo->virtual;
1217
1218     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1219         struct i965_kernel *kernel;
1220
1221         kernel = &mfc_context->gpe_context.kernels[i];
1222         assert(sizeof(*desc) == 32);
1223
1224         /*Setup the descritor table*/
1225         memset(desc, 0, sizeof(*desc));
1226         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1227         desc->desc2.sampler_count = 0;
1228         desc->desc2.sampler_state_pointer = 0;
1229         desc->desc3.binding_table_entry_count = 2;
1230         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1231         desc->desc4.constant_urb_entry_read_offset = 0;
1232         desc->desc4.constant_urb_entry_read_length = 4;
1233                 
1234         /*kernel start*/
1235         dri_bo_emit_reloc(bo,   
1236                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1237                           0,
1238                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1239                           kernel->bo);
1240         desc++;
1241     }
1242
1243     dri_bo_unmap(bo);
1244 }
1245
1246 static void
1247 gen8_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1248                                     struct encode_state *encode_state,
1249                                     struct intel_encoder_context *encoder_context)
1250 {
1251     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1252     
1253     (void)mfc_context;
1254 }
1255
1256 static void
1257 gen8_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1258                                          int index,
1259                                          int head_offset,
1260                                          int batchbuffer_offset,
1261                                          int head_size,
1262                                          int tail_size,
1263                                          int number_mb_cmds,
1264                                          int first_object,
1265                                          int last_object,
1266                                          int last_slice,
1267                                          int mb_x,
1268                                          int mb_y,
1269                                          int width_in_mbs,
1270                                          int qp)
1271 {
1272     BEGIN_BATCH(batch, 12);
1273     
1274     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1275     OUT_BATCH(batch, index);
1276     OUT_BATCH(batch, 0);
1277     OUT_BATCH(batch, 0);
1278     OUT_BATCH(batch, 0);
1279     OUT_BATCH(batch, 0);
1280    
1281     /*inline data */
1282     OUT_BATCH(batch, head_offset);
1283     OUT_BATCH(batch, batchbuffer_offset);
1284     OUT_BATCH(batch, 
1285               head_size << 16 |
1286               tail_size);
1287     OUT_BATCH(batch,
1288               number_mb_cmds << 16 |
1289               first_object << 2 |
1290               last_object << 1 |
1291               last_slice);
1292     OUT_BATCH(batch,
1293               mb_y << 8 |
1294               mb_x);
1295     OUT_BATCH(batch,
1296               qp << 16 |
1297               width_in_mbs);
1298
1299     ADVANCE_BATCH(batch);
1300 }
1301
1302 static void
1303 gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1304                                        struct intel_encoder_context *encoder_context,
1305                                        VAEncSliceParameterBufferH264 *slice_param,
1306                                        int head_offset,
1307                                        unsigned short head_size,
1308                                        unsigned short tail_size,
1309                                        int batchbuffer_offset,
1310                                        int qp,
1311                                        int last_slice)
1312 {
1313     struct intel_batchbuffer *batch = encoder_context->base.batch;
1314     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1315     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1316     int total_mbs = slice_param->num_macroblocks;
1317     int number_mb_cmds = 128;
1318     int starting_mb = 0;
1319     int last_object = 0;
1320     int first_object = 1;
1321     int i;
1322     int mb_x, mb_y;
1323     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1324
1325     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1326         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1327         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1328         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1329         assert(mb_x <= 255 && mb_y <= 255);
1330
1331         starting_mb += number_mb_cmds;
1332
1333         gen8_mfc_batchbuffer_emit_object_command(batch,
1334                                                  index,
1335                                                  head_offset,
1336                                                  batchbuffer_offset,
1337                                                  head_size,
1338                                                  tail_size,
1339                                                  number_mb_cmds,
1340                                                  first_object,
1341                                                  last_object,
1342                                                  last_slice,
1343                                                  mb_x,
1344                                                  mb_y,
1345                                                  width_in_mbs,
1346                                                  qp);
1347
1348         if (first_object) {
1349             head_offset += head_size;
1350             batchbuffer_offset += head_size;
1351         }
1352
1353         if (last_object) {
1354             head_offset += tail_size;
1355             batchbuffer_offset += tail_size;
1356         }
1357
1358         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1359
1360         first_object = 0;
1361     }
1362
1363     if (!last_object) {
1364         last_object = 1;
1365         number_mb_cmds = total_mbs % number_mb_cmds;
1366         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1367         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1368         assert(mb_x <= 255 && mb_y <= 255);
1369         starting_mb += number_mb_cmds;
1370
1371         gen8_mfc_batchbuffer_emit_object_command(batch,
1372                                                  index,
1373                                                  head_offset,
1374                                                  batchbuffer_offset,
1375                                                  head_size,
1376                                                  tail_size,
1377                                                  number_mb_cmds,
1378                                                  first_object,
1379                                                  last_object,
1380                                                  last_slice,
1381                                                  mb_x,
1382                                                  mb_y,
1383                                                  width_in_mbs,
1384                                                  qp);
1385     }
1386 }
1387                           
1388 /*
1389  * return size in Owords (16bytes)
1390  */         
1391 static int
1392 gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1393                                struct encode_state *encode_state,
1394                                struct intel_encoder_context *encoder_context,
1395                                int slice_index,
1396                                int batchbuffer_offset)
1397 {
1398     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1399     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1400     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1401     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1402     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1403     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1404     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1405     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1406     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1407     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1408     unsigned char *slice_header = NULL;
1409     int slice_header_length_in_bits = 0;
1410     unsigned int tail_data[] = { 0x0, 0x0 };
1411     long head_offset;
1412     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1413     unsigned short head_size, tail_size;
1414     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1415
1416     if (rate_control_mode == VA_RC_CBR) {
1417         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1418         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1419     }
1420
1421     /* only support for 8-bit pixel bit-depth */
1422     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1423     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1424     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1425     assert(qp >= 0 && qp < 52);
1426
1427     head_offset = old_used / 16;
1428     gen8_mfc_avc_slice_state(ctx,
1429                              pPicParameter,
1430                              pSliceParameter,
1431                              encode_state,
1432                              encoder_context,
1433                              (rate_control_mode == VA_RC_CBR),
1434                              qp,
1435                              slice_batch);
1436
1437     if (slice_index == 0)
1438         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1439
1440     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1441
1442     // slice hander
1443     mfc_context->insert_object(ctx,
1444                                encoder_context,
1445                                (unsigned int *)slice_header,
1446                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1447                                slice_header_length_in_bits & 0x1f,
1448                                5,  /* first 5 bytes are start code + nal unit type */
1449                                1,
1450                                0,
1451                                1,
1452                                slice_batch);
1453     free(slice_header);
1454
1455     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1456     used = intel_batchbuffer_used_size(slice_batch);
1457     head_size = (used - old_used) / 16;
1458     old_used = used;
1459
1460     /* tail */
1461     if (last_slice) {    
1462         mfc_context->insert_object(ctx,
1463                                    encoder_context,
1464                                    tail_data,
1465                                    2,
1466                                    8,
1467                                    2,
1468                                    1,
1469                                    1,
1470                                    0,
1471                                    slice_batch);
1472     } else {
1473         mfc_context->insert_object(ctx,
1474                                    encoder_context,
1475                                    tail_data,
1476                                    1,
1477                                    8,
1478                                    1,
1479                                    1,
1480                                    1,
1481                                    0,
1482                                    slice_batch);
1483     }
1484
1485     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1486     used = intel_batchbuffer_used_size(slice_batch);
1487     tail_size = (used - old_used) / 16;
1488
1489    
1490     gen8_mfc_avc_batchbuffer_slice_command(ctx,
1491                                            encoder_context,
1492                                            pSliceParameter,
1493                                            head_offset,
1494                                            head_size,
1495                                            tail_size,
1496                                            batchbuffer_offset,
1497                                            qp,
1498                                            last_slice);
1499
1500     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1501 }
1502
1503 static void
1504 gen8_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1505                                   struct encode_state *encode_state,
1506                                   struct intel_encoder_context *encoder_context)
1507 {
1508     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1509     struct intel_batchbuffer *batch = encoder_context->base.batch;
1510     int i, size, offset = 0;
1511     intel_batchbuffer_start_atomic(batch, 0x4000); 
1512     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1513
1514     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1515         size = gen8_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1516         offset += size;
1517     }
1518
1519     intel_batchbuffer_end_atomic(batch);
1520     intel_batchbuffer_flush(batch);
1521 }
1522
/*
 * Build the AVC batch buffer: run the surface, interface-descriptor (idrt)
 * and constant setup helpers, then the batch buffer pipeline, in that order.
 */
static void
gen8_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context)
{
    gen8_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
    gen8_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
    gen8_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
    /* the pipeline step emits the per-slice commands and flushes the batch */
    gen8_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
}
1533
/*
 * Build the AVC batch buffer and return the buffer object of
 * mfc_batchbuffer_surface.
 *
 * A reference is taken on the returned bo before it is handed back, so the
 * caller is expected to release it with dri_bo_unreference() when done.
 */
static dri_bo *
gen8_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    gen8_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
    /* extra reference owned by the caller */
    dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);

    return mfc_context->mfc_batchbuffer_surface.bo;
}
1546
1547 #endif
1548
1549 static void
1550 gen8_mfc_avc_pipeline_programing(VADriverContextP ctx,
1551                                  struct encode_state *encode_state,
1552                                  struct intel_encoder_context *encoder_context)
1553 {
1554     struct intel_batchbuffer *batch = encoder_context->base.batch;
1555     dri_bo *slice_batch_bo;
1556
1557     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1558         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1559         assert(0);
1560         return; 
1561     }
1562
1563 #ifdef MFC_SOFTWARE_HASWELL
1564     slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1565 #else
1566     slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1567 #endif
1568
1569     // begin programing
1570     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1571     intel_batchbuffer_emit_mi_flush(batch);
1572     
1573     // picture level programing
1574     gen8_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1575
1576     BEGIN_BCS_BATCH(batch, 3);
1577     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1578     OUT_BCS_RELOC(batch,
1579                   slice_batch_bo,
1580                   I915_GEM_DOMAIN_COMMAND, 0, 
1581                   0);
1582     OUT_BCS_BATCH(batch, 0);
1583     ADVANCE_BCS_BATCH(batch);
1584
1585     // end programing
1586     intel_batchbuffer_end_atomic(batch);
1587
1588     dri_bo_unreference(slice_batch_bo);
1589 }
1590
1591
1592 static VAStatus
1593 gen8_mfc_avc_encode_picture(VADriverContextP ctx, 
1594                             struct encode_state *encode_state,
1595                             struct intel_encoder_context *encoder_context)
1596 {
1597     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1598     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1599     int current_frame_bits_size;
1600     int sts;
1601  
1602     for (;;) {
1603         gen8_mfc_init(ctx, encode_state, encoder_context);
1604         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1605         /*Programing bcs pipeline*/
1606         gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);   //filling the pipeline
1607         gen8_mfc_run(ctx, encode_state, encoder_context);
1608         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1609             gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1610             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1611             if (sts == BRC_NO_HRD_VIOLATION) {
1612                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1613                 break;
1614             }
1615             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1616                 if (!mfc_context->hrd.violation_noted) {
1617                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1618                     mfc_context->hrd.violation_noted = 1;
1619                 }
1620                 return VA_STATUS_SUCCESS;
1621             }
1622         } else {
1623             break;
1624         }
1625     }
1626
1627     return VA_STATUS_SUCCESS;
1628 }
1629
1630 /*
1631  * MPEG-2
1632  */
1633
/*
 * Lookup table indexed by the VA MPEG-2 picture type (0 = I, 1 = P, 2 = B),
 * giving the picture type value written into MFX_MPEG2_PIC_STATE.
 */
static const int
va_to_gen8_mpeg2_picture_type[3] = {
    [0] = 1,    /* I */
    [1] = 2,    /* P */
    [2] = 3     /* B */
};
1640
1641 static void
1642 gen8_mfc_mpeg2_pic_state(VADriverContextP ctx,
1643                          struct intel_encoder_context *encoder_context,
1644                          struct encode_state *encode_state)
1645 {
1646     struct intel_batchbuffer *batch = encoder_context->base.batch;
1647     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1648     VAEncPictureParameterBufferMPEG2 *pic_param;
1649     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1650     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1651     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1652
1653     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1654     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1655     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1656
1657     BEGIN_BCS_BATCH(batch, 13);
1658     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1659     OUT_BCS_BATCH(batch,
1660                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1661                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1662                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1663                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1664                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1665                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1666                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1667                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1668                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1669                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1670                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1671                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1672     OUT_BCS_BATCH(batch,
1673                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1674                   va_to_gen8_mpeg2_picture_type[pic_param->picture_type] << 9 |
1675                   0);
1676     OUT_BCS_BATCH(batch,
1677                   1 << 31 |     /* slice concealment */
1678                   (height_in_mbs - 1) << 16 |
1679                   (width_in_mbs - 1));
1680
1681     if (slice_param && slice_param->quantiser_scale_code >= 14)
1682         OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1683     else
1684         OUT_BCS_BATCH(batch, 0);
1685
1686     OUT_BCS_BATCH(batch, 0);
1687     OUT_BCS_BATCH(batch,
1688                   0xFFF << 16 | /* InterMBMaxSize */
1689                   0xFFF << 0 |  /* IntraMBMaxSize */
1690                   0);
1691     OUT_BCS_BATCH(batch, 0);
1692     OUT_BCS_BATCH(batch, 0);
1693     OUT_BCS_BATCH(batch, 0);
1694     OUT_BCS_BATCH(batch, 0);
1695     OUT_BCS_BATCH(batch, 0);
1696     OUT_BCS_BATCH(batch, 0);
1697     ADVANCE_BCS_BATCH(batch);
1698 }
1699
1700 static void
1701 gen8_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1702 {
1703     unsigned char intra_qm[64] = {
1704         8, 16, 19, 22, 26, 27, 29, 34,
1705         16, 16, 22, 24, 27, 29, 34, 37,
1706         19, 22, 26, 27, 29, 34, 34, 38,
1707         22, 22, 26, 27, 29, 34, 37, 40,
1708         22, 26, 27, 29, 32, 35, 40, 48,
1709         26, 27, 29, 32, 35, 40, 48, 58,
1710         26, 27, 29, 34, 38, 46, 56, 69,
1711         27, 29, 35, 38, 46, 56, 69, 83
1712     };
1713
1714     unsigned char non_intra_qm[64] = {
1715         16, 16, 16, 16, 16, 16, 16, 16,
1716         16, 16, 16, 16, 16, 16, 16, 16,
1717         16, 16, 16, 16, 16, 16, 16, 16,
1718         16, 16, 16, 16, 16, 16, 16, 16,
1719         16, 16, 16, 16, 16, 16, 16, 16,
1720         16, 16, 16, 16, 16, 16, 16, 16,
1721         16, 16, 16, 16, 16, 16, 16, 16,
1722         16, 16, 16, 16, 16, 16, 16, 16
1723     };
1724
1725     gen8_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1726     gen8_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1727 }
1728
1729 static void
1730 gen8_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1731 {
1732     unsigned short intra_fqm[64] = {
1733         65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1734         65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1735         65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1736         65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1737         65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1738         65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1739         65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1740         65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1741     };
1742
1743     unsigned short non_intra_fqm[64] = {
1744         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1745         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1746         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1747         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1748         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1749         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1750         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1751         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1752     };
1753
1754     gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1755     gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1756 }
1757
1758 static void
1759 gen8_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1760                                 struct intel_encoder_context *encoder_context,
1761                                 int x, int y,
1762                                 int next_x, int next_y,
1763                                 int is_fisrt_slice_group,
1764                                 int is_last_slice_group,
1765                                 int intra_slice,
1766                                 int qp,
1767                                 struct intel_batchbuffer *batch)
1768 {
1769     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1770
1771     if (batch == NULL)
1772         batch = encoder_context->base.batch;
1773
1774     BEGIN_BCS_BATCH(batch, 8);
1775
1776     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1777     OUT_BCS_BATCH(batch,
1778                   0 << 31 |                             /* MbRateCtrlFlag */
1779                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1780                   1 << 17 |                             /* Insert Header before the first slice group data */
1781                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1782                   1 << 15 |                             /* TailPresentFlag: always 1 */
1783                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1784                   !!intra_slice << 13 |                 /* IntraSlice */
1785                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1786                   0);
1787     OUT_BCS_BATCH(batch,
1788                   next_y << 24 |
1789                   next_x << 16 |
1790                   y << 8 |
1791                   x << 0 |
1792                   0);
1793     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1794     /* bitstream pointer is only loaded once for the first slice of a frame when 
1795      * LoadSlicePointerFlag is 0
1796      */
1797     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1798     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1799     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1800     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1801
1802     ADVANCE_BCS_BATCH(batch);
1803 }
1804
1805 static int
1806 gen8_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1807                                 struct intel_encoder_context *encoder_context,
1808                                 int x, int y,
1809                                 int first_mb_in_slice,
1810                                 int last_mb_in_slice,
1811                                 int first_mb_in_slice_group,
1812                                 int last_mb_in_slice_group,
1813                                 int mb_type,
1814                                 int qp_scale_code,
1815                                 int coded_block_pattern,
1816                                 unsigned char target_size_in_word,
1817                                 unsigned char max_size_in_word,
1818                                 struct intel_batchbuffer *batch)
1819 {
1820     int len_in_dwords = 9;
1821
1822     if (batch == NULL)
1823         batch = encoder_context->base.batch;
1824
1825     BEGIN_BCS_BATCH(batch, len_in_dwords);
1826
1827     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1828     OUT_BCS_BATCH(batch,
1829                   0 << 24 |     /* PackedMvNum */
1830                   0 << 20 |     /* MvFormat */
1831                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1832                   0 << 15 |     /* TransformFlag: frame DCT */
1833                   0 << 14 |     /* FieldMbFlag */
1834                   1 << 13 |     /* IntraMbFlag */
1835                   mb_type << 8 |   /* MbType: Intra */
1836                   0 << 2 |      /* SkipMbFlag */
1837                   0 << 0 |      /* InterMbMode */
1838                   0);
1839     OUT_BCS_BATCH(batch, y << 16 | x);
1840     OUT_BCS_BATCH(batch,
1841                   max_size_in_word << 24 |
1842                   target_size_in_word << 16 |
1843                   coded_block_pattern << 6 |      /* CBP */
1844                   0);
1845     OUT_BCS_BATCH(batch,
1846                   last_mb_in_slice << 31 |
1847                   first_mb_in_slice << 30 |
1848                   0 << 27 |     /* EnableCoeffClamp */
1849                   last_mb_in_slice_group << 26 |
1850                   0 << 25 |     /* MbSkipConvDisable */
1851                   first_mb_in_slice_group << 24 |
1852                   0 << 16 |     /* MvFieldSelect */
1853                   qp_scale_code << 0 |
1854                   0);
1855     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1856     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1857     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1858     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1859
1860     ADVANCE_BCS_BATCH(batch);
1861
1862     return len_in_dwords;
1863 }
1864
1865 /* Byte offset */
1866 #define MPEG2_INTER_MV_OFFSET   48 
1867
/*
 * Motion vector limits indexed by f_code.  Entry 0 is a placeholder;
 * mpeg2_motion_vector() only consults entries 1..9.
 */
static struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {     0,    0 },
    {   -16,   15 },
    {   -32,   31 },
    {   -64,   63 },
    {  -128,  127 },
    {  -256,  255 },
    {  -512,  511 },
    { -1024, 1023 },
    { -2048, 2047 },
    { -4096, 4095 }
};

/*
 * Sanitise one motion vector component.
 *
 * mv           vector component, in half pixels
 * pos          macroblock index along the same axis
 * display_max  picture extent along that axis, in pixels
 * f_code       selects the allowed range from mv_ranges
 *
 * The vector is reset to 0 when the displaced 16-pixel macroblock would
 * start before 0 or end beyond display_max.  The result is then clamped to
 * mv_ranges[f_code] when f_code is in 1..9; other f_code values leave it
 * unclamped.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    int result = mv;
    const struct _mv_ranges *limit;

    /* all quantities below are in half-pixel units: 16 pixels * 2 per macroblock */
    if (mv + pos * 16 * 2 < 0)
        result = 0;
    else if (mv + (pos + 1) * 16 * 2 > display_max * 2)
        result = 0;

    if (f_code <= 0 || f_code >= 10)
        return result;

    limit = &mv_ranges[f_code];

    if (result < limit->low)
        return limit->low;

    if (result > limit->high)
        return limit->high;

    return result;
}
1902
1903 static int
1904 gen8_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
1905                                 struct encode_state *encode_state,
1906                                 struct intel_encoder_context *encoder_context,
1907                                 unsigned int *msg,
1908                                 int width_in_mbs, int height_in_mbs,
1909                                 int x, int y,
1910                                 int first_mb_in_slice,
1911                                 int last_mb_in_slice,
1912                                 int first_mb_in_slice_group,
1913                                 int last_mb_in_slice_group,
1914                                 int qp_scale_code,
1915                                 unsigned char target_size_in_word,
1916                                 unsigned char max_size_in_word,
1917                                 struct intel_batchbuffer *batch)
1918 {
1919     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1920     int len_in_dwords = 9;
1921     short *mvptr, mvx0, mvy0, mvx1, mvy1;
1922     
1923     if (batch == NULL)
1924         batch = encoder_context->base.batch;
1925
1926     mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);;
1927     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
1928     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
1929     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
1930     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
1931
1932     BEGIN_BCS_BATCH(batch, len_in_dwords);
1933
1934     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1935     OUT_BCS_BATCH(batch,
1936                   2 << 24 |     /* PackedMvNum */
1937                   7 << 20 |     /* MvFormat */
1938                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1939                   0 << 15 |     /* TransformFlag: frame DCT */
1940                   0 << 14 |     /* FieldMbFlag */
1941                   0 << 13 |     /* IntraMbFlag */
1942                   1 << 8 |      /* MbType: Frame-based */
1943                   0 << 2 |      /* SkipMbFlag */
1944                   0 << 0 |      /* InterMbMode */
1945                   0);
1946     OUT_BCS_BATCH(batch, y << 16 | x);
1947     OUT_BCS_BATCH(batch,
1948                   max_size_in_word << 24 |
1949                   target_size_in_word << 16 |
1950                   0x3f << 6 |   /* CBP */
1951                   0);
1952     OUT_BCS_BATCH(batch,
1953                   last_mb_in_slice << 31 |
1954                   first_mb_in_slice << 30 |
1955                   0 << 27 |     /* EnableCoeffClamp */
1956                   last_mb_in_slice_group << 26 |
1957                   0 << 25 |     /* MbSkipConvDisable */
1958                   first_mb_in_slice_group << 24 |
1959                   0 << 16 |     /* MvFieldSelect */
1960                   qp_scale_code << 0 |
1961                   0);
1962
1963     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
1964     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
1965     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1966     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1967
1968     ADVANCE_BCS_BATCH(batch);
1969
1970     return len_in_dwords;
1971 }
1972
1973 static void
1974 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
1975                                            struct encode_state *encode_state,
1976                                            struct intel_encoder_context *encoder_context,
1977                                            struct intel_batchbuffer *slice_batch)
1978 {
1979     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1980     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
1981
1982     if (encode_state->packed_header_data[idx]) {
1983         VAEncPackedHeaderParameterBuffer *param = NULL;
1984         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1985         unsigned int length_in_bits;
1986
1987         assert(encode_state->packed_header_param[idx]);
1988         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1989         length_in_bits = param->bit_length;
1990
1991         mfc_context->insert_object(ctx,
1992                                    encoder_context,
1993                                    header_data,
1994                                    ALIGN(length_in_bits, 32) >> 5,
1995                                    length_in_bits & 0x1f,
1996                                    5,   /* FIXME: check it */
1997                                    0,
1998                                    0,
1999                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2000                                    slice_batch);
2001     }
2002
2003     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2004
2005     if (encode_state->packed_header_data[idx]) {
2006         VAEncPackedHeaderParameterBuffer *param = NULL;
2007         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2008         unsigned int length_in_bits;
2009
2010         assert(encode_state->packed_header_param[idx]);
2011         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2012         length_in_bits = param->bit_length;
2013
2014         mfc_context->insert_object(ctx,
2015                                    encoder_context,
2016                                    header_data,
2017                                    ALIGN(length_in_bits, 32) >> 5,
2018                                    length_in_bits & 0x1f,
2019                                    5,   /* FIXME: check it */
2020                                    0,
2021                                    0,
2022                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2023                                    slice_batch);
2024     }
2025 }
2026
2027 static void 
2028 gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2029                                     struct encode_state *encode_state,
2030                                     struct intel_encoder_context *encoder_context,
2031                                     int slice_index,
2032                                     VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2033                                     struct intel_batchbuffer *slice_batch)
2034 {
2035     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2036     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2037     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2038     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2039     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2040     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2041     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2042     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2043     int i, j;
2044     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2045     unsigned int *msg = NULL;
2046     unsigned char *msg_ptr = NULL;
2047
2048     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2049     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2050     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2051     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2052
2053     dri_bo_map(vme_context->vme_output.bo , 0);
2054     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2055
2056     if (next_slice_group_param) {
2057         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2058         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2059     } else {
2060         h_next_start_pos = 0;
2061         v_next_start_pos = height_in_mbs;
2062     }
2063
2064     gen8_mfc_mpeg2_slicegroup_state(ctx,
2065                                     encoder_context,
2066                                     h_start_pos,
2067                                     v_start_pos,
2068                                     h_next_start_pos,
2069                                     v_next_start_pos,
2070                                     slice_index == 0,
2071                                     next_slice_group_param == NULL,
2072                                     slice_param->is_intra_slice,
2073                                     slice_param->quantiser_scale_code,
2074                                     slice_batch);
2075
2076     if (slice_index == 0) 
2077         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2078
2079     /* Insert '00' to make sure the header is valid */
2080     mfc_context->insert_object(ctx,
2081                                encoder_context,
2082                                (unsigned int*)section_delimiter,
2083                                1,
2084                                8,   /* 8bits in the last DWORD */
2085                                1,   /* 1 byte */
2086                                1,
2087                                0,
2088                                0,
2089                                slice_batch);
2090
2091     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2092         /* PAK for each macroblocks */
2093         for (j = 0; j < slice_param->num_macroblocks; j++) {
2094             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2095             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2096             int first_mb_in_slice = (j == 0);
2097             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2098             int first_mb_in_slice_group = (i == 0 && j == 0);
2099             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2100                                           j == slice_param->num_macroblocks - 1);
2101
2102             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2103
2104             if (slice_param->is_intra_slice) {
2105                 gen8_mfc_mpeg2_pak_object_intra(ctx,
2106                                                 encoder_context,
2107                                                 h_pos, v_pos,
2108                                                 first_mb_in_slice,
2109                                                 last_mb_in_slice,
2110                                                 first_mb_in_slice_group,
2111                                                 last_mb_in_slice_group,
2112                                                 0x1a,
2113                                                 slice_param->quantiser_scale_code,
2114                                                 0x3f,
2115                                                 0,
2116                                                 0xff,
2117                                                 slice_batch);
2118             } else {
2119                 int inter_rdo, intra_rdo;
2120                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2121                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2122
2123                 if (intra_rdo < inter_rdo) 
2124                     gen8_mfc_mpeg2_pak_object_intra(ctx,
2125                                                      encoder_context,
2126                                                      h_pos, v_pos,
2127                                                      first_mb_in_slice,
2128                                                      last_mb_in_slice,
2129                                                      first_mb_in_slice_group,
2130                                                      last_mb_in_slice_group,
2131                                                      0x1a,
2132                                                      slice_param->quantiser_scale_code,
2133                                                      0x3f,
2134                                                      0,
2135                                                      0xff,
2136                                                      slice_batch);
2137                 else
2138                     gen8_mfc_mpeg2_pak_object_inter(ctx,
2139                                                 encode_state,
2140                                                 encoder_context,
2141                                                 msg,
2142                                                 width_in_mbs, height_in_mbs,
2143                                                 h_pos, v_pos,
2144                                                 first_mb_in_slice,
2145                                                 last_mb_in_slice,
2146                                                 first_mb_in_slice_group,
2147                                                 last_mb_in_slice_group,
2148                                                 slice_param->quantiser_scale_code,
2149                                                 0,
2150                                                 0xff,
2151                                                 slice_batch);
2152             }
2153         }
2154
2155         slice_param++;
2156     }
2157
2158     dri_bo_unmap(vme_context->vme_output.bo);
2159
2160     /* tail data */
2161     if (next_slice_group_param == NULL) { /* end of a picture */
2162         mfc_context->insert_object(ctx,
2163                                    encoder_context,
2164                                    (unsigned int *)tail_delimiter,
2165                                    2,
2166                                    8,   /* 8bits in the last DWORD */
2167                                    5,   /* 5 bytes */
2168                                    1,
2169                                    1,
2170                                    0,
2171                                    slice_batch);
2172     } else {        /* end of a lsice group */
2173         mfc_context->insert_object(ctx,
2174                                    encoder_context,
2175                                    (unsigned int *)section_delimiter,
2176                                    1,
2177                                    8,   /* 8bits in the last DWORD */
2178                                    1,   /* 1 byte */
2179                                    1,
2180                                    1,
2181                                    0,
2182                                    slice_batch);
2183     }
2184 }
2185
2186 /* 
2187  * A batch buffer for all slices, including slice state, 
2188  * slice insert object and slice pak object commands
2189  *
2190  */
2191 static dri_bo *
2192 gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2193                                           struct encode_state *encode_state,
2194                                           struct intel_encoder_context *encoder_context)
2195 {
2196     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2197     struct i965_driver_data *i965 = i965_driver_data(ctx);
2198     struct intel_batchbuffer *batch;
2199     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2200     dri_bo *batch_bo;
2201     int i;
2202
2203     batch = mfc_context->aux_batchbuffer;
2204     batch_bo = batch->buffer;
2205
2206     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2207         if (i == encode_state->num_slice_params_ext - 1)
2208             next_slice_group_param = NULL;
2209         else
2210             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2211
2212         gen8_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2213     }
2214
2215     intel_batchbuffer_align(batch, 8);
2216     
2217     BEGIN_BCS_BATCH(batch, 2);
2218     OUT_BCS_BATCH(batch, 0);
2219     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2220     ADVANCE_BCS_BATCH(batch);
2221
2222     dri_bo_reference(batch_bo);
2223     intel_batchbuffer_free(batch);
2224     mfc_context->aux_batchbuffer = NULL;
2225
2226     return batch_bo;
2227 }
2228
2229 static void
2230 gen8_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2231                                            struct encode_state *encode_state,
2232                                            struct intel_encoder_context *encoder_context)
2233 {
2234     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2235
2236     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2237     mfc_context->set_surface_state(ctx, encoder_context);
2238     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2239     gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
2240     gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2241     gen8_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2242     gen8_mfc_mpeg2_qm_state(ctx, encoder_context);
2243     gen8_mfc_mpeg2_fqm_state(ctx, encoder_context);
2244 }
2245
2246 static void
2247 gen8_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2248                                    struct encode_state *encode_state,
2249                                    struct intel_encoder_context *encoder_context)
2250 {
2251     struct intel_batchbuffer *batch = encoder_context->base.batch;
2252     dri_bo *slice_batch_bo;
2253
2254     slice_batch_bo = gen8_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2255
2256     // begin programing
2257     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2258     intel_batchbuffer_emit_mi_flush(batch);
2259     
2260     // picture level programing
2261     gen8_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2262
2263     BEGIN_BCS_BATCH(batch, 4);
2264     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
2265     OUT_BCS_RELOC(batch,
2266                   slice_batch_bo,
2267                   I915_GEM_DOMAIN_COMMAND, 0, 
2268                   0);
2269     OUT_BCS_BATCH(batch, 0);
2270     OUT_BCS_BATCH(batch, 0);
2271     ADVANCE_BCS_BATCH(batch);
2272
2273     // end programing
2274     intel_batchbuffer_end_atomic(batch);
2275
2276     dri_bo_unreference(slice_batch_bo);
2277 }
2278
2279 static VAStatus
2280 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2281                         struct encode_state *encode_state,
2282                         struct intel_encoder_context *encoder_context)
2283 {
2284     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2285     struct object_surface *obj_surface; 
2286     struct object_buffer *obj_buffer;
2287     struct i965_coded_buffer_segment *coded_buffer_segment;
2288     VAStatus vaStatus = VA_STATUS_SUCCESS;
2289     dri_bo *bo;
2290     int i;
2291
2292     /* reconstructed surface */
2293     obj_surface = encode_state->reconstructed_object;
2294     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2295     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2296     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2297     mfc_context->surface_state.width = obj_surface->orig_width;
2298     mfc_context->surface_state.height = obj_surface->orig_height;
2299     mfc_context->surface_state.w_pitch = obj_surface->width;
2300     mfc_context->surface_state.h_pitch = obj_surface->height;
2301
2302     /* forward reference */
2303     obj_surface = encode_state->reference_objects[0];
2304
2305     if (obj_surface && obj_surface->bo) {
2306         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2307         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2308     } else
2309         mfc_context->reference_surfaces[0].bo = NULL;
2310
2311     /* backward reference */
2312     obj_surface = encode_state->reference_objects[1];
2313
2314     if (obj_surface && obj_surface->bo) {
2315         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2316         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2317     } else {
2318         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2319
2320         if (mfc_context->reference_surfaces[1].bo)
2321             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2322     }
2323
2324     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2325         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2326
2327         if (mfc_context->reference_surfaces[i].bo)
2328             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2329     }
2330     
2331     /* input YUV surface */
2332     obj_surface = encode_state->input_yuv_object;
2333     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2334     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2335
2336     /* coded buffer */
2337     obj_buffer = encode_state->coded_buf_object;
2338     bo = obj_buffer->buffer_store->bo;
2339     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2340     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2341     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2342     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2343
2344     /* set the internal flag to 0 to indicate the coded size is unknown */
2345     dri_bo_map(bo, 1);
2346     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2347     coded_buffer_segment->mapped = 0;
2348     coded_buffer_segment->codec = encoder_context->codec;
2349     dri_bo_unmap(bo);
2350
2351     return vaStatus;
2352 }
2353
2354 static VAStatus
2355 gen8_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2356                               struct encode_state *encode_state,
2357                               struct intel_encoder_context *encoder_context)
2358 {
2359     gen8_mfc_init(ctx, encode_state, encoder_context);
2360     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2361     /*Programing bcs pipeline*/
2362     gen8_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2363     gen8_mfc_run(ctx, encode_state, encoder_context);
2364
2365     return VA_STATUS_SUCCESS;
2366 }
2367
2368 static void
2369 gen8_mfc_context_destroy(void *context)
2370 {
2371     struct gen6_mfc_context *mfc_context = context;
2372     int i;
2373
2374     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2375     mfc_context->post_deblocking_output.bo = NULL;
2376
2377     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2378     mfc_context->pre_deblocking_output.bo = NULL;
2379
2380     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2381     mfc_context->uncompressed_picture_source.bo = NULL;
2382
2383     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2384     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2385
2386     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2387         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2388         mfc_context->direct_mv_buffers[i].bo = NULL;
2389     }
2390
2391     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2392     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2393
2394     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2395     mfc_context->macroblock_status_buffer.bo = NULL;
2396
2397     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2398     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2399
2400     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2401     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2402
2403
2404     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2405         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2406         mfc_context->reference_surfaces[i].bo = NULL;  
2407     }
2408
2409     i965_gpe_context_destroy(&mfc_context->gpe_context);
2410
2411     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2412     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2413
2414     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2415     mfc_context->aux_batchbuffer_surface.bo = NULL;
2416
2417     if (mfc_context->aux_batchbuffer)
2418         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2419
2420     mfc_context->aux_batchbuffer = NULL;
2421
2422     free(mfc_context);
2423 }
2424
2425 static VAStatus gen8_mfc_pipeline(VADriverContextP ctx,
2426                                   VAProfile profile,
2427                                   struct encode_state *encode_state,
2428                                   struct intel_encoder_context *encoder_context)
2429 {
2430     VAStatus vaStatus;
2431
2432     switch (profile) {
2433     case VAProfileH264ConstrainedBaseline:
2434     case VAProfileH264Main:
2435     case VAProfileH264High:
2436         vaStatus = gen8_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2437         break;
2438
2439         /* FIXME: add for other profile */
2440     case VAProfileMPEG2Simple:
2441     case VAProfileMPEG2Main:
2442         vaStatus = gen8_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2443         break;
2444
2445     default:
2446         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2447         break;
2448     }
2449
2450     return vaStatus;
2451 }
2452
2453 Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2454 {
2455     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2456
2457     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2458
2459     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2460     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2461
2462     mfc_context->gpe_context.curbe.length = 32 * 4;
2463
2464     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2465     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2466     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2467     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2468     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2469
2470     i965_gpe_load_kernels(ctx,
2471                           &mfc_context->gpe_context,
2472                           gen8_mfc_kernels,
2473                           NUM_MFC_KERNEL);
2474
2475     mfc_context->pipe_mode_select = gen8_mfc_pipe_mode_select;
2476     mfc_context->set_surface_state = gen8_mfc_surface_state;
2477     mfc_context->ind_obj_base_addr_state = gen8_mfc_ind_obj_base_addr_state;
2478     mfc_context->avc_img_state = gen8_mfc_avc_img_state;
2479     mfc_context->avc_qm_state = gen8_mfc_avc_qm_state;
2480     mfc_context->avc_fqm_state = gen8_mfc_avc_fqm_state;
2481     mfc_context->insert_object = gen8_mfc_avc_insert_object;
2482     mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
2483
2484     encoder_context->mfc_context = mfc_context;
2485     encoder_context->mfc_context_destroy = gen8_mfc_context_destroy;
2486     encoder_context->mfc_pipeline = gen8_mfc_pipeline;
2487     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2488
2489     return True;
2490 }