1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45 #include "vp8_probs.h"
46
47 #define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
48 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
49 #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
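/*
 * Layout implied by the two macros above: surface-state entries are packed
 * back to back, each padded to SURFACE_STATE_PADDED_SIZE_GEN8, and the
 * binding table (one 32-bit entry per surface) starts immediately after the
 * MAX_MEDIA_SURFACES_GEN6 surface-state slots.
 */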
50
51 #define MFC_SOFTWARE_HASWELL    1
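/*
 * With MFC_SOFTWARE_HASWELL defined, the per-MB PAK commands are assembled
 * by the CPU into the auxiliary batch buffer (see the #ifdef block around
 * gen9_mfc_avc_software_batchbuffer below); the #else path instead uses the
 * gen9_mfc_kernels media kernels declared below to build them on the GPU.
 */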
52
53 #define B0_STEP_REV             2
54 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
55
56 static const uint32_t gen9_mfc_batchbuffer_avc_intra[][4] = {
57 #include "shaders/utils/mfc_batchbuffer_avc_intra.g9b"
58 };
59
60 static const uint32_t gen9_mfc_batchbuffer_avc_inter[][4] = {
61 #include "shaders/utils/mfc_batchbuffer_avc_inter.g9b"
62 };
63
64 static struct i965_kernel gen9_mfc_kernels[] = {
65     {
66         "MFC AVC INTRA BATCHBUFFER ",
67         MFC_BATCHBUFFER_AVC_INTRA,
68         gen9_mfc_batchbuffer_avc_intra,
69         sizeof(gen9_mfc_batchbuffer_avc_intra),
70         NULL
71     },
72
73     {
74         "MFC AVC INTER BATCHBUFFER ",
75         MFC_BATCHBUFFER_AVC_INTER,
76         gen9_mfc_batchbuffer_avc_inter,
77         sizeof(gen9_mfc_batchbuffer_avc_inter),
78         NULL
79     },
80 };
81
82 #define         INTER_MODE_MASK         0x03
83 #define         INTER_8X8               0x03
84 #define         INTER_16X8              0x01
85 #define         INTER_8X16              0x02
86 #define         SUBMB_SHAPE_MASK        0x00FF00
87 #define         INTER_16X16             0x00
88
89 #define         INTER_MV8               (4 << 20)
90 #define         INTER_MV32              (6 << 20)
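/*
 * INTER_MV8/INTER_MV32 are OR'ed into DW3 of MFC_AVC_PAK_OBJECT below and
 * appear to select how much motion-vector data follows: 8 MVs for the
 * common partitions, 32 MVs when an 8x8 macroblock uses sub-partitions.
 */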
91
92 static void
93 gen9_mfc_pipe_mode_select(VADriverContextP ctx,
94                           int standard_select,
95                           struct intel_encoder_context *encoder_context)
96 {
97     struct intel_batchbuffer *batch = encoder_context->base.batch;
98     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
99
100     assert(standard_select == MFX_FORMAT_MPEG2 ||
101            standard_select == MFX_FORMAT_AVC  ||
102            standard_select == MFX_FORMAT_VP8);
103
104     BEGIN_BCS_BATCH(batch, 5);
105
106     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
107     OUT_BCS_BATCH(batch,
108                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
109                   (MFD_MODE_VLD << 15) | /* VLD mode */
110                   (0 << 10) | /* Stream-Out Enable */
111                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
112                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
113                   (0 << 6)  | /* frame statistics stream-out enable*/
114                   (0 << 5)  | /* not in stitch mode */
115                   (1 << 4)  | /* encoding mode */
116                   (standard_select << 0));  /* standard select: avc or mpeg2 */
117     OUT_BCS_BATCH(batch,
118                   (0 << 7)  | /* expand NOA bus flag */
119                   (0 << 6)  | /* disable slice-level clock gating */
120                   (0 << 5)  | /* disable clock gating for NOA */
121                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
122                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
123                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
124                   (0 << 1)  |
125                   (0 << 0));
126     OUT_BCS_BATCH(batch, 0);
127     OUT_BCS_BATCH(batch, 0);
128
129     ADVANCE_BCS_BATCH(batch);
130 }
131
132 static void
133 gen9_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
134 {
135     struct intel_batchbuffer *batch = encoder_context->base.batch;
136     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
137
138     BEGIN_BCS_BATCH(batch, 6);
139
140     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
141     OUT_BCS_BATCH(batch, 0);
142     OUT_BCS_BATCH(batch,
143                   ((mfc_context->surface_state.height - 1) << 18) |
144                   ((mfc_context->surface_state.width - 1) << 4));
145     OUT_BCS_BATCH(batch,
146                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
147                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
148                   (0 << 22) | /* surface object control state, FIXME??? */
149                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
150                   (0 << 2)  | /* must be 0 for interleave U/V */
151                   (1 << 1)  | /* must be tiled */
152                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
153     OUT_BCS_BATCH(batch,
154                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
155                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
156     OUT_BCS_BATCH(batch, 0);
157
158     ADVANCE_BCS_BATCH(batch);
159 }
160
161 static void
162 gen9_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
163                                  struct intel_encoder_context *encoder_context)
164 {
165     struct intel_batchbuffer *batch = encoder_context->base.batch;
166     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
167     struct gen6_vme_context *vme_context = encoder_context->vme_context;
168     int vme_size;
169
170     BEGIN_BCS_BATCH(batch, 26);
171
172     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
173     /* the DW1-3 is for the MFX indirect bitstream offset */
174     OUT_BCS_BATCH(batch, 0);
175     OUT_BCS_BATCH(batch, 0);
176     OUT_BCS_BATCH(batch, 0);
177
178     vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;
179
180     /* the DW4-5 is the MFX upper bound */
181     if (encoder_context->codec == CODEC_VP8) {
182         OUT_BCS_RELOC(batch,
183                 mfc_context->mfc_indirect_pak_bse_object.bo,
184                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
185                 mfc_context->mfc_indirect_pak_bse_object.end_offset);
186         OUT_BCS_BATCH(batch, 0);
187     } else {
188         OUT_BCS_BATCH(batch, 0);
189         OUT_BCS_BATCH(batch, 0);
190     }
191
192     /* the DW6-10 is for MFX Indirect MV Object Base Address */
193     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
194     OUT_BCS_BATCH(batch, 0);
195     OUT_BCS_BATCH(batch, 0);
196     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size);
197     OUT_BCS_BATCH(batch, 0);
198
199     /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
200     OUT_BCS_BATCH(batch, 0);
201     OUT_BCS_BATCH(batch, 0);
202     OUT_BCS_BATCH(batch, 0);
203     OUT_BCS_BATCH(batch, 0);
204     OUT_BCS_BATCH(batch, 0);
205
206     /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
207     OUT_BCS_BATCH(batch, 0);
208     OUT_BCS_BATCH(batch, 0);
209     OUT_BCS_BATCH(batch, 0);
210     OUT_BCS_BATCH(batch, 0);
211     OUT_BCS_BATCH(batch, 0);
212
213     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
214     OUT_BCS_RELOC(batch,
215                   mfc_context->mfc_indirect_pak_bse_object.bo,
216                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
217                   0);
218     OUT_BCS_BATCH(batch, 0);
219     OUT_BCS_BATCH(batch, 0);
220
221     OUT_BCS_RELOC(batch,
222                   mfc_context->mfc_indirect_pak_bse_object.bo,
223                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
224                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
225     OUT_BCS_BATCH(batch, 0);
226
227     ADVANCE_BCS_BATCH(batch);
228 }
229
230 static void
231 gen9_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
232                        struct intel_encoder_context *encoder_context)
233 {
234     struct intel_batchbuffer *batch = encoder_context->base.batch;
235     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
236     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
237
238     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
239     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
240
241     BEGIN_BCS_BATCH(batch, 16);
242
243     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
244     /*DW1. MB setting of frame */
245     OUT_BCS_BATCH(batch,
246                   ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
247     OUT_BCS_BATCH(batch,
248                   ((height_in_mbs - 1) << 16) |
249                   ((width_in_mbs - 1) << 0));
250     /* DW3 QP setting */
251     OUT_BCS_BATCH(batch,
252                   (0 << 24) |   /* Second Chroma QP Offset */
253                   (0 << 16) |   /* Chroma QP Offset */
254                   (0 << 14) |   /* Max-bit conformance Intra flag */
255                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
256                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
257                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
258                   (0 << 8)  |   /* FIXME: Image Structure */
259                   (0 << 0) );   /* Current Decoded Image Frame Store ID, reserved in Encode mode */
260     OUT_BCS_BATCH(batch,
261                   (0 << 16) |   /* Minimum frame size */
262                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
263                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slice per frame */
264                   (0 << 13) |   /* CABAC 0 word insertion test enable */
265                   (1 << 12) |   /* MVUnpackedEnable, compliant with DXVA */
266                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
267                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
268                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
269                   (0 << 6)  |   /* Only valid for VLD decoding mode */
270                   (0 << 5)  |   /* Constrained Intra Prediction Flag, from PPS */
271                   (0 << 4)  |   /* Direct 8x8 inference flag */
272                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
273                   (1 << 2)  |   /* Frame MB only flag */
274                   (0 << 1)  |   /* MBAFF mode is not active */
275                   (0 << 0));    /* Field picture flag */
276     /* DW5 Trellis quantization */
277     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
278     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
279                   (0xBB8 << 16) |       /* InterMbMaxSz */
280                   (0xEE8) );            /* IntraMbMaxSz */
281     OUT_BCS_BATCH(batch, 0);            /* Reserved */
282     /* DW8. QP delta */
283     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
284     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
285     /* DW10. Bit setting for MB */
286     OUT_BCS_BATCH(batch, 0x8C000000);
287     OUT_BCS_BATCH(batch, 0x00010000);
288     /* DW12. */
289     OUT_BCS_BATCH(batch, 0);
290     OUT_BCS_BATCH(batch, 0x02010100);
291     /* DW14. For short format */
292     OUT_BCS_BATCH(batch, 0);
293     OUT_BCS_BATCH(batch, 0);
294
295     ADVANCE_BCS_BATCH(batch);
296 }
297
298 static void
299 gen9_mfc_qm_state(VADriverContextP ctx,
300                   int qm_type,
301                   unsigned int *qm,
302                   int qm_length,
303                   struct intel_encoder_context *encoder_context)
304 {
305     struct intel_batchbuffer *batch = encoder_context->base.batch;
306     unsigned int qm_buffer[16];
307
308     assert(qm_length <= 16);
309     assert(sizeof(*qm) == 4);
310     memcpy(qm_buffer, qm, qm_length * 4);
311
312     BEGIN_BCS_BATCH(batch, 18);
313     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
314     OUT_BCS_BATCH(batch, qm_type << 0);
315     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
316     ADVANCE_BCS_BATCH(batch);
317 }
318
319 static void
320 gen9_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
321 {
322     unsigned int qm[16] = {
323         0x10101010, 0x10101010, 0x10101010, 0x10101010,
324         0x10101010, 0x10101010, 0x10101010, 0x10101010,
325         0x10101010, 0x10101010, 0x10101010, 0x10101010,
326         0x10101010, 0x10101010, 0x10101010, 0x10101010
327     };
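    /*
     * Every byte here is 16 (0x10), i.e. the flat default H.264 scaling
     * lists, so no custom quantization matrix is applied.
     */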
328
329     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
330     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
331     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
332     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
333 }
334
335 static void
336 gen9_mfc_fqm_state(VADriverContextP ctx,
337                    int fqm_type,
338                    unsigned int *fqm,
339                    int fqm_length,
340                    struct intel_encoder_context *encoder_context)
341 {
342     struct intel_batchbuffer *batch = encoder_context->base.batch;
343     unsigned int fqm_buffer[32];
344
345     assert(fqm_length <= 32);
346     assert(sizeof(*fqm) == 4);
347     memcpy(fqm_buffer, fqm, fqm_length * 4);
348
349     BEGIN_BCS_BATCH(batch, 34);
350     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
351     OUT_BCS_BATCH(batch, fqm_type << 0);
352     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
353     ADVANCE_BCS_BATCH(batch);
354 }
355
356 static void
357 gen9_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
358 {
359     unsigned int qm[32] = {
360         0x10001000, 0x10001000, 0x10001000, 0x10001000,
361         0x10001000, 0x10001000, 0x10001000, 0x10001000,
362         0x10001000, 0x10001000, 0x10001000, 0x10001000,
363         0x10001000, 0x10001000, 0x10001000, 0x10001000,
364         0x10001000, 0x10001000, 0x10001000, 0x10001000,
365         0x10001000, 0x10001000, 0x10001000, 0x10001000,
366         0x10001000, 0x10001000, 0x10001000, 0x10001000,
367         0x10001000, 0x10001000, 0x10001000, 0x10001000
368     };
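    /*
     * Entries are 16-bit here; 0x1000 (4096 = 65536/16) is presumably the
     * fixed-point reciprocal of the flat scaling-list value 16 used above.
     */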
369
370     gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
371     gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
372     gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
373     gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
374 }
375
376 static void
377 gen9_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
378                            unsigned int *insert_data, int length_in_dws, int data_bits_in_last_dw,
379                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
380                            struct intel_batchbuffer *batch)
381 {
382     if (batch == NULL)
383         batch = encoder_context->base.batch;
384
385     if (data_bits_in_last_dw == 0)
386         data_bits_in_last_dw = 32;
387
388     BEGIN_BCS_BATCH(batch, length_in_dws + 2);
389
390     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (length_in_dws + 2 - 2));
391     OUT_BCS_BATCH(batch,
392                   (0 << 16) |   /* always start at offset 0 */
393                   (data_bits_in_last_dw << 8) |
394                   (skip_emul_byte_count << 4) |
395                   (!!emulation_flag << 3) |
396                   ((!!is_last_header) << 2) |
397                   ((!!is_end_of_slice) << 1) |
398                   (0 << 0));    /* FIXME: ??? */
399     intel_batchbuffer_data(batch, insert_data, length_in_dws * 4);
400
401     ADVANCE_BCS_BATCH(batch);
402 }
403
404
405 static void gen9_mfc_init(VADriverContextP ctx,
406                           struct encode_state *encode_state,
407                           struct intel_encoder_context *encoder_context)
408 {
409     struct i965_driver_data *i965 = i965_driver_data(ctx);
410     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
411     dri_bo *bo;
412     int i;
413     int width_in_mbs = 0;
414     int height_in_mbs = 0;
415     int slice_batchbuffer_size;
416
417     if (encoder_context->codec == CODEC_H264 ||
418         encoder_context->codec == CODEC_H264_MVC) {
419         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
420         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
421         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
422     } else {
423         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
424
425         assert(encoder_context->codec == CODEC_MPEG2);
426
427         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
428         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
429     }
430
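    /*
     * Sizing assumption: up to 64 bytes of PAK commands per macroblock, a
     * fixed 4KB of slack, plus SLICE_HEADER/SLICE_TAIL bytes of packed
     * header and tail data for every slice.
     */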
431     slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
432                 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
433
434     /*Encode common setup for MFC*/
435     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
436     mfc_context->post_deblocking_output.bo = NULL;
437
438     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
439     mfc_context->pre_deblocking_output.bo = NULL;
440
441     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
442     mfc_context->uncompressed_picture_source.bo = NULL;
443
444     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
445     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
446
447     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
448         if (mfc_context->direct_mv_buffers[i].bo != NULL)
449             dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
450         mfc_context->direct_mv_buffers[i].bo = NULL;
451     }
452
453     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
454         if (mfc_context->reference_surfaces[i].bo != NULL)
455             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
456         mfc_context->reference_surfaces[i].bo = NULL;
457     }
458
459     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
460     bo = dri_bo_alloc(i965->intel.bufmgr,
461                       "Buffer",
462                       width_in_mbs * 64,
463                       64);
464     assert(bo);
465     mfc_context->intra_row_store_scratch_buffer.bo = bo;
466
467     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
468     bo = dri_bo_alloc(i965->intel.bufmgr,
469                       "Buffer",
470                       width_in_mbs * height_in_mbs * 16,
471                       64);
472     assert(bo);
473     mfc_context->macroblock_status_buffer.bo = bo;
474
475     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
476     bo = dri_bo_alloc(i965->intel.bufmgr,
477                       "Buffer",
478                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
479                       64);
480     assert(bo);
481     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
482
483     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
484     bo = dri_bo_alloc(i965->intel.bufmgr,
485                       "Buffer",
486                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
487                       0x1000);
488     assert(bo);
489     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
490
491     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
492     mfc_context->mfc_batchbuffer_surface.bo = NULL;
493
494     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
495     mfc_context->aux_batchbuffer_surface.bo = NULL;
496
497     if (mfc_context->aux_batchbuffer)
498         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
499
500     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
501     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
502     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
503     mfc_context->aux_batchbuffer_surface.pitch = 16;
504     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
505     mfc_context->aux_batchbuffer_surface.size_block = 16;
506
507     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
508 }
509
510 static void
511 gen9_mfc_pipe_buf_addr_state(VADriverContextP ctx,
512                              struct intel_encoder_context *encoder_context)
513 {
514     struct intel_batchbuffer *batch = encoder_context->base.batch;
515     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
516     int i;
517
518     BEGIN_BCS_BATCH(batch, 61);
519
520     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
521
522     /* the DW1-3 is for pre_deblocking */
523     if (mfc_context->pre_deblocking_output.bo)
524         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
525                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
526                       0);
527     else
528         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
529
530     OUT_BCS_BATCH(batch, 0);
531     OUT_BCS_BATCH(batch, 0);
532     /* the DW4-6 is for the post_deblocking */
533
534     /* post output addr  */
535     if (mfc_context->post_deblocking_output.bo)
536         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
537                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
538                       0);
539     else
540         OUT_BCS_BATCH(batch, 0);
541
542     OUT_BCS_BATCH(batch, 0);
543     OUT_BCS_BATCH(batch, 0);
544
545     /* the DW7-9 is for the uncompressed_picture */
546     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
547                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
548                   0); /* uncompressed data */
549
550     OUT_BCS_BATCH(batch, 0);
551     OUT_BCS_BATCH(batch, 0);
552
553     /* the DW10-12 is for the mb status */
554     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
555                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
556                   0); /* StreamOut data*/
557
558     OUT_BCS_BATCH(batch, 0);
559     OUT_BCS_BATCH(batch, 0);
560
561     /* the DW13-15 is for the intra_row_store_scratch */
562     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
563                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
564                   0);
565
566     OUT_BCS_BATCH(batch, 0);
567     OUT_BCS_BATCH(batch, 0);
568
569     /* the DW16-18 is for the deblocking filter */
570     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
571                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
572                   0);
573
574     OUT_BCS_BATCH(batch, 0);
575     OUT_BCS_BATCH(batch, 0);
576
577     /* the DW 19-50 is for Reference pictures*/
578     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
579         if ( mfc_context->reference_surfaces[i].bo != NULL) {
580             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
581                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
582                           0);
583         } else {
584             OUT_BCS_BATCH(batch, 0);
585         }
586
587         OUT_BCS_BATCH(batch, 0);
588     }
589
590     OUT_BCS_BATCH(batch, 0);
591
592     /* The DW 52-54 is for the MB status buffer */
593     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
594                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
595                   0);
596
597     OUT_BCS_BATCH(batch, 0);
598     OUT_BCS_BATCH(batch, 0);
599
600     /* the DW 55-57 is the ILDB buffer */
601     OUT_BCS_BATCH(batch, 0);
602     OUT_BCS_BATCH(batch, 0);
603     OUT_BCS_BATCH(batch, 0);
604
605     /* the DW 58-60 is the second ILDB buffer */
606     OUT_BCS_BATCH(batch, 0);
607     OUT_BCS_BATCH(batch, 0);
608     OUT_BCS_BATCH(batch, 0);
609
610     ADVANCE_BCS_BATCH(batch);
611 }
612
613 static void
614 gen9_mfc_avc_directmode_state(VADriverContextP ctx,
615                               struct intel_encoder_context *encoder_context)
616 {
617     struct intel_batchbuffer *batch = encoder_context->base.batch;
618     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
619
620     int i;
621
622     BEGIN_BCS_BATCH(batch, 71);
623
624     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
625
626     /* Reference frames and Current frames */
627     /* the DW1-32 is for the direct MV for reference */
628     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
629         if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
630             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
631                           I915_GEM_DOMAIN_INSTRUCTION, 0,
632                           0);
633             OUT_BCS_BATCH(batch, 0);
634         } else {
635             OUT_BCS_BATCH(batch, 0);
636             OUT_BCS_BATCH(batch, 0);
637         }
638     }
639
640     OUT_BCS_BATCH(batch, 0);
641
642     /* the DW34-36 is the MV for the current reference */
643     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
644                   I915_GEM_DOMAIN_INSTRUCTION, 0,
645                   0);
646
647     OUT_BCS_BATCH(batch, 0);
648     OUT_BCS_BATCH(batch, 0);
649
650     /* POC list */
651     for(i = 0; i < 32; i++) {
652         OUT_BCS_BATCH(batch, i/2);
653     }
654     OUT_BCS_BATCH(batch, 0);
655     OUT_BCS_BATCH(batch, 0);
656
657     ADVANCE_BCS_BATCH(batch);
658 }
659
660
661 static void
662 gen9_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
663                                  struct intel_encoder_context *encoder_context)
664 {
665     struct intel_batchbuffer *batch = encoder_context->base.batch;
666     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
667
668     BEGIN_BCS_BATCH(batch, 10);
669
670     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
671     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
672                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
673                   0);
674     OUT_BCS_BATCH(batch, 0);
675     OUT_BCS_BATCH(batch, 0);
676
677     /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
678     OUT_BCS_BATCH(batch, 0);
679     OUT_BCS_BATCH(batch, 0);
680     OUT_BCS_BATCH(batch, 0);
681
682     /* the DW7-9 is for Bitplane Read Buffer Base Address */
683     OUT_BCS_BATCH(batch, 0);
684     OUT_BCS_BATCH(batch, 0);
685     OUT_BCS_BATCH(batch, 0);
686
687     ADVANCE_BCS_BATCH(batch);
688 }
689
690
691 static void gen9_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
692                                                       struct encode_state *encode_state,
693                                                       struct intel_encoder_context *encoder_context)
694 {
695     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
696
697     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
698     mfc_context->set_surface_state(ctx, encoder_context);
699     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
700     gen9_mfc_pipe_buf_addr_state(ctx, encoder_context);
701     gen9_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
702     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
703     mfc_context->avc_qm_state(ctx, encoder_context);
704     mfc_context->avc_fqm_state(ctx, encoder_context);
705     gen9_mfc_avc_directmode_state(ctx, encoder_context);
706     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
707 }
708
709
710 static VAStatus gen9_mfc_run(VADriverContextP ctx,
711                              struct encode_state *encode_state,
712                              struct intel_encoder_context *encoder_context)
713 {
714     struct intel_batchbuffer *batch = encoder_context->base.batch;
715
716     intel_batchbuffer_flush(batch);             //run the pipeline
717
718     return VA_STATUS_SUCCESS;
719 }
720
721
722 static VAStatus
723 gen9_mfc_stop(VADriverContextP ctx,
724               struct encode_state *encode_state,
725               struct intel_encoder_context *encoder_context,
726               int *encoded_bits_size)
727 {
728     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
729     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
730     VACodedBufferSegment *coded_buffer_segment;
731
732     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
733     assert(vaStatus == VA_STATUS_SUCCESS);
734     *encoded_bits_size = coded_buffer_segment->size * 8;
735     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
736
737     return VA_STATUS_SUCCESS;
738 }
739
740
741 static void
742 gen9_mfc_avc_slice_state(VADriverContextP ctx,
743                          VAEncPictureParameterBufferH264 *pic_param,
744                          VAEncSliceParameterBufferH264 *slice_param,
745                          struct encode_state *encode_state,
746                          struct intel_encoder_context *encoder_context,
747                          int rate_control_enable,
748                          int qp,
749                          struct intel_batchbuffer *batch)
750 {
751     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
752     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
753     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
754     int beginmb = slice_param->macroblock_address;
755     int endmb = beginmb + slice_param->num_macroblocks;
756     int beginx = beginmb % width_in_mbs;
757     int beginy = beginmb / width_in_mbs;
758     int nextx =  endmb % width_in_mbs;
759     int nexty = endmb / width_in_mbs;
760     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
761     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
762     int maxQpN, maxQpP;
763     unsigned char correct[6], grow, shrink;
764     int i;
765     int weighted_pred_idc = 0;
766     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
767     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
768     int num_ref_l0 = 0, num_ref_l1 = 0;
769
770     if (batch == NULL)
771         batch = encoder_context->base.batch;
772
773     if (slice_type == SLICE_TYPE_I) {
774         luma_log2_weight_denom = 0;
775         chroma_log2_weight_denom = 0;
776     } else if (slice_type == SLICE_TYPE_P) {
777         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
778         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
779
780         if (slice_param->num_ref_idx_active_override_flag)
781             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
782     } else if (slice_type == SLICE_TYPE_B) {
783         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
784         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
785         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
786
787         if (slice_param->num_ref_idx_active_override_flag) {
788             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
789             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
790         }
791
792         if (weighted_pred_idc == 2) {
793             /* 8.4.3 - Derivation process for prediction weights (8-279) */
794             luma_log2_weight_denom = 5;
795             chroma_log2_weight_denom = 5;
796         }
797     }
798
799     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
800     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
801
802     for (i = 0; i < 6; i++)
803         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
804
805     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit +
806         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
807     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit +
808         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
809
810     BEGIN_BCS_BATCH(batch, 11);
811
812     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
813     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
814
815     OUT_BCS_BATCH(batch,
816                   (num_ref_l0 << 16) |
817                   (num_ref_l1 << 24) |
818                   (chroma_log2_weight_denom << 8) |
819                   (luma_log2_weight_denom << 0));
820
821     OUT_BCS_BATCH(batch,
822                   (weighted_pred_idc << 30) |
823                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
824                   (slice_param->disable_deblocking_filter_idc << 27) |
825                   (slice_param->cabac_init_idc << 24) |
826                   (qp<<16) |                    /*Slice Quantization Parameter*/
827                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
828                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
829     OUT_BCS_BATCH(batch,
830                   (beginy << 24) |                      /* First MB X&Y, the beginning position of the current slice */
831                   (beginx << 16) |
832                   slice_param->macroblock_address );
833     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
834     OUT_BCS_BATCH(batch,
835                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
836                   (1 << 30) |           /*ResetRateControlCounter*/
837                   (0 << 28) |           /* RC Trigger Mode = Always Rate Control */
838                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
839                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/
840                   (0 << 22) |     /* QP mode, don't modify CBP */
841                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/
842                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/
843                   (last_slice << 19) |     /*IsLastSlice*/
844                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
845                   (1 << 17) |       /*HeaderPresentFlag*/
846                   (1 << 16) |       /*SliceData PresentFlag*/
847                   (1 << 15) |       /*TailPresentFlag*/
848                   (1 << 13) |       /*RBSP NAL TYPE*/
849                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
850     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
851     OUT_BCS_BATCH(batch,
852                   (maxQpN << 24) |     /* Max QP negative modifier */
853                   (maxQpP << 16) |     /* Max QP positive modifier */
854                   (shrink << 8)  |
855                   (grow << 0));
856     OUT_BCS_BATCH(batch,
857                   (correct[5] << 20) |
858                   (correct[4] << 16) |
859                   (correct[3] << 12) |
860                   (correct[2] << 8) |
861                   (correct[1] << 4) |
862                   (correct[0] << 0));
863     OUT_BCS_BATCH(batch, 0);
864
865     ADVANCE_BCS_BATCH(batch);
866 }
867
868
869 #ifdef MFC_SOFTWARE_HASWELL
870
871 static int
872 gen9_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
873                               int qp,unsigned int *msg,
874                               struct intel_encoder_context *encoder_context,
875                               unsigned char target_mb_size, unsigned char max_mb_size,
876                               struct intel_batchbuffer *batch)
877 {
878     int len_in_dwords = 12;
879     unsigned int intra_msg;
880 #define         INTRA_MSG_FLAG          (1 << 13)
881 #define         INTRA_MBTYPE_MASK       (0x1F0000)
882     if (batch == NULL)
883         batch = encoder_context->base.batch;
884
885     BEGIN_BCS_BATCH(batch, len_in_dwords);
886
887     intra_msg = msg[0] & 0xC0FF;
888     intra_msg |= INTRA_MSG_FLAG;
889     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
890     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
891     OUT_BCS_BATCH(batch, 0);
892     OUT_BCS_BATCH(batch, 0);
893     OUT_BCS_BATCH(batch,
894                   (0 << 24) |           /* PackedMvNum, Debug*/
895                   (0 << 20) |           /* No motion vector */
896                   (1 << 19) |           /* CbpDcY */
897                   (1 << 18) |           /* CbpDcU */
898                   (1 << 17) |           /* CbpDcV */
899                   intra_msg);
900
901     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
902     OUT_BCS_BATCH(batch, 0x000F000F);     /* Code Block Pattern */
903
904     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
905
906     /*Stuff for Intra MB*/
907     OUT_BCS_BATCH(batch, msg[1]);       /* We use Intra16x16, not the 4x4 pred mode */
908     OUT_BCS_BATCH(batch, msg[2]);
909     OUT_BCS_BATCH(batch, msg[3]&0xFF);
910
911     /* MaxSizeInWord and TargetSizeInWord */
912     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
913                   (target_mb_size << 16) );
914
915     OUT_BCS_BATCH(batch, 0);
916
917     ADVANCE_BCS_BATCH(batch);
918
919     return len_in_dwords;
920 }
921
922 static int
923 gen9_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
924                               unsigned int *msg, unsigned int offset,
925                               struct intel_encoder_context *encoder_context,
926                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
927                               struct intel_batchbuffer *batch)
928 {
929     struct gen6_vme_context *vme_context = encoder_context->vme_context;
930     int len_in_dwords = 12;
931     unsigned int inter_msg = 0;
932     if (batch == NULL)
933         batch = encoder_context->base.batch;
934     {
935 #define MSG_MV_OFFSET   4
936         unsigned int *mv_ptr;
937         mv_ptr = msg + MSG_MV_OFFSET;
938         /* MV of VME output is based on 16 sub-blocks. So it is necessary
939          * to convert them to be compatible with the format of AVC_PAK
940          * command.
941          */
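        /*
         * Assumed VME record layout: one (L0, L1) MV DWORD pair per 4x4
         * sub-block, so the pairs for the four 8x8 corners sit at DWORD
         * offsets 0, 8, 16 and 24.  The fix-ups below gather (or, for the
         * 16x8/8x16 shapes, replicate) those pairs into DW0-7, which is all
         * the PAK object reads when only 8 MVs are sent.
         */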
942         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
943             /* MV[0] and MV[2] are replicated */
944             mv_ptr[4] = mv_ptr[0];
945             mv_ptr[5] = mv_ptr[1];
946             mv_ptr[2] = mv_ptr[8];
947             mv_ptr[3] = mv_ptr[9];
948             mv_ptr[6] = mv_ptr[8];
949             mv_ptr[7] = mv_ptr[9];
950         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
951             /* MV[0] and MV[1] are replicated */
952             mv_ptr[2] = mv_ptr[0];
953             mv_ptr[3] = mv_ptr[1];
954             mv_ptr[4] = mv_ptr[16];
955             mv_ptr[5] = mv_ptr[17];
956             mv_ptr[6] = mv_ptr[24];
957             mv_ptr[7] = mv_ptr[25];
958         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
959                    !(msg[1] & SUBMB_SHAPE_MASK)) {
960             /* Don't touch MV[0] or MV[1] */
961             mv_ptr[2] = mv_ptr[8];
962             mv_ptr[3] = mv_ptr[9];
963             mv_ptr[4] = mv_ptr[16];
964             mv_ptr[5] = mv_ptr[17];
965             mv_ptr[6] = mv_ptr[24];
966             mv_ptr[7] = mv_ptr[25];
967         }
968     }
969
970     BEGIN_BCS_BATCH(batch, len_in_dwords);
971
972     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
973
974     inter_msg = 32;
975     /* MV quantity */
976     if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
977         if (msg[1] & SUBMB_SHAPE_MASK)
978             inter_msg = 128;
979     }
980     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
981     OUT_BCS_BATCH(batch, offset);
982     inter_msg = msg[0] & (0x1F00FFFF);
983     inter_msg |= INTER_MV8;
984     inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
985     if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
986         (msg[1] & SUBMB_SHAPE_MASK)) {
987         inter_msg |= INTER_MV32;
988     }
989
990     OUT_BCS_BATCH(batch, inter_msg);
991
992     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
993     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */
994 #if 0
995     if ( slice_type == SLICE_TYPE_B) {
996         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
997     } else {
998         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
999     }
1000 #else
1001     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1002 #endif
1003
1004     inter_msg = msg[1] >> 8;
1005     /*Stuff for Inter MB*/
1006     OUT_BCS_BATCH(batch, inter_msg);
1007     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
1008     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
1009
1010     /* MaxSizeInWord and TargetSizeInWord */
1011     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1012                   (target_mb_size << 16) );
1013
1014     OUT_BCS_BATCH(batch, 0x0);
1015     ADVANCE_BCS_BATCH(batch);
1016
1017     return len_in_dwords;
1018 }
1019
1020 #define         AVC_INTRA_RDO_OFFSET    4
1021 #define         AVC_INTER_RDO_OFFSET    10
1022 #define         AVC_INTER_MSG_OFFSET    8
1023 #define         AVC_INTER_MV_OFFSET             48
1024 #define         AVC_RDO_MASK            0xFFFF
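/*
 * These constants index each macroblock's record in the VME output buffer
 * as it is consumed by the slice loop below (not a documented layout): the
 * intra and inter RDO costs are read as DWORDs 4 and 10, the inter PAK
 * message starts at DWORD 8, and AVC_INTER_MV_OFFSET (48) is used as a byte
 * offset for the indirect motion-vector data pointer.
 */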
1025
1026 static void
1027 gen9_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1028                                        struct encode_state *encode_state,
1029                                        struct intel_encoder_context *encoder_context,
1030                                        int slice_index,
1031                                        struct intel_batchbuffer *slice_batch)
1032 {
1033     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1034     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1035     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1036     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1037     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1038     unsigned int *msg = NULL, offset = 0;
1039     unsigned char *msg_ptr = NULL;
1040     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1041     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1042     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1043     int i,x,y;
1044     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1045     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1046     unsigned int tail_data[] = { 0x0, 0x0 };
1047     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1048     int is_intra = slice_type == SLICE_TYPE_I;
1049     int qp_slice;
1050
1051     qp_slice = qp;
1052     if (rate_control_mode == VA_RC_CBR) {
1053         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1054         if (encode_state->slice_header_index[slice_index] == 0) {
1055             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1056             qp_slice = qp;
1057         }
1058     }
1059
1060     /* only support for 8-bit pixel bit-depth */
1061     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1062     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1063     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1064     assert(qp >= 0 && qp < 52);
1065
1066     gen9_mfc_avc_slice_state(ctx,
1067                              pPicParameter,
1068                              pSliceParameter,
1069                              encode_state, encoder_context,
1070                              (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
1071
1072     if (slice_index == 0)
1073         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1074
1075     intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1076
1077     dri_bo_map(vme_context->vme_output.bo , 1);
1078     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1079
1080     if (is_intra) {
1081         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1082     } else {
1083         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1084     }
1085
1086     for (i = pSliceParameter->macroblock_address;
1087         i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1088         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1089         x = i % width_in_mbs;
1090         y = i / width_in_mbs;
1091         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1092
1093         if (is_intra) {
1094             assert(msg);
1095             gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1096         } else {
1097             int inter_rdo, intra_rdo;
1098             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1099             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1100             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1101             if (intra_rdo < inter_rdo) {
1102                 gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1103             } else {
1104                 msg += AVC_INTER_MSG_OFFSET;
1105                 gen9_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1106             }
1107         }
1108     }
1109
1110     dri_bo_unmap(vme_context->vme_output.bo);
1111
1112     if ( last_slice ) {
1113         mfc_context->insert_object(ctx, encoder_context,
1114                                    tail_data, 2, 8,
1115                                    2, 1, 1, 0, slice_batch);
1116     } else {
1117         mfc_context->insert_object(ctx, encoder_context,
1118                                    tail_data, 1, 8,
1119                                    1, 1, 1, 0, slice_batch);
1120     }
1121
1122
1123 }
1124
1125 static dri_bo *
1126 gen9_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1127                                   struct encode_state *encode_state,
1128                                   struct intel_encoder_context *encoder_context)
1129 {
1130     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1131     struct intel_batchbuffer *batch;
1132     dri_bo *batch_bo;
1133     int i;
1134
1135     batch = mfc_context->aux_batchbuffer;
1136     batch_bo = batch->buffer;
1137     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1138         gen9_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1139     }
1140
1141     intel_batchbuffer_align(batch, 8);
1142
1143     BEGIN_BCS_BATCH(batch, 2);
1144     OUT_BCS_BATCH(batch, 0);
1145     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1146     ADVANCE_BCS_BATCH(batch);
1147
1148     dri_bo_reference(batch_bo);
1149     intel_batchbuffer_free(batch);
1150     mfc_context->aux_batchbuffer = NULL;
1151
1152     return batch_bo;
1153 }
1154
1155 #else
1156
1157 static void
1158 gen9_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1159                                     struct encode_state *encode_state,
1160                                     struct intel_encoder_context *encoder_context)
1161
1162 {
1163     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1164     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1165
1166     assert(vme_context->vme_output.bo);
1167     mfc_context->buffer_suface_setup(ctx,
1168                                      &mfc_context->gpe_context,
1169                                      &vme_context->vme_output,
1170                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1171                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1172     assert(mfc_context->aux_batchbuffer_surface.bo);
1173     mfc_context->buffer_suface_setup(ctx,
1174                                      &mfc_context->gpe_context,
1175                                      &mfc_context->aux_batchbuffer_surface,
1176                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1177                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1178 }
1179
1180 static void
1181 gen9_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1182                                      struct encode_state *encode_state,
1183                                      struct intel_encoder_context *encoder_context)
1184
1185 {
1186     struct i965_driver_data *i965 = i965_driver_data(ctx);
1187     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1188     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1189     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1190     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
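    /* Sizing assumption: one command block per macroblock, plus eight blocks
     * per slice for header/tail commands and one for the final batch end. */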
1191     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1192     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1193     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1194     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr,
1195                                                            "MFC batchbuffer",
1196                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1197                                                            0x1000);
1198     mfc_context->buffer_suface_setup(ctx,
1199                                      &mfc_context->gpe_context,
1200                                      &mfc_context->mfc_batchbuffer_surface,
1201                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1202                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1203 }
1204
1205 static void
1206 gen9_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
1207                                     struct encode_state *encode_state,
1208                                     struct intel_encoder_context *encoder_context)
1209 {
1210     gen9_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1211     gen9_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1212 }
1213
1214 static void
1215 gen9_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
1216                                 struct encode_state *encode_state,
1217                                 struct intel_encoder_context *encoder_context)
1218 {
1219     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1220     struct gen6_interface_descriptor_data *desc;
1221     int i;
1222     dri_bo *bo;
1223
1224     bo = mfc_context->gpe_context.idrt.bo;
1225     dri_bo_map(bo, 1);
1226     assert(bo->virtual);
1227     desc = bo->virtual;
1228
1229     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1230         struct i965_kernel *kernel;
1231
1232         kernel = &mfc_context->gpe_context.kernels[i];
1233         assert(sizeof(*desc) == 32);
1234
1235         /* Set up the descriptor table */
1236         memset(desc, 0, sizeof(*desc));
1237         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1238         desc->desc2.sampler_count = 0;
1239         desc->desc2.sampler_state_pointer = 0;
1240         desc->desc3.binding_table_entry_count = 2;
1241         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1242         desc->desc4.constant_urb_entry_read_offset = 0;
1243         desc->desc4.constant_urb_entry_read_length = 4;
1244
1245         /*kernel start*/
1246         dri_bo_emit_reloc(bo,
1247                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1248                           0,
1249                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1250                           kernel->bo);
1251         desc++;
1252     }
1253
1254     dri_bo_unmap(bo);
1255 }
1256
1257 static void
1258 gen9_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
1259                                     struct encode_state *encode_state,
1260                                     struct intel_encoder_context *encoder_context)
1261 {
1262     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1263
1264     (void)mfc_context;
1265 }
1266
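/*
 * Each MEDIA_OBJECT issued by this helper carries six DWORDs of inline data
 * for the MFC batchbuffer kernel (as laid out below): the head and
 * batchbuffer offsets, the head/tail command sizes, the MB-command count
 * with first/last/last-slice flags, the starting MB x/y, and the QP plus
 * the picture width in MBs.
 */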
1267 static void
1268 gen9_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1269                                          int index,
1270                                          int head_offset,
1271                                          int batchbuffer_offset,
1272                                          int head_size,
1273                                          int tail_size,
1274                                          int number_mb_cmds,
1275                                          int first_object,
1276                                          int last_object,
1277                                          int last_slice,
1278                                          int mb_x,
1279                                          int mb_y,
1280                                          int width_in_mbs,
1281                                          int qp)
1282 {
1283     BEGIN_BATCH(batch, 12);
1284
1285     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1286     OUT_BATCH(batch, index);
1287     OUT_BATCH(batch, 0);
1288     OUT_BATCH(batch, 0);
1289     OUT_BATCH(batch, 0);
1290     OUT_BATCH(batch, 0);
1291
1292     /* inline data */
1293     OUT_BATCH(batch, head_offset);
1294     OUT_BATCH(batch, batchbuffer_offset);
1295     OUT_BATCH(batch,
1296               head_size << 16 |
1297               tail_size);
1298     OUT_BATCH(batch,
1299               number_mb_cmds << 16 |
1300               first_object << 2 |
1301               last_object << 1 |
1302               last_slice);
1303     OUT_BATCH(batch,
1304               mb_y << 8 |
1305               mb_x);
1306     OUT_BATCH(batch,
1307               qp << 16 |
1308               width_in_mbs);
1309
1310     ADVANCE_BATCH(batch);
1311 }
1312
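/*
 * Split one slice into chunks of at most 128 macroblock commands and emit a
 * MEDIA_OBJECT per chunk.  The first chunk also accounts for the slice head
 * (slice state and packed headers) and the final chunk for the tail data, so
 * the offsets into the output batch buffer advance accordingly.
 */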
1313 static void
1314 gen9_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1315                                        struct intel_encoder_context *encoder_context,
1316                                        VAEncSliceParameterBufferH264 *slice_param,
1317                                        int head_offset,
1318                                        unsigned short head_size,
1319                                        unsigned short tail_size,
1320                                        int batchbuffer_offset,
1321                                        int qp,
1322                                        int last_slice)
1323 {
1324     struct intel_batchbuffer *batch = encoder_context->base.batch;
1325     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1326     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1327     int total_mbs = slice_param->num_macroblocks;
1328     int number_mb_cmds = 128;
1329     int starting_mb = 0;
1330     int last_object = 0;
1331     int first_object = 1;
1332     int i;
1333     int mb_x, mb_y;
1334     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1335
1336     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1337         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1338         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1339         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1340         assert(mb_x <= 255 && mb_y <= 255);
1341
1342         starting_mb += number_mb_cmds;
1343
1344         gen9_mfc_batchbuffer_emit_object_command(batch,
1345                                                  index,
1346                                                  head_offset,
1347                                                  batchbuffer_offset,
1348                                                  head_size,
1349                                                  tail_size,
1350                                                  number_mb_cmds,
1351                                                  first_object,
1352                                                  last_object,
1353                                                  last_slice,
1354                                                  mb_x,
1355                                                  mb_y,
1356                                                  width_in_mbs,
1357                                                  qp);
1358
1359         if (first_object) {
1360             head_offset += head_size;
1361             batchbuffer_offset += head_size;
1362         }
1363
1364         if (last_object) {
1365             head_offset += tail_size;
1366             batchbuffer_offset += tail_size;
1367         }
1368
1369         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1370
1371         first_object = 0;
1372     }
1373
1374     if (!last_object) {
1375         last_object = 1;
1376         number_mb_cmds = total_mbs % number_mb_cmds;
1377         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1378         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1379         assert(mb_x <= 255 && mb_y <= 255);
1380         starting_mb += number_mb_cmds;
1381
1382         gen9_mfc_batchbuffer_emit_object_command(batch,
1383                                                  index,
1384                                                  head_offset,
1385                                                  batchbuffer_offset,
1386                                                  head_size,
1387                                                  tail_size,
1388                                                  number_mb_cmds,
1389                                                  first_object,
1390                                                  last_object,
1391                                                  last_slice,
1392                                                  mb_x,
1393                                                  mb_y,
1394                                                  width_in_mbs,
1395                                                  qp);
1396     }
1397 }
1398
1399 /*
1400  * return size in Owords (16 bytes)
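 *
 * The slice state, packed headers and tail data are written into the software
 * aux batch buffer first; their sizes are measured in OWords and handed to
 * the MEDIA_OBJECT commands above, so the batch-buffer kernel can place the
 * hardware-generated MB commands between head and tail at batchbuffer_offset.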
1401  */
1402 static int
1403 gen9_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1404                                struct encode_state *encode_state,
1405                                struct intel_encoder_context *encoder_context,
1406                                int slice_index,
1407                                int batchbuffer_offset)
1408 {
1409     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1410     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1411     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1412     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1413     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1414     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1415     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1416     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1417     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1418     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1419     unsigned int tail_data[] = { 0x0, 0x0 };
1420     long head_offset;
1421     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1422     unsigned short head_size, tail_size;
1423     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1424     int qp_slice;
1425
1426     qp_slice = qp;
1427     if (rate_control_mode == VA_RC_CBR) {
1428         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1429         if (encode_state->slice_header_index[slice_index] == 0) {
1430             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1431             qp_slice = qp;
1432         }
1433     }
1434
1435     /* only support for 8-bit pixel bit-depth */
1436     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1437     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1438     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1439     assert(qp >= 0 && qp < 52);
1440
1441     head_offset = old_used / 16;
1442     gen9_mfc_avc_slice_state(ctx,
1443                              pPicParameter,
1444                              pSliceParameter,
1445                              encode_state,
1446                              encoder_context,
1447                              (rate_control_mode == VA_RC_CBR),
1448                              qp_slice,
1449                              slice_batch);
1450
1451     if (slice_index == 0)
1452         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1453
1454
1455     intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1456
1457
1458     intel_batchbuffer_align(slice_batch, 16); /* align to an Oword boundary */
1459     used = intel_batchbuffer_used_size(slice_batch);
1460     head_size = (used - old_used) / 16;
1461     old_used = used;
1462
1463     /* tail */
1464     if (last_slice) {
1465         mfc_context->insert_object(ctx,
1466                                    encoder_context,
1467                                    tail_data,
1468                                    2,
1469                                    8,
1470                                    2,
1471                                    1,
1472                                    1,
1473                                    0,
1474                                    slice_batch);
1475     } else {
1476         mfc_context->insert_object(ctx,
1477                                    encoder_context,
1478                                    tail_data,
1479                                    1,
1480                                    8,
1481                                    1,
1482                                    1,
1483                                    1,
1484                                    0,
1485                                    slice_batch);
1486     }
1487
1488     intel_batchbuffer_align(slice_batch, 16); /* align to an Oword boundary */
1489     used = intel_batchbuffer_used_size(slice_batch);
1490     tail_size = (used - old_used) / 16;
1491
1492     gen9_mfc_avc_batchbuffer_slice_command(ctx,
1493                                            encoder_context,
1494                                            pSliceParameter,
1495                                            head_offset,
1496                                            head_size,
1497                                            tail_size,
1498                                            batchbuffer_offset,
1499                                            qp,
1500                                            last_slice);
1501
1502     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1503 }
1504
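/*
 * Walk every slice of the picture through the GPE media pipeline.  Each
 * slice returns its size in OWords, which becomes the batch-buffer offset
 * of the following slice.
 */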
1505 static void
1506 gen9_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1507                                   struct encode_state *encode_state,
1508                                   struct intel_encoder_context *encoder_context)
1509 {
1510     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1511     struct intel_batchbuffer *batch = encoder_context->base.batch;
1512     int i, size, offset = 0;
1513
1514     intel_batchbuffer_start_atomic(batch, 0x4000);
1515     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1516
1517     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1518         size = gen9_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1519         offset += size;
1520     }
1521
1522     intel_batchbuffer_end_atomic(batch);
1523     intel_batchbuffer_flush(batch);
1524 }
1525
1526 static void
1527 gen9_mfc_build_avc_batchbuffer(VADriverContextP ctx,
1528                                struct encode_state *encode_state,
1529                                struct intel_encoder_context *encoder_context)
1530 {
1531     gen9_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1532     gen9_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1533     gen9_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1534     gen9_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1535 }
1536
1537 static dri_bo *
1538 gen9_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1539                                   struct encode_state *encode_state,
1540                                   struct intel_encoder_context *encoder_context)
1541 {
1542     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1543
1544     gen9_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1545     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1546
1547     return mfc_context->mfc_batchbuffer_surface.bo;
1548 }
1549
1550 #endif
1551
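/*
 * Top-level AVC PAK programming.  The per-slice commands are built either by
 * the software path (MFC_SOFTWARE_HASWELL) or by the GPGPU batch-buffer
 * kernels, then the picture-level state is emitted on the BCS ring and the
 * slice batch buffer is chained in with MI_BATCH_BUFFER_START.
 */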
1552 static void
1553 gen9_mfc_avc_pipeline_programing(VADriverContextP ctx,
1554                                  struct encode_state *encode_state,
1555                                  struct intel_encoder_context *encoder_context)
1556 {
1557     struct intel_batchbuffer *batch = encoder_context->base.batch;
1558     dri_bo *slice_batch_bo;
1559
1560     if (intel_mfc_interlace_check(ctx, encode_state, encoder_context)) {
1561         fprintf(stderr, "The current VA driver doesn't support interlace mode!\n");
1562         assert(0);
1563         return;
1564     }
1565
1566 #ifdef MFC_SOFTWARE_HASWELL
1567     slice_batch_bo = gen9_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1568 #else
1569     slice_batch_bo = gen9_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1570 #endif
1571
1572     // begin programming
1573     intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1574     intel_batchbuffer_emit_mi_flush(batch);
1575
1576     // picture level programming
1577     gen9_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1578
1579     BEGIN_BCS_BATCH(batch, 3);
1580     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1581     OUT_BCS_RELOC(batch,
1582                   slice_batch_bo,
1583                   I915_GEM_DOMAIN_COMMAND, 0,
1584                   0);
1585     OUT_BCS_BATCH(batch, 0);
1586     ADVANCE_BCS_BATCH(batch);
1587
1588     // end programming
1589     intel_batchbuffer_end_atomic(batch);
1590
1591     dri_bo_unreference(slice_batch_bo);
1592 }
1593
1594
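/*
 * Encode one AVC frame.  In CBR mode the frame is re-encoded until the BRC
 * post-pack check reports no HRD violation; an overflow/underflow that can
 * no longer be repaired (QP already at its min/max) is reported once and the
 * frame is accepted as-is.
 */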
1595 static VAStatus
1596 gen9_mfc_avc_encode_picture(VADriverContextP ctx,
1597                             struct encode_state *encode_state,
1598                             struct intel_encoder_context *encoder_context)
1599 {
1600     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1601     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1602     int current_frame_bits_size;
1603     int sts;
1604
1605     for (;;) {
1606         gen9_mfc_init(ctx, encode_state, encoder_context);
1607         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1608         /* Programming the BCS pipeline */
1609         gen9_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);   // filling the pipeline
1610         gen9_mfc_run(ctx, encode_state, encoder_context);
1611         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1612             gen9_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1613             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1614             if (sts == BRC_NO_HRD_VIOLATION) {
1615                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1616                 break;
1617             } else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1619                 if (!mfc_context->hrd.violation_noted) {
1620                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP) ? "overflow" : "underflow");
1621                     mfc_context->hrd.violation_noted = 1;
1622                 }
1623                 return VA_STATUS_SUCCESS;
1624             }
1625         } else {
1626             break;
1627         }
1628     }
1629
1630     return VA_STATUS_SUCCESS;
1631 }
1632
1633 /*
1634  * MPEG-2
1635  */
1636
1637 static const int
1638 va_to_gen9_mpeg2_picture_type[3] = {
1639     1,  /* I */
1640     2,  /* P */
1641     3   /* B */
1642 };
1643
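/*
 * Program MFX_MPEG2_PIC_STATE from the picture parameter buffer: f_codes,
 * the picture coding extension flags, the picture coding type and the frame
 * size in macroblocks.
 */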
1644 static void
1645 gen9_mfc_mpeg2_pic_state(VADriverContextP ctx,
1646                          struct intel_encoder_context *encoder_context,
1647                          struct encode_state *encode_state)
1648 {
1649     struct intel_batchbuffer *batch = encoder_context->base.batch;
1650     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1651     VAEncPictureParameterBufferMPEG2 *pic_param;
1652     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1653     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1654     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1655
1656     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1657     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1658     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1659
1660     BEGIN_BCS_BATCH(batch, 13);
1661     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1662     OUT_BCS_BATCH(batch,
1663                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1664                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1665                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1666                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1667                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1668                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1669                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1670                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1671                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1672                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1673                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1674                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1675     OUT_BCS_BATCH(batch,
1676                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1677                   va_to_gen9_mpeg2_picture_type[pic_param->picture_type] << 9 |
1678                   0);
1679     OUT_BCS_BATCH(batch,
1680                   1 << 31 |     /* slice concealment */
1681                   (height_in_mbs - 1) << 16 |
1682                   (width_in_mbs - 1));
1683
1684     if (slice_param && slice_param->quantiser_scale_code >= 14)
1685         OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1686     else
1687         OUT_BCS_BATCH(batch, 0);
1688
1689     OUT_BCS_BATCH(batch, 0);
1690     OUT_BCS_BATCH(batch,
1691                   0xFFF << 16 | /* InterMBMaxSize */
1692                   0xFFF << 0 |  /* IntraMBMaxSize */
1693                   0);
1694     OUT_BCS_BATCH(batch, 0);
1695     OUT_BCS_BATCH(batch, 0);
1696     OUT_BCS_BATCH(batch, 0);
1697     OUT_BCS_BATCH(batch, 0);
1698     OUT_BCS_BATCH(batch, 0);
1699     OUT_BCS_BATCH(batch, 0);
1700     ADVANCE_BCS_BATCH(batch);
1701 }
1702
1703 static void
1704 gen9_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1705 {
1706     unsigned char intra_qm[64] = {
1707         8, 16, 19, 22, 26, 27, 29, 34,
1708         16, 16, 22, 24, 27, 29, 34, 37,
1709         19, 22, 26, 27, 29, 34, 34, 38,
1710         22, 22, 26, 27, 29, 34, 37, 40,
1711         22, 26, 27, 29, 32, 35, 40, 48,
1712         26, 27, 29, 32, 35, 40, 48, 58,
1713         26, 27, 29, 34, 38, 46, 56, 69,
1714         27, 29, 35, 38, 46, 56, 69, 83
1715     };
1716
1717     unsigned char non_intra_qm[64] = {
1718         16, 16, 16, 16, 16, 16, 16, 16,
1719         16, 16, 16, 16, 16, 16, 16, 16,
1720         16, 16, 16, 16, 16, 16, 16, 16,
1721         16, 16, 16, 16, 16, 16, 16, 16,
1722         16, 16, 16, 16, 16, 16, 16, 16,
1723         16, 16, 16, 16, 16, 16, 16, 16,
1724         16, 16, 16, 16, 16, 16, 16, 16,
1725         16, 16, 16, 16, 16, 16, 16, 16
1726     };
1727
1728     gen9_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1729     gen9_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16, encoder_context);
1730 }
1731
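/*
 * The forward quantizer matrices hold the reciprocal of each quantizer step
 * in 0.16 fixed point (65536 / q).  The intra table below appears to be the
 * transpose of intra_qm above (with a few inherited deviations); the
 * non-intra table is the flat reciprocal of 16 (0x1000).
 */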
1732 static void
1733 gen9_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1734 {
1735     unsigned short intra_fqm[64] = {
1736         65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1737         65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1738         65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1739         65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1740         65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1741         65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1742         65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1743         65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1744     };
1745
1746     unsigned short non_intra_fqm[64] = {
1747         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1748         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1749         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1750         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1751         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1752         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1753         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1754         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1755     };
1756
1757     gen9_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1758     gen9_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1759 }
1760
1761 static void
1762 gen9_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1763                                 struct intel_encoder_context *encoder_context,
1764                                 int x, int y,
1765                                 int next_x, int next_y,
1766                                 int is_first_slice_group,
1767                                 int is_last_slice_group,
1768                                 int intra_slice,
1769                                 int qp,
1770                                 struct intel_batchbuffer *batch)
1771 {
1772     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1773
1774     if (batch == NULL)
1775         batch = encoder_context->base.batch;
1776
1777     BEGIN_BCS_BATCH(batch, 8);
1778
1779     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1780     OUT_BCS_BATCH(batch,
1781                   0 << 31 |                             /* MbRateCtrlFlag */
1782                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1783                   1 << 17 |                             /* Insert Header before the first slice group data */
1784                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1785                   1 << 15 |                             /* TailPresentFlag: always 1 */
1786                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1787                   !!intra_slice << 13 |                 /* IntraSlice */
1788                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1789                   0);
1790     OUT_BCS_BATCH(batch,
1791                   next_y << 24 |
1792                   next_x << 16 |
1793                   y << 8 |
1794                   x << 0 |
1795                   0);
1796     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1797     /* bitstream pointer is only loaded once for the first slice of a frame when
1798      * LoadSlicePointerFlag is 0
1799      */
1800     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1801     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1802     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1803     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1804
1805     ADVANCE_BCS_BATCH(batch);
1806 }
1807
1808 static int
1809 gen9_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1810                                 struct intel_encoder_context *encoder_context,
1811                                 int x, int y,
1812                                 int first_mb_in_slice,
1813                                 int last_mb_in_slice,
1814                                 int first_mb_in_slice_group,
1815                                 int last_mb_in_slice_group,
1816                                 int mb_type,
1817                                 int qp_scale_code,
1818                                 int coded_block_pattern,
1819                                 unsigned char target_size_in_word,
1820                                 unsigned char max_size_in_word,
1821                                 struct intel_batchbuffer *batch)
1822 {
1823     int len_in_dwords = 9;
1824
1825     if (batch == NULL)
1826         batch = encoder_context->base.batch;
1827
1828     BEGIN_BCS_BATCH(batch, len_in_dwords);
1829
1830     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1831     OUT_BCS_BATCH(batch,
1832                   0 << 24 |     /* PackedMvNum */
1833                   0 << 20 |     /* MvFormat */
1834                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1835                   0 << 15 |     /* TransformFlag: frame DCT */
1836                   0 << 14 |     /* FieldMbFlag */
1837                   1 << 13 |     /* IntraMbFlag */
1838                   mb_type << 8 |   /* MbType: Intra */
1839                   0 << 2 |      /* SkipMbFlag */
1840                   0 << 0 |      /* InterMbMode */
1841                   0);
1842     OUT_BCS_BATCH(batch, y << 16 | x);
1843     OUT_BCS_BATCH(batch,
1844                   max_size_in_word << 24 |
1845                   target_size_in_word << 16 |
1846                   coded_block_pattern << 6 |      /* CBP */
1847                   0);
1848     OUT_BCS_BATCH(batch,
1849                   last_mb_in_slice << 31 |
1850                   first_mb_in_slice << 30 |
1851                   0 << 27 |     /* EnableCoeffClamp */
1852                   last_mb_in_slice_group << 26 |
1853                   0 << 25 |     /* MbSkipConvDisable */
1854                   first_mb_in_slice_group << 24 |
1855                   0 << 16 |     /* MvFieldSelect */
1856                   qp_scale_code << 0 |
1857                   0);
1858     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1859     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1860     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1861     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1862
1863     ADVANCE_BCS_BATCH(batch);
1864
1865     return len_in_dwords;
1866 }
1867
1868 /* Byte offset */
1869 #define MPEG2_INTER_MV_OFFSET   48
1870
1871 static struct _mv_ranges
1872 {
1873     int low;    /* in the unit of 1/2 pixel */
1874     int high;   /* in the unit of 1/2 pixel */
1875 } mv_ranges[] = {
1876     {0, 0},
1877     {-16, 15},
1878     {-32, 31},
1879     {-64, 63},
1880     {-128, 127},
1881     {-256, 255},
1882     {-512, 511},
1883     {-1024, 1023},
1884     {-2048, 2047},
1885     {-4096, 4095}
1886 };
1887
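/*
 * Sanitize a motion vector given in half-pel units: the vector is zeroed if
 * the predicted 16x16 block would fall outside the picture, then it is
 * clamped to the range allowed by the f_code,
 * i.e. [-16 << (f_code - 1), (16 << (f_code - 1)) - 1] half-pel units.
 */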
1888 static int
1889 mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
1890 {
1891     if (mv + pos * 16 * 2 < 0 ||
1892         mv + (pos + 1) * 16 * 2 > display_max * 2)
1893         mv = 0;
1894
1895     if (f_code > 0 && f_code < 10) {
1896         if (mv < mv_ranges[f_code].low)
1897             mv = mv_ranges[f_code].low;
1898
1899         if (mv > mv_ranges[f_code].high)
1900             mv = mv_ranges[f_code].high;
1901     }
1902
1903     return mv;
1904 }
1905
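/*
 * Emit an inter MFC_MPEG2_PAK_OBJECT.  The forward/backward motion vectors
 * are read from the VME output message at MPEG2_INTER_MV_OFFSET (the /2
 * suggests the VME values are in quarter-pel), sanitized against the picture
 * bounds and f_code range, and packed into the MV DWORDs of the command.
 */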
1906 static int
1907 gen9_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
1908                                 struct encode_state *encode_state,
1909                                 struct intel_encoder_context *encoder_context,
1910                                 unsigned int *msg,
1911                                 int width_in_mbs, int height_in_mbs,
1912                                 int x, int y,
1913                                 int first_mb_in_slice,
1914                                 int last_mb_in_slice,
1915                                 int first_mb_in_slice_group,
1916                                 int last_mb_in_slice_group,
1917                                 int qp_scale_code,
1918                                 unsigned char target_size_in_word,
1919                                 unsigned char max_size_in_word,
1920                                 struct intel_batchbuffer *batch)
1921 {
1922     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1923     int len_in_dwords = 9;
1924     short *mvptr, mvx0, mvy0, mvx1, mvy1;
1925
1926     if (batch == NULL)
1927         batch = encoder_context->base.batch;
1928
1929     mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);
1930     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
1931     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
1932     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
1933     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
1934
1935     BEGIN_BCS_BATCH(batch, len_in_dwords);
1936
1937     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1938     OUT_BCS_BATCH(batch,
1939                   2 << 24 |     /* PackedMvNum */
1940                   7 << 20 |     /* MvFormat */
1941                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1942                   0 << 15 |     /* TransformFlag: frame DCT */
1943                   0 << 14 |     /* FieldMbFlag */
1944                   0 << 13 |     /* IntraMbFlag */
1945                   1 << 8 |      /* MbType: Frame-based */
1946                   0 << 2 |      /* SkipMbFlag */
1947                   0 << 0 |      /* InterMbMode */
1948                   0);
1949     OUT_BCS_BATCH(batch, y << 16 | x);
1950     OUT_BCS_BATCH(batch,
1951                   max_size_in_word << 24 |
1952                   target_size_in_word << 16 |
1953                   0x3f << 6 |   /* CBP */
1954                   0);
1955     OUT_BCS_BATCH(batch,
1956                   last_mb_in_slice << 31 |
1957                   first_mb_in_slice << 30 |
1958                   0 << 27 |     /* EnableCoeffClamp */
1959                   last_mb_in_slice_group << 26 |
1960                   0 << 25 |     /* MbSkipConvDisable */
1961                   first_mb_in_slice_group << 24 |
1962                   0 << 16 |     /* MvFieldSelect */
1963                   qp_scale_code << 0 |
1964                   0);
1965
1966     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
1967     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
1968     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1969     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1970
1971     ADVANCE_BCS_BATCH(batch);
1972
1973     return len_in_dwords;
1974 }
1975
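/*
 * Insert the application-provided packed MPEG-2 sequence and picture headers
 * (if any) into the slice batch.  Emulation-byte insertion is not needed for
 * MPEG-2, so the corresponding flag is left at 0.
 */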
1976 static void
1977 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
1978                                            struct encode_state *encode_state,
1979                                            struct intel_encoder_context *encoder_context,
1980                                            struct intel_batchbuffer *slice_batch)
1981 {
1982     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1983     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
1984
1985     if (encode_state->packed_header_data[idx]) {
1986         VAEncPackedHeaderParameterBuffer *param = NULL;
1987         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1988         unsigned int length_in_bits;
1989
1990         assert(encode_state->packed_header_param[idx]);
1991         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1992         length_in_bits = param->bit_length;
1993
1994         mfc_context->insert_object(ctx,
1995                                    encoder_context,
1996                                    header_data,
1997                                    ALIGN(length_in_bits, 32) >> 5,
1998                                    length_in_bits & 0x1f,
1999                                    5,   /* FIXME: check it */
2000                                    0,
2001                                    0,
2002                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2003                                    slice_batch);
2004     }
2005
2006     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2007
2008     if (encode_state->packed_header_data[idx]) {
2009         VAEncPackedHeaderParameterBuffer *param = NULL;
2010         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2011         unsigned int length_in_bits;
2012
2013         assert(encode_state->packed_header_param[idx]);
2014         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2015         length_in_bits = param->bit_length;
2016
2017         mfc_context->insert_object(ctx,
2018                                    encoder_context,
2019                                    header_data,
2020                                    ALIGN(length_in_bits, 32) >> 5,
2021                                    length_in_bits & 0x1f,
2022                                    5,   /* FIXME: check it */
2023                                    0,
2024                                    0,
2025                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2026                                    slice_batch);
2027     }
2028 }
2029
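/*
 * Emit one MPEG-2 slice group: slice-group state, the packed headers for the
 * first group, a per-macroblock PAK object for every MB (always intra for an
 * intra slice, otherwise chosen by comparing the intra/inter RDO costs from
 * the VME output), and finally a section delimiter or, for the last group,
 * the picture-end tail.
 */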
2030 static void
2031 gen9_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2032                                     struct encode_state *encode_state,
2033                                     struct intel_encoder_context *encoder_context,
2034                                     int slice_index,
2035                                     VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2036                                     struct intel_batchbuffer *slice_batch)
2037 {
2038     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2039     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2040     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2041     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2042     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2043     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2044     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2045     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2046     int i, j;
2047     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2048     unsigned int *msg = NULL;
2049     unsigned char *msg_ptr = NULL;
2050
2051     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2052     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2053     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2054     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2055
2056     dri_bo_map(vme_context->vme_output.bo, 0);
2057     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2058
2059     if (next_slice_group_param) {
2060         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2061         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2062     } else {
2063         h_next_start_pos = 0;
2064         v_next_start_pos = height_in_mbs;
2065     }
2066
2067     gen9_mfc_mpeg2_slicegroup_state(ctx,
2068                                     encoder_context,
2069                                     h_start_pos,
2070                                     v_start_pos,
2071                                     h_next_start_pos,
2072                                     v_next_start_pos,
2073                                     slice_index == 0,
2074                                     next_slice_group_param == NULL,
2075                                     slice_param->is_intra_slice,
2076                                     slice_param->quantiser_scale_code,
2077                                     slice_batch);
2078
2079     if (slice_index == 0)
2080         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2081
2082     /* Insert '00' to make sure the header is valid */
2083     mfc_context->insert_object(ctx,
2084                                encoder_context,
2085                                (unsigned int*)section_delimiter,
2086                                1,
2087                                8,   /* 8bits in the last DWORD */
2088                                1,   /* 1 byte */
2089                                1,
2090                                0,
2091                                0,
2092                                slice_batch);
2093
2094     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2095         /* PAK for each macroblocks */
2096         for (j = 0; j < slice_param->num_macroblocks; j++) {
2097             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2098             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2099             int first_mb_in_slice = (j == 0);
2100             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2101             int first_mb_in_slice_group = (i == 0 && j == 0);
2102             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2103                                           j == slice_param->num_macroblocks - 1);
2104
2105             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2106
2107             if (slice_param->is_intra_slice) {
2108                 gen9_mfc_mpeg2_pak_object_intra(ctx,
2109                                                 encoder_context,
2110                                                 h_pos, v_pos,
2111                                                 first_mb_in_slice,
2112                                                 last_mb_in_slice,
2113                                                 first_mb_in_slice_group,
2114                                                 last_mb_in_slice_group,
2115                                                 0x1a,
2116                                                 slice_param->quantiser_scale_code,
2117                                                 0x3f,
2118                                                 0,
2119                                                 0xff,
2120                                                 slice_batch);
2121             } else {
2122                 int inter_rdo, intra_rdo;
2123                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2124                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2125
2126                 if (intra_rdo < inter_rdo)
2127                     gen9_mfc_mpeg2_pak_object_intra(ctx,
2128                                                      encoder_context,
2129                                                      h_pos, v_pos,
2130                                                      first_mb_in_slice,
2131                                                      last_mb_in_slice,
2132                                                      first_mb_in_slice_group,
2133                                                      last_mb_in_slice_group,
2134                                                      0x1a,
2135                                                      slice_param->quantiser_scale_code,
2136                                                      0x3f,
2137                                                      0,
2138                                                      0xff,
2139                                                      slice_batch);
2140                 else
2141                     gen9_mfc_mpeg2_pak_object_inter(ctx,
2142                                                 encode_state,
2143                                                 encoder_context,
2144                                                 msg,
2145                                                 width_in_mbs, height_in_mbs,
2146                                                 h_pos, v_pos,
2147                                                 first_mb_in_slice,
2148                                                 last_mb_in_slice,
2149                                                 first_mb_in_slice_group,
2150                                                 last_mb_in_slice_group,
2151                                                 slice_param->quantiser_scale_code,
2152                                                 0,
2153                                                 0xff,
2154                                                 slice_batch);
2155             }
2156         }
2157
2158         slice_param++;
2159     }
2160
2161     dri_bo_unmap(vme_context->vme_output.bo);
2162
2163     /* tail data */
2164     if (next_slice_group_param == NULL) { /* end of a picture */
2165         mfc_context->insert_object(ctx,
2166                                    encoder_context,
2167                                    (unsigned int *)tail_delimiter,
2168                                    2,
2169                                    8,   /* 8bits in the last DWORD */
2170                                    5,   /* 5 bytes */
2171                                    1,
2172                                    1,
2173                                    0,
2174                                    slice_batch);
2175     } else {        /* end of a slice group */
2176         mfc_context->insert_object(ctx,
2177                                    encoder_context,
2178                                    (unsigned int *)section_delimiter,
2179                                    1,
2180                                    8,   /* 8bits in the last DWORD */
2181                                    1,   /* 1 byte */
2182                                    1,
2183                                    1,
2184                                    0,
2185                                    slice_batch);
2186     }
2187 }
2188
2189 /*
2190  * A batch buffer for all slices, including the slice state,
2191  * slice insert object and slice PAK object commands
2192  */
2193 static dri_bo *
2194 gen9_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2195                                           struct encode_state *encode_state,
2196                                           struct intel_encoder_context *encoder_context)
2197 {
2198     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2199     struct intel_batchbuffer *batch;
2200     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2201     dri_bo *batch_bo;
2202     int i;
2203
2204     batch = mfc_context->aux_batchbuffer;
2205     batch_bo = batch->buffer;
2206
2207     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2208         if (i == encode_state->num_slice_params_ext - 1)
2209             next_slice_group_param = NULL;
2210         else
2211             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2212
2213         gen9_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2214     }
2215
2216     intel_batchbuffer_align(batch, 8);
2217
2218     BEGIN_BCS_BATCH(batch, 2);
2219     OUT_BCS_BATCH(batch, 0);
2220     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2221     ADVANCE_BCS_BATCH(batch);
2222
2223     dri_bo_reference(batch_bo);
2224     intel_batchbuffer_free(batch);
2225     mfc_context->aux_batchbuffer = NULL;
2226
2227     return batch_bo;
2228 }
2229
2230 static void
2231 gen9_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2232                                            struct encode_state *encode_state,
2233                                            struct intel_encoder_context *encoder_context)
2234 {
2235     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2236
2237     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2238     mfc_context->set_surface_state(ctx, encoder_context);
2239     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2240     gen9_mfc_pipe_buf_addr_state(ctx, encoder_context);
2241     gen9_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2242     gen9_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2243     gen9_mfc_mpeg2_qm_state(ctx, encoder_context);
2244     gen9_mfc_mpeg2_fqm_state(ctx, encoder_context);
2245 }
2246
2247 static void
2248 gen9_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2249                                    struct encode_state *encode_state,
2250                                    struct intel_encoder_context *encoder_context)
2251 {
2252     struct intel_batchbuffer *batch = encoder_context->base.batch;
2253     dri_bo *slice_batch_bo;
2254
2255     slice_batch_bo = gen9_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2256
2257     // begin programming
2258     intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
2259     intel_batchbuffer_emit_mi_flush(batch);
2260
2261     // picture level programming
2262     gen9_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2263
2264     BEGIN_BCS_BATCH(batch, 4);
2265     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
2266     OUT_BCS_RELOC(batch,
2267                   slice_batch_bo,
2268                   I915_GEM_DOMAIN_COMMAND, 0,
2269                   0);
2270     OUT_BCS_BATCH(batch, 0);
2271     OUT_BCS_BATCH(batch, 0);
2272     ADVANCE_BCS_BATCH(batch);
2273
2274     // end programming
2275     intel_batchbuffer_end_atomic(batch);
2276
2277     dri_bo_unreference(slice_batch_bo);
2278 }
2279
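/*
 * Bind the per-frame surfaces to the MFC context: the reconstructed surface
 * (pre-deblocking output), the forward/backward reference surfaces, the
 * input YUV surface and the coded buffer.  A missing backward reference
 * falls back to the forward one, and the remaining reference slots alternate
 * between the two.
 */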
2280 static VAStatus
2281 intel_mfc_mpeg2_prepare(VADriverContextP ctx,
2282                         struct encode_state *encode_state,
2283                         struct intel_encoder_context *encoder_context)
2284 {
2285     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2286     struct object_surface *obj_surface;
2287     struct object_buffer *obj_buffer;
2288     struct i965_coded_buffer_segment *coded_buffer_segment;
2289     VAStatus vaStatus = VA_STATUS_SUCCESS;
2290     dri_bo *bo;
2291     int i;
2292
2293     /* reconstructed surface */
2294     obj_surface = encode_state->reconstructed_object;
2295     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2296     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2297     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2298     mfc_context->surface_state.width = obj_surface->orig_width;
2299     mfc_context->surface_state.height = obj_surface->orig_height;
2300     mfc_context->surface_state.w_pitch = obj_surface->width;
2301     mfc_context->surface_state.h_pitch = obj_surface->height;
2302
2303     /* forward reference */
2304     obj_surface = encode_state->reference_objects[0];
2305
2306     if (obj_surface && obj_surface->bo) {
2307         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2308         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2309     } else
2310         mfc_context->reference_surfaces[0].bo = NULL;
2311
2312     /* backward reference */
2313     obj_surface = encode_state->reference_objects[1];
2314
2315     if (obj_surface && obj_surface->bo) {
2316         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2317         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2318     } else {
2319         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2320
2321         if (mfc_context->reference_surfaces[1].bo)
2322             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2323     }
2324
2325     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2326         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2327
2328         if (mfc_context->reference_surfaces[i].bo)
2329             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2330     }
2331
2332     /* input YUV surface */
2333     obj_surface = encode_state->input_yuv_object;
2334     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2335     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2336
2337     /* coded buffer */
2338     obj_buffer = encode_state->coded_buf_object;
2339     bo = obj_buffer->buffer_store->bo;
2340     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2341     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2342     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2343     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2344
2345     /* set the internal flag to 0 to indicate the coded size is unknown */
2346     dri_bo_map(bo, 1);
2347     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2348     coded_buffer_segment->mapped = 0;
2349     coded_buffer_segment->codec = encoder_context->codec;
2350     dri_bo_unmap(bo);
2351
2352     return vaStatus;
2353 }
2354
2355 static VAStatus
2356 gen9_mfc_mpeg2_encode_picture(VADriverContextP ctx,
2357                               struct encode_state *encode_state,
2358                               struct intel_encoder_context *encoder_context)
2359 {
2360     gen9_mfc_init(ctx, encode_state, encoder_context);
2361     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2362     /* Programming the BCS pipeline */
2363     gen9_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2364     gen9_mfc_run(ctx, encode_state, encoder_context);
2365
2366     return VA_STATUS_SUCCESS;
2367 }
2368
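/*
 * Seed the per-frame VP8 probability state: default MV contexts, key-frame
 * or inter-frame Y/UV mode probabilities, a skip-false probability looked up
 * from the base quantizer index, and the default coefficient probabilities
 * copied into the stream-in buffer object.
 */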
2369 static void vp8_enc_state_init(struct gen6_mfc_context *mfc_context,
2370                                VAEncPictureParameterBufferVP8 *pic_param,
2371                                VAQMatrixBufferVP8 *q_matrix)
2372 {
2373
2374     int is_key_frame = !pic_param->pic_flags.bits.frame_type;
2375     unsigned char *coeff_probs_stream_in_buffer;
2376     
2377     mfc_context->vp8_state.frame_header_lf_update_pos = 0;
2378     mfc_context->vp8_state.frame_header_qindex_update_pos = 0;
2379     mfc_context->vp8_state.frame_header_token_update_pos = 0;
2380     mfc_context->vp8_state.frame_header_bin_mv_upate_pos = 0;
2381
2382     mfc_context->vp8_state.prob_skip_false = 255;
2383     memset(mfc_context->vp8_state.mb_segment_tree_probs, 0, sizeof(mfc_context->vp8_state.mb_segment_tree_probs));
2384     memcpy(mfc_context->vp8_state.mv_probs, vp8_default_mv_context, sizeof(mfc_context->vp8_state.mv_probs));
2385     
2386     if (is_key_frame) {
2387         memcpy(mfc_context->vp8_state.y_mode_probs, vp8_kf_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
2388         memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_kf_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
2389
2390         mfc_context->vp8_state.prob_intra = 255;
2391         mfc_context->vp8_state.prob_last = 128;
2392         mfc_context->vp8_state.prob_gf = 128;
2393     } else {
2394         memcpy(mfc_context->vp8_state.y_mode_probs, vp8_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
2395         memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
2396
2397         mfc_context->vp8_state.prob_intra = 63;
2398         mfc_context->vp8_state.prob_last = 128;
2399         mfc_context->vp8_state.prob_gf = 128;
2400     }
2401     
2402     mfc_context->vp8_state.prob_skip_false = vp8_base_skip_false_prob[q_matrix->quantization_index[0]];
2403   
2404     dri_bo_map(mfc_context->vp8_state.coeff_probs_stream_in_bo, 1);
2405     coeff_probs_stream_in_buffer = (unsigned char *)mfc_context->vp8_state.coeff_probs_stream_in_bo->virtual;
2406     assert(coeff_probs_stream_in_buffer);
2407     memcpy(coeff_probs_stream_in_buffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
2408     dri_bo_unmap(mfc_context->vp8_state.coeff_probs_stream_in_bo);
2409 }
2410
2411 static void vp8_enc_state_update(struct gen6_mfc_context *mfc_context,
2412                                  VAQMatrixBufferVP8 *q_matrix)
2413 {
2414
2415     /* some other probabilities need to be updated */
2416 }
2417
2418 extern void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param,
2419                                       VAEncPictureParameterBufferVP8 *pic_param,
2420                                       VAQMatrixBufferVP8 *q_matrix,
2421                                       struct gen6_mfc_context *mfc_context);
2422
2423 static void vp8_enc_frame_header_binarize(struct encode_state *encode_state, 
2424                                           struct gen6_mfc_context *mfc_context)
2425 {
2426     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
2427     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2428     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
2429     unsigned char *frame_header_buffer;
2430
2431     binarize_vp8_frame_header(seq_param, pic_param, q_matrix, mfc_context);
2432  
2433     dri_bo_map(mfc_context->vp8_state.frame_header_bo, 1);
2434     frame_header_buffer = (unsigned char *)mfc_context->vp8_state.frame_header_bo->virtual;
2435     assert(frame_header_buffer);
2436     memcpy(frame_header_buffer, mfc_context->vp8_state.vp8_frame_header, (mfc_context->vp8_state.frame_header_bit_count + 7) / 8);
2437     dri_bo_unmap(mfc_context->vp8_state.frame_header_bo);
2438 }
2439
2440 #define MAX_VP8_FRAME_HEADER_SIZE              0x2000
2441 #define VP8_TOKEN_STATISTICS_BUFFER_SIZE       0x2000
2442
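/*
 * Per-frame (re)initialization for VP8: drop the previous frame's buffer
 * objects and reallocate the intra-row, deblocking and BSD/MPC row-store
 * scratch buffers, the macroblock status buffer and the aux batch buffer,
 * all sized from the frame dimensions in macroblocks.
 */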
2443 static void gen9_mfc_vp8_init(VADriverContextP ctx,
2444                               struct encode_state *encode_state,
2445                               struct intel_encoder_context *encoder_context)
2446 {
2447     struct i965_driver_data *i965 = i965_driver_data(ctx);
2448     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2449     dri_bo *bo;
2450     int i;
2451     int width_in_mbs = 0;
2452     int height_in_mbs = 0;
2453     int slice_batchbuffer_size;
2454
2455     VAEncSequenceParameterBufferVP8 *pSequenceParameter = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
2456     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2457     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
2458
2459     width_in_mbs = ALIGN(pSequenceParameter->frame_width, 16) / 16;
2460     height_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
2461
2462     slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
2463         (SLICE_HEADER + SLICE_TAIL);
2464
2465     /*Encode common setup for MFC*/
2466     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2467     mfc_context->post_deblocking_output.bo = NULL;
2468
2469     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2470     mfc_context->pre_deblocking_output.bo = NULL;
2471
2472     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2473     mfc_context->uncompressed_picture_source.bo = NULL;
2474
2475     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
2476     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2477
2478     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2479         if ( mfc_context->direct_mv_buffers[i].bo != NULL)
2480             dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2481         mfc_context->direct_mv_buffers[i].bo = NULL;
2482     }
2483
2484     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2485         if (mfc_context->reference_surfaces[i].bo != NULL)
2486             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2487         mfc_context->reference_surfaces[i].bo = NULL;
2488     }
2489
2490     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2491     bo = dri_bo_alloc(i965->intel.bufmgr,
2492                       "Buffer",
2493                       width_in_mbs * 64,
2494                       64);
2495     assert(bo);
2496     mfc_context->intra_row_store_scratch_buffer.bo = bo;
2497
2498     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2499     bo = dri_bo_alloc(i965->intel.bufmgr,
2500                       "Buffer",
2501                       width_in_mbs * height_in_mbs * 16,
2502                       64);
2503     assert(bo);
2504     mfc_context->macroblock_status_buffer.bo = bo;
2505
2506     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2507     bo = dri_bo_alloc(i965->intel.bufmgr,
2508                       "Buffer",
2509                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
2510                       64);
2511     assert(bo);
2512     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2513
2514     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2515     bo = dri_bo_alloc(i965->intel.bufmgr,
2516                       "Buffer",
2517                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
2518                       0x1000);
2519     assert(bo);
2520     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2521
2522     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2523     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2524
2525     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2526     mfc_context->aux_batchbuffer_surface.bo = NULL;
2527
2528     if (mfc_context->aux_batchbuffer)
2529         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2530
2531     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
2532     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
2533     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
2534     mfc_context->aux_batchbuffer_surface.pitch = 16;
2535     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
2536     mfc_context->aux_batchbuffer_surface.size_block = 16;
2537
2538     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
2539
2540     /* alloc vp8 encoding buffers*/
2541     dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
2542     bo = dri_bo_alloc(i965->intel.bufmgr,
2543                       "Buffer",
2544                       MAX_VP8_FRAME_HEADER_SIZE,
2545                       0x1000);
2546     assert(bo);
2547     mfc_context->vp8_state.frame_header_bo = bo;
2548
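         /* intermediate buffer: 9 equal slots of width_in_mbs * height_in_mbs * 256 bytes each,
          * presumably one for the first (mode/mv) partition plus up to 8 token partitions */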
2549     mfc_context->vp8_state.intermediate_buffer_max_size = width_in_mbs * height_in_mbs * 256 * 9;
2550     for(i = 0; i < 8; i++) {
2551         mfc_context->vp8_state.intermediate_partition_offset[i] = width_in_mbs * height_in_mbs * 256 * (i + 1);
2552     }
2553     dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
2554     bo = dri_bo_alloc(i965->intel.bufmgr,
2555                       "Buffer",
2556                       mfc_context->vp8_state.intermediate_buffer_max_size,
2557                       0x1000);
2558     assert(bo);
2559     mfc_context->vp8_state.intermediate_bo = bo;
2560
2561     dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
2562     bo = dri_bo_alloc(i965->intel.bufmgr,
2563                       "Buffer",
2564                       width_in_mbs * height_in_mbs * 16,
2565                       0x1000);
2566     assert(bo);
2567     mfc_context->vp8_state.stream_out_bo = bo;
2568
2569     dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
2570     bo = dri_bo_alloc(i965->intel.bufmgr,
2571                       "Buffer",
2572                       sizeof(vp8_default_coef_probs),
2573                       0x1000);
2574     assert(bo);
2575     mfc_context->vp8_state.coeff_probs_stream_in_bo = bo;
2576
2577     dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
2578     bo = dri_bo_alloc(i965->intel.bufmgr,
2579                       "Buffer",
2580                       VP8_TOKEN_STATISTICS_BUFFER_SIZE,
2581                       0x1000);
2582     assert(bo);
2583     mfc_context->vp8_state.token_statistics_bo = bo;
2584
2585     dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
2586     bo = dri_bo_alloc(i965->intel.bufmgr,
2587                       "Buffer",
2588                       width_in_mbs * 16 * 64,
2589                       0x1000);
2590     assert(bo);
2591     mfc_context->vp8_state.mpc_row_store_bo = bo;
2592
2593     vp8_enc_state_init(mfc_context, pic_param, q_matrix);
2594     vp8_enc_frame_header_binarize(encode_state, mfc_context);
2595 }
2596
2597 static VAStatus
2598 intel_mfc_vp8_prepare(VADriverContextP ctx,
2599                         struct encode_state *encode_state,
2600                         struct intel_encoder_context *encoder_context)
2601 {
2602     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2603     struct object_surface *obj_surface;
2604     struct object_buffer *obj_buffer;
2605     struct i965_coded_buffer_segment *coded_buffer_segment;
2606     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2607     VAStatus vaStatus = VA_STATUS_SUCCESS;
2608     dri_bo *bo;
2609     int i;
2610
2611     /* reconstructed surface */
2612     obj_surface = encode_state->reconstructed_object;
2613     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
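         /* when the loop filter is disabled the reconstructed frame is taken before deblocking,
          * otherwise it is taken after deblocking */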
2614     if (pic_param->loop_filter_level[0] == 0) {
2615         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2616         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2617     } else {
2618         mfc_context->post_deblocking_output.bo = obj_surface->bo;
2619         dri_bo_reference(mfc_context->post_deblocking_output.bo);
2620     }
2621
2622     mfc_context->surface_state.width = obj_surface->orig_width;
2623     mfc_context->surface_state.height = obj_surface->orig_height;
2624     mfc_context->surface_state.w_pitch = obj_surface->width;
2625     mfc_context->surface_state.h_pitch = obj_surface->height;
2626
2627     /* set vp8 reference frames */
2628     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2629         obj_surface = encode_state->reference_objects[i];
2630
2631         if (obj_surface && obj_surface->bo) {
2632             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
2633             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2634         } else {
2635             mfc_context->reference_surfaces[i].bo = NULL;
2636         }
2637     }
2638
2639     /* input YUV surface */
2640     obj_surface = encode_state->input_yuv_object;
2641     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2642     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2643
2644     /* coded buffer */
2645     obj_buffer = encode_state->coded_buf_object;
2646     bo = obj_buffer->buffer_store->bo;
2647     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2648     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2649     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2650     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2651
2652     dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
2653     mfc_context->vp8_state.final_frame_bo = mfc_context->mfc_indirect_pak_bse_object.bo;
2654     mfc_context->vp8_state.final_frame_byte_offset = I965_CODEDBUFFER_HEADER_SIZE;
2655     dri_bo_reference(mfc_context->vp8_state.final_frame_bo);
2656
2657     /* set the internal flag to 0 to indicate the coded size is unknown */
2658     dri_bo_map(bo, 1);
2659     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2660     coded_buffer_segment->mapped = 0;
2661     coded_buffer_segment->codec = encoder_context->codec;
2662     dri_bo_unmap(bo);
2663
2664     return vaStatus;
2665 }
2666
2667 static void
2668 gen9_mfc_vp8_encoder_cfg(VADriverContextP ctx, 
2669                          struct encode_state *encode_state,
2670                          struct intel_encoder_context *encoder_context)
2671 {
2672     struct intel_batchbuffer *batch = encoder_context->base.batch;
2673     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2674     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
2675     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2676
2677     BEGIN_BCS_BATCH(batch, 30);
2678     OUT_BCS_BATCH(batch, MFX_VP8_ENCODER_CFG | (30 - 2)); /* SKL should be 31-2 ? */
2679
2680     OUT_BCS_BATCH(batch,
2681                   0 << 9 | /* compressed bitstream output disable */
2682                   1 << 7 | /* disable per-segment delta qindex and loop filter in RC */
2683                   1 << 6 | /* RC initial pass */
2684                   0 << 4 | /* update segment feature data flag */
2685                   1 << 3 | /* bitstream statistics output enable */
2686                   1 << 2 | /* token statistics output enable */
2687                   0 << 1 | /* final bitstream output disable */
2688                   0 << 0); /*DW1*/
2689     
2690     OUT_BCS_BATCH(batch, 0); /*DW2*/
2691
2692     OUT_BCS_BATCH(batch, 
2693                   0xfff << 16 | /* max intra mb bit count limit */
2694                   0xfff << 0  /* max inter mb bit count limit */
2695                   ); /*DW3*/
2696
2697     OUT_BCS_BATCH(batch, 0); /*DW4*/
2698     OUT_BCS_BATCH(batch, 0); /*DW5*/
2699     OUT_BCS_BATCH(batch, 0); /*DW6*/
2700     OUT_BCS_BATCH(batch, 0); /*DW7*/
2701     OUT_BCS_BATCH(batch, 0); /*DW8*/
2702     OUT_BCS_BATCH(batch, 0); /*DW9*/
2703     OUT_BCS_BATCH(batch, 0); /*DW10*/
2704     OUT_BCS_BATCH(batch, 0); /*DW11*/
2705     OUT_BCS_BATCH(batch, 0); /*DW12*/
2706     OUT_BCS_BATCH(batch, 0); /*DW13*/
2707     OUT_BCS_BATCH(batch, 0); /*DW14*/
2708     OUT_BCS_BATCH(batch, 0); /*DW15*/
2709     OUT_BCS_BATCH(batch, 0); /*DW16*/
2710     OUT_BCS_BATCH(batch, 0); /*DW17*/
2711     OUT_BCS_BATCH(batch, 0); /*DW18*/
2712     OUT_BCS_BATCH(batch, 0); /*DW19*/
2713     OUT_BCS_BATCH(batch, 0); /*DW20*/
2714     OUT_BCS_BATCH(batch, 0); /*DW21*/
2715
2716     OUT_BCS_BATCH(batch, 
2717                  pic_param->pic_flags.bits.show_frame << 23 |
2718                  pic_param->pic_flags.bits.version << 20
2719                  ); /*DW22*/
2720
2721     OUT_BCS_BATCH(batch,
2722                  (seq_param->frame_height_scale << 14 | seq_param->frame_height) << 16 |
2723                  (seq_param->frame_width_scale << 14 | seq_param->frame_width) << 0
2724                  ); /*DW23*/
2725
2726     /*DW24*/
2727     OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bit_count); /* frame header bit count */
2728
2729     /*DW25*/
2730     OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_qindex_update_pos); /* frame header bin buffer qindex update pointer */
2731
2732     /*DW26*/
2733     OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_lf_update_pos); /* frame header bin buffer loop filter update pointer*/
2734
2735     /*DW27*/
2736     OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_token_update_pos); /* frame header bin buffer token update pointer */
2737
2738     /*DW28*/
2739     OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bin_mv_upate_pos); /*frame header bin buffer mv update pointer */
2740
2741     /*DW29*/
2742     OUT_BCS_BATCH(batch, 0);
2743
2744     ADVANCE_BCS_BATCH(batch);
2745 }
2746
2747 static void
2748 gen9_mfc_vp8_pic_state(VADriverContextP ctx,
2749                        struct encode_state *encode_state,
2750                        struct intel_encoder_context *encoder_context)
2751 {
2752     struct intel_batchbuffer *batch = encoder_context->base.batch;
2753     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2754     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
2755     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2756     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
2757     int i, j, log2num;
2758
2759     assert(pic_param->pic_flags.bits.num_token_partitions > 0);
2760     assert(pic_param->pic_flags.bits.num_token_partitions < 9);
2761     log2num = (int)log2(pic_param->pic_flags.bits.num_token_partitions);
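         /* per the VP8 spec the number of token partitions is 1, 2, 4 or 8, so log2() yields an integer */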
2762
2763     /*update mode and token probs*/
2764     vp8_enc_state_update(mfc_context, q_matrix);
2765  
2766     BEGIN_BCS_BATCH(batch, 38);
2767     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
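         /* DW1: frame size in macroblocks minus one, height in the upper half, width in the lower half */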
2768     OUT_BCS_BATCH(batch,
2769                   (ALIGN(seq_param->frame_height, 16) / 16 - 1) << 16 |
2770                   (ALIGN(seq_param->frame_width, 16) / 16 - 1) << 0);
2771  
2772     OUT_BCS_BATCH(batch,
2773                   log2num << 24 |
2774                   pic_param->sharpness_level << 16 |
2775                   pic_param->pic_flags.bits.sign_bias_alternate << 13 |
2776                   pic_param->pic_flags.bits.sign_bias_golden << 12 |
2777                   pic_param->pic_flags.bits.loop_filter_adj_enable << 11 |
2778                   pic_param->pic_flags.bits.mb_no_coeff_skip << 10 |
2779                   pic_param->pic_flags.bits.update_mb_segmentation_map << 9 |
2780                   pic_param->pic_flags.bits.segmentation_enabled << 8 |
2781                   !pic_param->pic_flags.bits.frame_type << 5 | /* 0 indicates an intra frame in the VP8 stream/spec (section 9.1) */
2782                   (pic_param->pic_flags.bits.version / 2) << 4 |
2783                   (pic_param->pic_flags.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2784                   !!pic_param->pic_flags.bits.version << 0); /* version 0: 6 tap */
2785  
2786     OUT_BCS_BATCH(batch,
2787                   pic_param->loop_filter_level[3] << 24 |
2788                   pic_param->loop_filter_level[2] << 16 |
2789                   pic_param->loop_filter_level[1] <<  8 |
2790                   pic_param->loop_filter_level[0] <<  0);
2791
2792     OUT_BCS_BATCH(batch,
2793                   q_matrix->quantization_index[3] << 24 |
2794                   q_matrix->quantization_index[2] << 16 |
2795                   q_matrix->quantization_index[1] <<  8 |
2796                   q_matrix->quantization_index[0] << 0);
2797
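         /* DW5/DW6: quantization index deltas packed in sign/magnitude form, i.e. the sign bit of
          * each 16-bit delta plus its absolute value */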
2798     OUT_BCS_BATCH(batch,
2799                  ((unsigned short)(q_matrix->quantization_index_delta[4]) >> 15) << 28 | 
2800                  abs(q_matrix->quantization_index_delta[4]) << 24 |
2801                  ((unsigned short)(q_matrix->quantization_index_delta[3]) >> 15) << 20 | 
2802                  abs(q_matrix->quantization_index_delta[3]) << 16 |
2803                  ((unsigned short)(q_matrix->quantization_index_delta[2]) >> 15) << 12 | 
2804                  abs(q_matrix->quantization_index_delta[2]) << 8 |
2805                  ((unsigned short)(q_matrix->quantization_index_delta[1]) >> 15) << 4 | 
2806                  abs(q_matrix->quantization_index_delta[1]) << 0);
2807
2808     OUT_BCS_BATCH(batch,
2809                  ((unsigned short)(q_matrix->quantization_index_delta[0]) >> 15) << 4 |
2810                  abs(q_matrix->quantization_index_delta[0]) << 0);
2811     
2812     OUT_BCS_BATCH(batch,
2813                  pic_param->clamp_qindex_high << 8 |
2814                  pic_param->clamp_qindex_low << 0);
2815
2816     for (i = 8; i < 19; i++) {
2817          OUT_BCS_BATCH(batch, 0xffffffff);
2818     }
2819
2820     OUT_BCS_BATCH(batch,
2821                   mfc_context->vp8_state.mb_segment_tree_probs[2] << 16 |
2822                   mfc_context->vp8_state.mb_segment_tree_probs[1] <<  8 |
2823                   mfc_context->vp8_state.mb_segment_tree_probs[0] <<  0);
2824
2825     OUT_BCS_BATCH(batch,
2826                   mfc_context->vp8_state.prob_skip_false << 24 |
2827                   mfc_context->vp8_state.prob_intra      << 16 |
2828                   mfc_context->vp8_state.prob_last       <<  8 |
2829                   mfc_context->vp8_state.prob_gf         <<  0);
2830
2831     OUT_BCS_BATCH(batch,
2832                   mfc_context->vp8_state.y_mode_probs[3] << 24 |
2833                   mfc_context->vp8_state.y_mode_probs[2] << 16 |
2834                   mfc_context->vp8_state.y_mode_probs[1] <<  8 |
2835                   mfc_context->vp8_state.y_mode_probs[0] <<  0);
2836
2837     OUT_BCS_BATCH(batch,
2838                   mfc_context->vp8_state.uv_mode_probs[2] << 16 |
2839                   mfc_context->vp8_state.uv_mode_probs[1] <<  8 |
2840                   mfc_context->vp8_state.uv_mode_probs[0] <<  0);
2841     
2842     /* MV update value, DW23-DW32 */
2843     for (i = 0; i < 2; i++) {
2844         for (j = 0; j < 20; j += 4) {
2845             OUT_BCS_BATCH(batch,
2846                           (j + 3 == 19 ? 0 : mfc_context->vp8_state.mv_probs[i][j + 3]) << 24 |
2847                           mfc_context->vp8_state.mv_probs[i][j + 2] << 16 |
2848                           mfc_context->vp8_state.mv_probs[i][j + 1] <<  8 |
2849                           mfc_context->vp8_state.mv_probs[i][j + 0] <<  0);
2850         }
2851     }
2852
2853     OUT_BCS_BATCH(batch,
2854                   (pic_param->ref_lf_delta[3] & 0x7f) << 24 |
2855                   (pic_param->ref_lf_delta[2] & 0x7f) << 16 |
2856                   (pic_param->ref_lf_delta[1] & 0x7f) <<  8 |
2857                   (pic_param->ref_lf_delta[0] & 0x7f) <<  0);
2858
2859     OUT_BCS_BATCH(batch,
2860                   (pic_param->mode_lf_delta[3] & 0x7f) << 24 |
2861                   (pic_param->mode_lf_delta[2] & 0x7f) << 16 |
2862                   (pic_param->mode_lf_delta[1] & 0x7f) <<  8 |
2863                   (pic_param->mode_lf_delta[0] & 0x7f) <<  0);
2864
2865     OUT_BCS_BATCH(batch, 0);
2866     OUT_BCS_BATCH(batch, 0);
2867     OUT_BCS_BATCH(batch, 0);
2868
2869     ADVANCE_BCS_BATCH(batch);
2870 }
2871
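     /* Emit a 3-dword buffer address field: the relocation (or 0 when the buffer is absent) followed
      * by two zero dwords, presumably the upper address bits and the memory attribute dword. */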
2872 #define OUT_VP8_BUFFER(bo, offset)                                      \
2873     if (bo)                                                             \
2874         OUT_BCS_RELOC(batch,                                            \
2875                       bo,                                               \
2876                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, \
2877                       offset);                                           \
2878     else                                                                \
2879         OUT_BCS_BATCH(batch, 0);                                        \
2880     OUT_BCS_BATCH(batch, 0);                                            \
2881     OUT_BCS_BATCH(batch, 0);
2882
2883 static void 
2884 gen9_mfc_vp8_bsp_buf_base_addr_state(VADriverContextP ctx, 
2885                                      struct encode_state *encode_state,
2886                                      struct intel_encoder_context *encoder_context)
2887 {
2888     struct intel_batchbuffer *batch = encoder_context->base.batch;
2889     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2890
2891     BEGIN_BCS_BATCH(batch, 32);
2892     OUT_BCS_BATCH(batch, MFX_VP8_BSP_BUF_BASE_ADDR_STATE | (32 - 2));
2893
2894     OUT_VP8_BUFFER(mfc_context->vp8_state.frame_header_bo, 0);
2895
2896     OUT_VP8_BUFFER(mfc_context->vp8_state.intermediate_bo, 0);
2897     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[0]);
2898     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[1]);
2899     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[2]);
2900     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[3]);
2901     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[4]);
2902     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[5]);
2903     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[6]);
2904     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[7]);
2905     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_buffer_max_size);
2906
2907     OUT_VP8_BUFFER(mfc_context->vp8_state.final_frame_bo, I965_CODEDBUFFER_HEADER_SIZE);
2908     OUT_BCS_BATCH(batch, 0);
2909
2910     OUT_VP8_BUFFER(mfc_context->vp8_state.stream_out_bo, 0);
2911     OUT_VP8_BUFFER(mfc_context->vp8_state.coeff_probs_stream_in_bo, 0);
2912     OUT_VP8_BUFFER(mfc_context->vp8_state.token_statistics_bo, 0);
2913     OUT_VP8_BUFFER(mfc_context->vp8_state.mpc_row_store_bo, 0);
2914
2915     ADVANCE_BCS_BATCH(batch);
2916 }
2917
2918 static void
2919 gen9_mfc_vp8_pipeline_picture_programing(VADriverContextP ctx,
2920                                            struct encode_state *encode_state,
2921                                            struct intel_encoder_context *encoder_context)
2922 {
2923     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2924
2925     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_VP8, encoder_context);
2926     mfc_context->set_surface_state(ctx, encoder_context);
2927     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2928     gen9_mfc_pipe_buf_addr_state(ctx, encoder_context);
2929     gen9_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2930     gen9_mfc_vp8_bsp_buf_base_addr_state(ctx, encode_state, encoder_context);
2931     gen9_mfc_vp8_pic_state(ctx, encode_state, encoder_context);
2932     gen9_mfc_vp8_encoder_cfg(ctx, encode_state, encoder_context);
2933 }
2934
2935 static const unsigned char
2936 vp8_intra_mb_mode_map[VME_MB_INTRA_MODE_COUNT] = {
2937     PAK_V_PRED,
2938     PAK_H_PRED,
2939     PAK_DC_PRED,
2940     PAK_TM_PRED
2941 };
2942
2943 static const unsigned char
2944 vp8_intra_block_mode_map[VME_B_INTRA_MODE_COUNT] = {
2945     PAK_B_VE_PRED,
2946     PAK_B_HE_PRED,
2947     PAK_B_DC_PRED,
2948     PAK_B_LD_PRED,
2949     PAK_B_RD_PRED,
2950     PAK_B_VR_PRED,
2951     PAK_B_HD_PRED,
2952     PAK_B_VL_PRED,
2953     PAK_B_HU_PRED
2954 };
2955
2956 static int inline gen9_mfc_vp8_intra_mb_mode_map(unsigned int vme_pred_mode, int is_luma_4x4)
2957 {
2958     unsigned int i, j, pak_pred_mode = 0;
2959     unsigned int vme_sub_blocks_pred_mode[8], pak_sub_blocks_pred_mode[8]; /* intra modes of the 8 packed 4x4 sub-blocks */
2960
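         /* the 16x16 luma and chroma modes map directly from the 2-bit VME mode; for intra_4x4 each
          * of the 8 packed 4-bit sub-block modes in the word is remapped individually */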
2961     if (!is_luma_4x4) {
2962         pak_pred_mode = vp8_intra_mb_mode_map[vme_pred_mode & 0x3];
2963     } else {
2964         for (i = 0; i < 8; i++) { 
2965             vme_sub_blocks_pred_mode[i] = ((vme_pred_mode >> (4 * i)) & 0xf);
2966             assert(vme_sub_blocks_pred_mode[i] < VME_B_INTRA_MODE_COUNT);
2967             pak_sub_blocks_pred_mode[i] = vp8_intra_block_mode_map[vme_sub_blocks_pred_mode[i]];
2968             pak_pred_mode |= (pak_sub_blocks_pred_mode[i] << (4 * i));
2969         }
2970     }
2971
2972     return pak_pred_mode;
2973 }
2974 static void
2975 gen9_mfc_vp8_pak_object_intra(VADriverContextP ctx, 
2976                               struct intel_encoder_context *encoder_context,
2977                               unsigned int *msg,
2978                               int x, int y,
2979                               struct intel_batchbuffer *batch)
2980 {
2981     unsigned int vme_intra_mb_mode, vme_chroma_pred_mode;
2982     unsigned int pak_intra_mb_mode, pak_chroma_pred_mode;
2983     unsigned int vme_luma_pred_mode[2], pak_luma_pred_mode[2];
2984
2985     if (batch == NULL)
2986         batch = encoder_context->base.batch;
2987
2988     vme_intra_mb_mode = ((msg[0] & 0x30) >> 4);
2989     assert((vme_intra_mb_mode == 0) || (vme_intra_mb_mode == 2)); //vp8 only supports intra_16x16 and intra_4x4
2990     pak_intra_mb_mode = (vme_intra_mb_mode >> 1);
2991
2992     vme_luma_pred_mode[0] = msg[1];
2993     vme_luma_pred_mode[1] = msg[2];
2994     vme_chroma_pred_mode = msg[3] & 0x3;
2995
2996     pak_luma_pred_mode[0] = gen9_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[0], pak_intra_mb_mode);
2997     pak_luma_pred_mode[1] = gen9_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[1], pak_intra_mb_mode);
2998     pak_chroma_pred_mode = gen9_mfc_vp8_intra_mb_mode_map(vme_chroma_pred_mode, 0);
2999
3000     BEGIN_BCS_BATCH(batch, 7);
3001
3002     OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
3003     OUT_BCS_BATCH(batch, 0);
3004     OUT_BCS_BATCH(batch, 0);
3005     OUT_BCS_BATCH(batch,
3006                   (0 << 20) |                    /* mv format: intra mb */
3007                   (0 << 18) |                    /* Segment ID */
3008                   (0 << 17) |                    /* disable coeff clamp */
3009                   (1 << 13) |                    /* intra mb flag */
3010                   (0 << 11) |                    /* refer picture select: last frame */
3011                   (pak_intra_mb_mode << 8) |     /* mb type */
3012                   (pak_chroma_pred_mode << 4) |  /* mb uv mode */
3013                   (0 << 2) |                     /* skip mb flag: disable */
3014                   0);
3015
3016     OUT_BCS_BATCH(batch, (y << 16) | x);
3017     OUT_BCS_BATCH(batch, pak_luma_pred_mode[0]);
3018     OUT_BCS_BATCH(batch, pak_luma_pred_mode[1]);
3019
3020     ADVANCE_BCS_BATCH(batch);
3021 }
3022
3023 static void
3024 gen9_mfc_vp8_pak_object_inter(VADriverContextP ctx, 
3025                               struct intel_encoder_context *encoder_context,
3026                               unsigned int *msg,
3027                               int offset,
3028                               int x, int y,
3029                               struct intel_batchbuffer *batch)
3030 {
3031     int i;
3032
3033     if (batch == NULL)
3034         batch = encoder_context->base.batch;
3035
3036     /* only inter_16x16 is supported for now */
3037     assert((msg[AVC_INTER_MSG_OFFSET] & INTER_MODE_MASK) == INTER_16X16);
3038     /* for inter_16x16 all 16 MVs should be the same; the MV is copied to the start of the
3039      * VME MB record so that the indirect-data offset stays 64-byte aligned */
3040     msg[0] = (msg[AVC_INTER_MV_OFFSET/4] & 0xfffefffe);
3041     for (i = 1; i < 16; i++) {
3042         msg[i] = msg[0];
3043     }
3044     
3045     BEGIN_BCS_BATCH(batch, 7);
3046
3047     OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
3048     OUT_BCS_BATCH(batch,
3049                   (0 << 29) |           /* enable inline mv data: disable */
3050                   64);
3051     OUT_BCS_BATCH(batch,
3052                   offset);
3053     OUT_BCS_BATCH(batch,
3054                   (4 << 20) |           /* mv format: inter */
3055                   (0 << 18) |           /* Segment ID */
3056                   (0 << 17) |           /* coeff clamp: disable */
3057                   (0 << 13) |           /* intra mb flag: inter mb */
3058                   (0 << 11) |           /* refer picture select: last frame */
3059                   (0 << 8) |            /* mb type: 16x16 */
3060                   (0 << 4) |            /* mb uv mode: dc_pred */
3061                   (0 << 2) |            /* skip mb flag: disable */
3062                   0);
3063
3064     OUT_BCS_BATCH(batch, (y << 16) | x);
3065
3066     /*new mv*/
3067     OUT_BCS_BATCH(batch, 0x8);
3068     OUT_BCS_BATCH(batch, 0x8);
3069
3070     ADVANCE_BCS_BATCH(batch);
3071 }
3072
3073 static void
3074 gen9_mfc_vp8_pak_pipeline(VADriverContextP ctx,
3075                           struct encode_state *encode_state,
3076                           struct intel_encoder_context *encoder_context,
3077                           struct intel_batchbuffer *slice_batch)
3078 {
3079     struct gen6_vme_context *vme_context = encoder_context->vme_context;
3080     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3081     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3082     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
3083     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
3084     unsigned int *msg = NULL;
3085     unsigned char *msg_ptr = NULL;
3086     unsigned int i, offset, is_intra_frame;
3087
3088     is_intra_frame = !pic_param->pic_flags.bits.frame_type;
3089
3090     dri_bo_map(vme_context->vme_output.bo, 1);
3091     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
3092
3093     for (i = 0; i < width_in_mbs * height_in_mbs; i++) {
3094         int h_pos = i % width_in_mbs;
3095         int v_pos = i / width_in_mbs;
3096         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
3097         
3098         if (is_intra_frame) {
3099             gen9_mfc_vp8_pak_object_intra(ctx,
3100                     encoder_context,
3101                     msg,
3102                     h_pos, v_pos,
3103                     slice_batch);
3104         } else {
3105             int inter_rdo, intra_rdo;
3106             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
3107             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
3108
3109             if (intra_rdo < inter_rdo) {
3110                 gen9_mfc_vp8_pak_object_intra(ctx,
3111                         encoder_context,
3112                         msg,
3113                         h_pos, v_pos,
3114                         slice_batch);
3115             } else {
3116                 offset = i * vme_context->vme_output.size_block;
3117                 gen9_mfc_vp8_pak_object_inter(ctx,
3118                         encoder_context,
3119                         msg,
3120                         offset,
3121                         h_pos, v_pos,
3122                         slice_batch);
3123             }
3124         }
3125     }
3126
3127     dri_bo_unmap(vme_context->vme_output.bo);
3128 }
3129
3130 /*
3131  * A batch buffer for vp8 pak object commands
3132  */
3133 static dri_bo *
3134 gen9_mfc_vp8_software_batchbuffer(VADriverContextP ctx,
3135                                           struct encode_state *encode_state,
3136                                           struct intel_encoder_context *encoder_context)
3137 {
3138     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3139     struct intel_batchbuffer *batch;
3140     dri_bo *batch_bo;
3141
3142     batch = mfc_context->aux_batchbuffer;
3143     batch_bo = batch->buffer;
3144
3145     gen9_mfc_vp8_pak_pipeline(ctx, encode_state, encoder_context, batch);
3146
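         /* pad to a qword boundary, then terminate the aux batch with an MI_NOOP and MI_BATCH_BUFFER_END */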
3147     intel_batchbuffer_align(batch, 8);
3148
3149     BEGIN_BCS_BATCH(batch, 2);
3150     OUT_BCS_BATCH(batch, 0);
3151     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
3152     ADVANCE_BCS_BATCH(batch);
3153
3154     dri_bo_reference(batch_bo);
3155     intel_batchbuffer_free(batch);
3156     mfc_context->aux_batchbuffer = NULL;
3157
3158     return batch_bo;
3159 }
3160
3161 static void
3162 gen9_mfc_vp8_pipeline_programing(VADriverContextP ctx,
3163                                    struct encode_state *encode_state,
3164                                    struct intel_encoder_context *encoder_context)
3165 {
3166     struct intel_batchbuffer *batch = encoder_context->base.batch;
3167     dri_bo *slice_batch_bo;
3168
3169     slice_batch_bo = gen9_mfc_vp8_software_batchbuffer(ctx, encode_state, encoder_context);
3170
3171     // begin programming
3172     intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
3173     intel_batchbuffer_emit_mi_flush(batch);
3174
3175     // picture level programming
3176     gen9_mfc_vp8_pipeline_picture_programing(ctx, encode_state, encoder_context);
3177
3178     BEGIN_BCS_BATCH(batch, 4);
3179     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
3180     OUT_BCS_RELOC(batch,
3181                   slice_batch_bo,
3182                   I915_GEM_DOMAIN_COMMAND, 0,
3183                   0);
3184     OUT_BCS_BATCH(batch, 0);
3185     OUT_BCS_BATCH(batch, 0);
3186     ADVANCE_BCS_BATCH(batch);
3187
3188     // end programming
3189     intel_batchbuffer_end_atomic(batch);
3190
3191     dri_bo_unreference(slice_batch_bo);
3192 }
3193
3194 static void gen9_mfc_calc_vp8_coded_buffer_size(VADriverContextP ctx,
3195                           struct encode_state *encode_state,
3196                           struct intel_encoder_context *encoder_context)
3197 {
3198     struct i965_driver_data *i965 = i965_driver_data(ctx);
3199     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3200     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3201     unsigned char is_intra_frame = !pic_param->pic_flags.bits.frame_type;
3202     unsigned int *vp8_encoding_status, first_partition_bytes, token_partition_bytes, vp8_coded_bytes;
3203
3204     dri_bo_map(mfc_context->vp8_state.token_statistics_bo, 0);
3205
3206     vp8_encoding_status = (unsigned int *)mfc_context->vp8_state.token_statistics_bo->virtual;
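         /* layout assumed here: dword 0 of the token statistics buffer holds the first-partition
          * bit count and dword 9 holds the total token-partition bit count */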
3207     first_partition_bytes = (vp8_encoding_status[0] + 7) / 8;
3208     token_partition_bytes = (vp8_encoding_status[9] + 7) / 8;
3209     
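         /* the uncompressed data chunk is 3 bytes (frame tag) plus 7 more on a key frame (start code
          * and dimensions), and each token partition except the last needs a 3-byte size field */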
3210     /* coded_bytes includes the P0~P8 partition bytes + the uncompressed data chunk bytes + the partition_size bytes in the bitstream */
3211     vp8_coded_bytes = first_partition_bytes + token_partition_bytes + (3 + 7 * !!is_intra_frame) + (pic_param->pic_flags.bits.num_token_partitions - 1) * 3;
3212
3213     dri_bo_unmap(mfc_context->vp8_state.token_statistics_bo);
3214
3215     dri_bo_map(mfc_context->vp8_state.final_frame_bo, 0);
3216     struct i965_coded_buffer_segment *coded_buffer_segment = (struct i965_coded_buffer_segment *)(mfc_context->vp8_state.final_frame_bo->virtual);
3217     coded_buffer_segment->base.size = vp8_coded_bytes;
3218     dri_bo_unmap(mfc_context->vp8_state.final_frame_bo);
3219 }
3220
3221 static VAStatus
3222 gen9_mfc_vp8_encode_picture(VADriverContextP ctx,
3223                               struct encode_state *encode_state,
3224                               struct intel_encoder_context *encoder_context)
3225 {
3226     gen9_mfc_vp8_init(ctx, encode_state, encoder_context);
3227     intel_mfc_vp8_prepare(ctx, encode_state, encoder_context);
3228     /* Programming the BCS pipeline */
3229     gen9_mfc_vp8_pipeline_programing(ctx, encode_state, encoder_context);
3230     gen9_mfc_run(ctx, encode_state, encoder_context);
3231     gen9_mfc_calc_vp8_coded_buffer_size(ctx, encode_state, encoder_context);
3232
3233     return VA_STATUS_SUCCESS;
3234 }
3235
3236 static void
3237 gen9_mfc_context_destroy(void *context)
3238 {
3239     struct gen6_mfc_context *mfc_context = context;
3240     int i;
3241
3242     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
3243     mfc_context->post_deblocking_output.bo = NULL;
3244
3245     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
3246     mfc_context->pre_deblocking_output.bo = NULL;
3247
3248     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
3249     mfc_context->uncompressed_picture_source.bo = NULL;
3250
3251     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
3252     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
3253
3254     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
3255         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
3256         mfc_context->direct_mv_buffers[i].bo = NULL;
3257     }
3258
3259     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
3260     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
3261
3262     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
3263     mfc_context->macroblock_status_buffer.bo = NULL;
3264
3265     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
3266     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3267
3268     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
3269     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3270
3271
3272     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
3273         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
3274         mfc_context->reference_surfaces[i].bo = NULL;
3275     }
3276
3277     i965_gpe_context_destroy(&mfc_context->gpe_context);
3278
3279     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
3280     mfc_context->mfc_batchbuffer_surface.bo = NULL;
3281
3282     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
3283     mfc_context->aux_batchbuffer_surface.bo = NULL;
3284
3285     if (mfc_context->aux_batchbuffer)
3286         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
3287
3288     mfc_context->aux_batchbuffer = NULL;
3289
3290     dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
3291     mfc_context->vp8_state.coeff_probs_stream_in_bo = NULL;
3292
3293     dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
3294     mfc_context->vp8_state.final_frame_bo = NULL;
3295
3296     dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
3297     mfc_context->vp8_state.frame_header_bo = NULL;
3298
3299     dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
3300     mfc_context->vp8_state.intermediate_bo = NULL;
3301
3302     dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
3303     mfc_context->vp8_state.mpc_row_store_bo = NULL;
3304
3305     dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
3306     mfc_context->vp8_state.stream_out_bo = NULL;
3307
3308     dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
3309     mfc_context->vp8_state.token_statistics_bo = NULL;
3310
3311     free(mfc_context);
3312 }
3313
3314 static VAStatus gen9_mfc_pipeline(VADriverContextP ctx,
3315                                   VAProfile profile,
3316                                   struct encode_state *encode_state,
3317                                   struct intel_encoder_context *encoder_context)
3318 {
3319     VAStatus vaStatus;
3320
3321     switch (profile) {
3322     case VAProfileH264ConstrainedBaseline:
3323     case VAProfileH264Main:
3324     case VAProfileH264High:
3325     case VAProfileH264MultiviewHigh:
3326     case VAProfileH264StereoHigh:
3327         vaStatus = gen9_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
3328         break;
3329
3330         /* FIXME: add for other profile */
3331     case VAProfileMPEG2Simple:
3332     case VAProfileMPEG2Main:
3333         vaStatus = gen9_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
3334         break;
3335
3336     case VAProfileVP8Version0_3:
3337         vaStatus = gen9_mfc_vp8_encode_picture(ctx, encode_state, encoder_context);
3338         break;
3339
3340     default:
3341         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
3342         break;
3343     }
3344
3345     return vaStatus;
3346 }
3347
3348 Bool gen9_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3349 {
3350     struct gen6_mfc_context *mfc_context = NULL;
3351
3352 #if MFC_SOFTWARE_HASWELL
3353     if ((encoder_context->codec == CODEC_H264) ||
3354         (encoder_context->codec == CODEC_H264_MVC) ||
3355         (encoder_context->codec == CODEC_MPEG2)) {
3356
3357         return gen8_mfc_context_init(ctx, encoder_context);
3358     }
3359 #endif
3360
3361     mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
3362     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
3363
3364     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
3365     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
3366
3367     mfc_context->gpe_context.curbe.length = 32 * 4;
3368
3369     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
3370     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
3371     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
3372     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
3373     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
3374
3375     i965_gpe_load_kernels(ctx,
3376                           &mfc_context->gpe_context,
3377                           gen9_mfc_kernels,
3378                           NUM_MFC_KERNEL);
3379
3380     mfc_context->pipe_mode_select = gen9_mfc_pipe_mode_select;
3381     mfc_context->set_surface_state = gen9_mfc_surface_state;
3382     mfc_context->ind_obj_base_addr_state = gen9_mfc_ind_obj_base_addr_state;
3383     mfc_context->avc_img_state = gen9_mfc_avc_img_state;
3384     mfc_context->avc_qm_state = gen9_mfc_avc_qm_state;
3385     mfc_context->avc_fqm_state = gen9_mfc_avc_fqm_state;
3386     mfc_context->insert_object = gen9_mfc_avc_insert_object;
3387     mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
3388
3389     encoder_context->mfc_context = mfc_context;
3390     encoder_context->mfc_context_destroy = gen9_mfc_context_destroy;
3391     encoder_context->mfc_pipeline = gen9_mfc_pipeline;
3392     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
3393
3394     return True;
3395 }