OSDN Git Service

ROI:enable on gen8 and gen9
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_mfc.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
46 #define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
47 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
48 #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
49
50 #define B0_STEP_REV             2
51 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
52
/* Gen9 media-kernel binary (assembled .g9b dump) used to build the PAK
 * batch buffer for AVC intra macroblocks. */
static const uint32_t gen9_mfc_batchbuffer_avc_intra[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_intra.g9b"
};

/* Gen9 media-kernel binary used to build the PAK batch buffer for AVC
 * inter macroblocks. */
static const uint32_t gen9_mfc_batchbuffer_avc_inter[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_inter.g9b"
};
60
/* Kernel descriptor table: maps the MFC batch-buffer kernel IDs to the
 * shader binaries above.  The trailing NULL is the (not yet allocated)
 * kernel BO, filled in when the kernels are loaded. */
static struct i965_kernel gen9_mfc_kernels[] = {
    {
        "MFC AVC INTRA BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTRA,
        gen9_mfc_batchbuffer_avc_intra,
        sizeof(gen9_mfc_batchbuffer_avc_intra),
        NULL
    },

    {
        "MFC AVC INTER BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTER,
        gen9_mfc_batchbuffer_avc_inter,
        sizeof(gen9_mfc_batchbuffer_avc_inter),
        NULL
    },
};
78
79 #define         INTER_MODE_MASK         0x03
80 #define         INTER_8X8               0x03
81 #define         INTER_16X8              0x01
82 #define         INTER_8X16              0x02
83 #define         SUBMB_SHAPE_MASK        0x00FF00
84 #define         INTER_16X16             0x00
85
86 #define         INTER_MV8               (4 << 20)
87 #define         INTER_MV32              (6 << 20)
88
/*
 * Emit MFX_PIPE_MODE_SELECT (5 DWords): selects the codec standard
 * (MPEG-2, AVC or VP8), forces long-format encoder mode, and enables
 * pre-/post-deblocking output depending on which output BO the MFC
 * context currently holds.  The exact DWord/bit layout is a hardware
 * contract; do not reorder.
 */
static void
gen9_mfc_pipe_mode_select(VADriverContextP ctx,
                          int standard_select,
                          struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    /* Only these three standards are supported by this encoder path. */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC  ||
           standard_select == MFX_FORMAT_VP8);

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* Stream-Out Enable */
                  ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
                  ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
                  (0 << 6)  | /* frame statistics stream-out enable*/
                  (0 << 5)  | /* not in stitch mode */
                  (1 << 4)  | /* encoding mode */
                  (standard_select << 0));  /* standard select: avc or mpeg2 */
    OUT_BCS_BATCH(batch,
                  (0 << 7)  | /* expand NOA bus flag */
                  (0 << 6)  | /* disable slice-level clock gating */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    /* DW3-4: reserved */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
128
/*
 * Emit MFX_SURFACE_STATE (6 DWords) describing the reconstructed/source
 * surface: NV12 (planar 4:2:0, interleaved U/V), Y-major tiled, with
 * dimensions and pitch taken from mfc_context->surface_state.
 */
static void
gen9_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    /* DW2: height/width are programmed minus one. */
    OUT_BCS_BATCH(batch,
                  ((mfc_context->surface_state.height - 1) << 18) |
                  ((mfc_context->surface_state.width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
                  (0 << 22) | /* surface object control state, FIXME??? */
                  ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 for interleave U/V */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                                                           /* must be 0 for interleave U/V */
                  (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
157
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 DWords): points the MFX engine at
 * the VME output buffer (indirect MV objects, bounded by vme_size) and at
 * the PAK-BSE object used for the compressed bitstream output.  The
 * IT-COFF and DBLK indirect objects are unused by the encoder and left 0.
 */
static void
gen9_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
                                 struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int vme_size;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* the DW1-3 is for the MFX indirect bistream offset */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* Total size of the VME output: upper bound for the MV object reads. */
    vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW6-10 is for MFX Indirect MV Object Base Address */
    OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size);
    OUT_BCS_BATCH(batch, 0);

    /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* Upper bound of the PAK-BSE object (end offset within the BO). */
    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  mfc_context->mfc_indirect_pak_bse_object.end_offset);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
217
/*
 * Emit MFX_AVC_IMG_STATE (16 DWords): per-picture AVC encode settings
 * derived from the VA-API picture parameter buffer — frame size in MBs,
 * weighted prediction flags, entropy coding mode (CAVLC/CABAC) and the
 * 8x8 transform flag.  Picture is always programmed as a progressive,
 * frame-MB-only frame here.
 */
static void
gen9_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

    /* Round the surface dimensions up to whole macroblocks. */
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;

    BEGIN_BCS_BATCH(batch, 16);

    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    /*DW1. MB setting of frame */
    OUT_BCS_BATCH(batch,
                  ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
    /* DW2: frame height/width in MBs, minus one. */
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    /* DW3 QP setting */
    OUT_BCS_BATCH(batch,
                  (0 << 24) |   /* Second Chroma QP Offset */
                  (0 << 16) |   /* Chroma QP Offset */
                  (0 << 14) |   /* Max-bit conformance Intra flag */
                  (0 << 13) |   /* Max Macroblock size conformance Inter flag */
                  (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
                  (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
                  (0 << 8)  |   /* FIXME: Image Structure */
                  (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |   /* Mininum Frame size */
                  (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
                  (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
                  (0 << 13) |   /* CABAC 0 word insertion test enable */
                  (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
                  (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
                  (0 << 8)  |   /* FIXME: MbMvFormatFlag */
                  (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
                  (0 << 6)  |   /* Only valid for VLD decoding mode */
                  (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
                  (0 << 4)  |   /* Direct 8x8 inference flag */
                  (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
                  (1 << 2)  |   /* Frame MB only flag */
                  (0 << 1)  |   /* MBAFF mode is in active */
                  (0 << 0));    /* Field picture flag */
    /* DW5 Trellis quantization */
    OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
    OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
                  (0xBB8 << 16) |       /* InterMbMaxSz */
                  (0xEE8) );            /* IntraMbMaxSz */
    OUT_BCS_BATCH(batch, 0);            /* Reserved */
    /* DW8. QP delta */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    /* DW10. Bit setting for MB */
    OUT_BCS_BATCH(batch, 0x8C000000);
    OUT_BCS_BATCH(batch, 0x00010000);
    /* DW12. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0x02010100);
    /* DW14. For short format */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
285
286 static void
287 gen9_mfc_qm_state(VADriverContextP ctx,
288                   int qm_type,
289                   unsigned int *qm,
290                   int qm_length,
291                   struct intel_encoder_context *encoder_context)
292 {
293     struct intel_batchbuffer *batch = encoder_context->base.batch;
294     unsigned int qm_buffer[16];
295
296     assert(qm_length <= 16);
297     assert(sizeof(*qm) == 4);
298     memcpy(qm_buffer, qm, qm_length * 4);
299
300     BEGIN_BCS_BATCH(batch, 18);
301     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
302     OUT_BCS_BATCH(batch, qm_type << 0);
303     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
304     ADVANCE_BCS_BATCH(batch);
305 }
306
307 static void
308 gen9_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
309 {
310     unsigned int qm[16] = {
311         0x10101010, 0x10101010, 0x10101010, 0x10101010,
312         0x10101010, 0x10101010, 0x10101010, 0x10101010,
313         0x10101010, 0x10101010, 0x10101010, 0x10101010,
314         0x10101010, 0x10101010, 0x10101010, 0x10101010
315     };
316
317     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
318     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
319     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
320     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
321 }
322
323 static void
324 gen9_mfc_fqm_state(VADriverContextP ctx,
325                    int fqm_type,
326                    unsigned int *fqm,
327                    int fqm_length,
328                    struct intel_encoder_context *encoder_context)
329 {
330     struct intel_batchbuffer *batch = encoder_context->base.batch;
331     unsigned int fqm_buffer[32];
332
333     assert(fqm_length <= 32);
334     assert(sizeof(*fqm) == 4);
335     memcpy(fqm_buffer, fqm, fqm_length * 4);
336
337     BEGIN_BCS_BATCH(batch, 34);
338     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
339     OUT_BCS_BATCH(batch, fqm_type << 0);
340     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
341     ADVANCE_BCS_BATCH(batch);
342 }
343
344 static void
345 gen9_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
346 {
347     unsigned int qm[32] = {
348         0x10001000, 0x10001000, 0x10001000, 0x10001000,
349         0x10001000, 0x10001000, 0x10001000, 0x10001000,
350         0x10001000, 0x10001000, 0x10001000, 0x10001000,
351         0x10001000, 0x10001000, 0x10001000, 0x10001000,
352         0x10001000, 0x10001000, 0x10001000, 0x10001000,
353         0x10001000, 0x10001000, 0x10001000, 0x10001000,
354         0x10001000, 0x10001000, 0x10001000, 0x10001000,
355         0x10001000, 0x10001000, 0x10001000, 0x10001000
356     };
357
358     gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
359     gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
360     gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
361     gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
362 }
363
/*
 * Emit MFX_INSERT_OBJECT: inserts raw bitstream data (headers, tail
 * bits, etc.) directly into the encoded output stream.
 *
 * insert_data / lenght_in_dws [sic]: payload and its size in DWords.
 * data_bits_in_last_dw:  valid bits in the final DWord; 0 means all 32.
 * skip_emul_byte_count:  leading bytes exempt from emulation prevention.
 * is_last_header / is_end_of_slice: stream-position flags for the HW.
 * emulation_flag:        enable start-code emulation-prevention bytes.
 * batch:                 target batch; NULL selects the default
 *                        encoder_context->base.batch.
 */
static void
gen9_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
                           unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
                           int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
                           struct intel_batchbuffer *batch)
{
    if (batch == NULL)
        batch = encoder_context->base.batch;

    /* 0 is encoded as "all 32 bits of the last DWord are valid". */
    if (data_bits_in_last_dw == 0)
        data_bits_in_last_dw = 32;

    BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);

    OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 16) |   /* always start at offset 0 */
                  (data_bits_in_last_dw << 8) |
                  (skip_emul_byte_count << 4) |
                  (!!emulation_flag << 3) |
                  ((!!is_last_header) << 2) |
                  ((!!is_end_of_slice) << 1) |
                  (0 << 0));    /* FIXME: ??? */
    intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);

    ADVANCE_BCS_BATCH(batch);
}
391
392
/*
 * Per-frame MFC (re)initialization: releases all buffer objects held from
 * the previous frame and allocates the scratch buffers whose sizes depend
 * on the frame dimensions (taken from the H.264 or MPEG-2 sequence
 * parameters).  Also (re)creates the auxiliary slice batch buffer and
 * initializes the GPE context.
 */
static void gen9_mfc_init(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    dri_bo *bo;
    int i;
    int width_in_mbs = 0;
    int height_in_mbs = 0;
    int slice_batchbuffer_size;

    /* Frame size in macroblocks, from the codec-specific sequence params. */
    if (encoder_context->codec == CODEC_H264 ||
        encoder_context->codec == CODEC_H264_MVC) {
        VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        width_in_mbs = pSequenceParameter->picture_width_in_mbs;
        height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    } else {
        VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;

        assert(encoder_context->codec == CODEC_MPEG2);

        width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
        height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
    }

    /* Room for per-MB commands plus per-slice header/tail insertions. */
    slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
                (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;

    /*Encode common setup for MFC*/
    dri_bo_unreference(mfc_context->post_deblocking_output.bo);
    mfc_context->post_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
    mfc_context->pre_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
    mfc_context->uncompressed_picture_source.bo = NULL;

    dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
    mfc_context->mfc_indirect_pak_bse_object.bo = NULL;

    /* Drop direct-MV buffers kept from the previous frame. */
    for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
        if (mfc_context->direct_mv_buffers[i].bo != NULL)
            dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
        mfc_context->direct_mv_buffers[i].bo = NULL;
    }

    /* Drop references to the previous frame's reference surfaces. */
    for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
        if (mfc_context->reference_surfaces[i].bo != NULL)
            dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
        mfc_context->reference_surfaces[i].bo = NULL;
    }

    /* Intra row-store scratch: 64 bytes per MB column. */
    dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      width_in_mbs * 64,
                      64);
    assert(bo);
    mfc_context->intra_row_store_scratch_buffer.bo = bo;

    /* Macroblock status buffer: 16 bytes per MB. */
    dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      width_in_mbs * height_in_mbs * 16,
                      64);
    assert(bo);
    mfc_context->macroblock_status_buffer.bo = bo;

    dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
                      64);
    assert(bo);
    mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;

    dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
                      0x1000);
    assert(bo);
    mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;

    dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
    mfc_context->mfc_batchbuffer_surface.bo = NULL;

    dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.bo = NULL;

    if (mfc_context->aux_batchbuffer)
        intel_batchbuffer_free(mfc_context->aux_batchbuffer);

    /* Recreate the auxiliary slice batch buffer and expose its BO as a
     * surface (16-byte blocks) for the media kernels. */
    mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
    mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
    dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.pitch = 16;
    mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
    mfc_context->aux_batchbuffer_surface.size_block = 16;

    i965_gpe_context_init(ctx, &mfc_context->gpe_context);
}
497
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 DWords): programs every frame-level
 * buffer address the MFX engine needs — pre/post deblocking outputs,
 * the uncompressed source, MB status stream-out, row-store scratch
 * buffers, and the 16 reference picture slots.  Unused slots are 0.
 */
static void
gen9_mfc_pipe_buf_addr_state(VADriverContextP ctx,
                             struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int i;

    BEGIN_BCS_BATCH(batch, 61);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));

    /* the DW1-3 is for pre_deblocking */
    if (mfc_context->pre_deblocking_output.bo)
        OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* the DW4-6 is for the post_deblocking */

    /* post output addr  */
    if (mfc_context->post_deblocking_output.bo)
        OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW7-9 is for the uncompressed_picture */
    OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0); /* uncompressed data */

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW10-12 is for the mb status */
    OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0); /* StreamOut data*/

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW16-18 is for the deblocking filter */
    OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 19-50 is for Reference pictures*/
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        if ( mfc_context->reference_surfaces[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }

        OUT_BCS_BATCH(batch, 0);
    }

    /* DW51: reserved */
    OUT_BCS_BATCH(batch, 0);

    /* The DW 52-54 is for the MB status buffer */
    OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 55-57 is the ILDB buffer */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 58-60 is the second ILDB buffer */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
600
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 DWords): direct-MV buffer addresses
 * for the reference frames and the current frame, followed by the POC
 * list.  Empty DMV slots are programmed as 0.
 */
static void
gen9_mfc_avc_directmode_state(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    int i;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* the DW1-32 is for the direct MV for reference */
    /* Two DWords per slot; the last two DMV buffers are reserved for the
     * current frame, hence the "- 2" bound. */
    for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
        if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
            OUT_BCS_BATCH(batch, 0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* DW33: reserved */
    OUT_BCS_BATCH(batch, 0);

    /* the DW34-36 is the MV for the current reference */
    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POL list */
    for(i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, i/2);
    }
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
647
648
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 DWords): points the bitstream
 * processor at the BSD/MPC row-store scratch buffer.  The MPR row-store
 * and bitplane-read buffers are unused by the encoder and left 0.
 */
static void
gen9_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
    /* DW1-3: BSD/MPC row-store scratch buffer address. */
    OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW7-9 is for Bitplane Read Buffer Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
677
678
/*
 * Program all picture-level AVC state in the order the MFX pipeline
 * requires: pipe mode, surface, indirect objects, buffer addresses,
 * image/QM/FQM state, direct mode and the reference index lists.
 * Several steps go through mfc_context function pointers so gen-specific
 * overrides apply.
 */
static void gen9_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
                                                      struct encode_state *encode_state,
                                                      struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
    mfc_context->set_surface_state(ctx, encoder_context);
    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
    gen9_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen9_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    mfc_context->avc_img_state(ctx, encode_state, encoder_context);
    mfc_context->avc_qm_state(ctx, encoder_context);
    mfc_context->avc_fqm_state(ctx, encoder_context);
    gen9_mfc_avc_directmode_state(ctx, encoder_context);
    intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
}
696
697
698 static VAStatus gen9_mfc_run(VADriverContextP ctx,
699                              struct encode_state *encode_state,
700                              struct intel_encoder_context *encoder_context)
701 {
702     struct intel_batchbuffer *batch = encoder_context->base.batch;
703
704     intel_batchbuffer_flush(batch);             //run the pipeline
705
706     return VA_STATUS_SUCCESS;
707 }
708
709
710 static VAStatus
711 gen9_mfc_stop(VADriverContextP ctx,
712               struct encode_state *encode_state,
713               struct intel_encoder_context *encoder_context,
714               int *encoded_bits_size)
715 {
716     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
717     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
718     VACodedBufferSegment *coded_buffer_segment;
719
720     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
721     assert(vaStatus == VA_STATUS_SUCCESS);
722     *encoded_bits_size = coded_buffer_segment->size * 8;
723     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
724
725     return VA_STATUS_SUCCESS;
726 }
727
728
729 static void
730 gen9_mfc_avc_slice_state(VADriverContextP ctx,
731                          VAEncPictureParameterBufferH264 *pic_param,
732                          VAEncSliceParameterBufferH264 *slice_param,
733                          struct encode_state *encode_state,
734                          struct intel_encoder_context *encoder_context,
735                          int rate_control_enable,
736                          int qp,
737                          struct intel_batchbuffer *batch)
738 {
739     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
740     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
741     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
742     int beginmb = slice_param->macroblock_address;
743     int endmb = beginmb + slice_param->num_macroblocks;
744     int beginx = beginmb % width_in_mbs;
745     int beginy = beginmb / width_in_mbs;
746     int nextx =  endmb % width_in_mbs;
747     int nexty = endmb / width_in_mbs;
748     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
749     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
750     int maxQpN, maxQpP;
751     unsigned char correct[6], grow, shrink;
752     int i;
753     int weighted_pred_idc = 0;
754     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
755     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
756     int num_ref_l0 = 0, num_ref_l1 = 0;
757
758     if (batch == NULL)
759         batch = encoder_context->base.batch;
760
761     if (slice_type == SLICE_TYPE_I) {
762         luma_log2_weight_denom = 0;
763         chroma_log2_weight_denom = 0;
764     } else if (slice_type == SLICE_TYPE_P) {
765         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
766         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
767
768         if (slice_param->num_ref_idx_active_override_flag)
769             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
770     } else if (slice_type == SLICE_TYPE_B) {
771         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
772         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
773         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
774
775         if (slice_param->num_ref_idx_active_override_flag) {
776             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
777             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
778         }
779
780         if (weighted_pred_idc == 2) {
781             /* 8.4.3 - Derivation process for prediction weights (8-279) */
782             luma_log2_weight_denom = 5;
783             chroma_log2_weight_denom = 5;
784         }
785     }
786
787     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
788     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
789
790     for (i = 0; i < 6; i++)
791         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
792
793     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit +
794         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
795     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit +
796         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
797
798     BEGIN_BCS_BATCH(batch, 11);;
799
800     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
801     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
802
803     OUT_BCS_BATCH(batch,
804                   (num_ref_l0 << 16) |
805                   (num_ref_l1 << 24) |
806                   (chroma_log2_weight_denom << 8) |
807                   (luma_log2_weight_denom << 0));
808
809     OUT_BCS_BATCH(batch,
810                   (weighted_pred_idc << 30) |
811                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
812                   (slice_param->disable_deblocking_filter_idc << 27) |
813                   (slice_param->cabac_init_idc << 24) |
814                   (qp<<16) |                    /*Slice Quantization Parameter*/
815                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
816                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
817     OUT_BCS_BATCH(batch,
818                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
819                   (beginx << 16) |
820                   slice_param->macroblock_address );
821     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
822     OUT_BCS_BATCH(batch,
823                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
824                   (1 << 30) |           /*ResetRateControlCounter*/
825                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
826                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
827                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/
828                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
829                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/
830                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/
831                   (last_slice << 19) |     /*IsLastSlice*/
832                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
833                   (1 << 17) |       /*HeaderPresentFlag*/
834                   (1 << 16) |       /*SliceData PresentFlag*/
835                   (1 << 15) |       /*TailPresentFlag*/
836                   (1 << 13) |       /*RBSP NAL TYPE*/
837                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
838     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
839     OUT_BCS_BATCH(batch,
840                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/
841                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
842                   (shrink << 8)  |
843                   (grow << 0));
844     OUT_BCS_BATCH(batch,
845                   (correct[5] << 20) |
846                   (correct[4] << 16) |
847                   (correct[3] << 12) |
848                   (correct[2] << 8) |
849                   (correct[1] << 4) |
850                   (correct[0] << 0));
851     OUT_BCS_BATCH(batch, 0);
852
853     ADVANCE_BCS_BATCH(batch);
854 }
855
856
857 static int
858 gen9_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
859                               int qp,unsigned int *msg,
860                               struct intel_encoder_context *encoder_context,
861                               unsigned char target_mb_size, unsigned char max_mb_size,
862                               struct intel_batchbuffer *batch)
863 {
864     int len_in_dwords = 12;
865     unsigned int intra_msg;
866 #define         INTRA_MSG_FLAG          (1 << 13)
867 #define         INTRA_MBTYPE_MASK       (0x1F0000)
868     if (batch == NULL)
869         batch = encoder_context->base.batch;
870
871     BEGIN_BCS_BATCH(batch, len_in_dwords);
872
873     intra_msg = msg[0] & 0xC0FF;
874     intra_msg |= INTRA_MSG_FLAG;
875     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
876     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
877     OUT_BCS_BATCH(batch, 0);
878     OUT_BCS_BATCH(batch, 0);
879     OUT_BCS_BATCH(batch,
880                   (0 << 24) |           /* PackedMvNum, Debug*/
881                   (0 << 20) |           /* No motion vector */
882                   (1 << 19) |           /* CbpDcY */
883                   (1 << 18) |           /* CbpDcU */
884                   (1 << 17) |           /* CbpDcV */
885                   intra_msg);
886
887     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
888     OUT_BCS_BATCH(batch, 0x000F000F);     /* Code Block Pattern */
889
890     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
891
892     /*Stuff for Intra MB*/
893     OUT_BCS_BATCH(batch, msg[1]);       /* We using Intra16x16 no 4x4 predmode*/
894     OUT_BCS_BATCH(batch, msg[2]);
895     OUT_BCS_BATCH(batch, msg[3]&0xFF);
896
897     /*MaxSizeInWord and TargetSzieInWord*/
898     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
899                   (target_mb_size << 16) );
900
901     OUT_BCS_BATCH(batch, 0);
902
903     ADVANCE_BCS_BATCH(batch);
904
905     return len_in_dwords;
906 }
907
/*
 * Emit one MFC_AVC_PAK_OBJECT (12 DWORDs) for an inter-coded macroblock.
 *
 * @msg points at the VME output record for this MB, already advanced past
 * the per-MB header by the caller.  @offset is the byte offset of this
 * MB's motion vectors inside the VME output buffer; the PAK engine fetches
 * them indirectly from there.  The MV array is rewritten in place first to
 * match the layout the AVC_PAK command expects.  Returns the command
 * length in DWORDs.
 */
static int
gen9_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
                              unsigned int *msg, unsigned int offset,
                              struct intel_encoder_context *encoder_context,
                              unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
                              struct intel_batchbuffer *batch)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int len_in_dwords = 12;
    unsigned int inter_msg = 0;
    if (batch == NULL)
        batch = encoder_context->base.batch;
    {
#define MSG_MV_OFFSET   4
        unsigned int *mv_ptr;
        mv_ptr = msg + MSG_MV_OFFSET;
        /* MV of VME output is based on 16 sub-blocks. So it is necessary
         * to convert them to be compatible with the format of AVC_PAK
         * command.
         */
        if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
            /* MV[0] and MV[2] are replicated */
            mv_ptr[4] = mv_ptr[0];
            mv_ptr[5] = mv_ptr[1];
            mv_ptr[2] = mv_ptr[8];
            mv_ptr[3] = mv_ptr[9];
            mv_ptr[6] = mv_ptr[8];
            mv_ptr[7] = mv_ptr[9];
        } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
            /* MV[0] and MV[1] are replicated */
            mv_ptr[2] = mv_ptr[0];
            mv_ptr[3] = mv_ptr[1];
            mv_ptr[4] = mv_ptr[16];
            mv_ptr[5] = mv_ptr[17];
            mv_ptr[6] = mv_ptr[24];
            mv_ptr[7] = mv_ptr[25];
        } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
                   !(msg[1] & SUBMB_SHAPE_MASK)) {
            /* 8x8 without sub-partitions: compact one MV pair per 8x8 block.
             * Don't touch MV[0] or MV[1] */
            mv_ptr[2] = mv_ptr[8];
            mv_ptr[3] = mv_ptr[9];
            mv_ptr[4] = mv_ptr[16];
            mv_ptr[5] = mv_ptr[17];
            mv_ptr[6] = mv_ptr[24];
            mv_ptr[7] = mv_ptr[25];
        }
    }

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));

    /* DW1: MV count in the indirect buffer — 32 normally, 128 when an
     * 8x8 MB has sub-partitions. */
    inter_msg = 32;
    /* MV quantity */
    if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
        if (msg[1] & SUBMB_SHAPE_MASK)
            inter_msg = 128;
    }
    OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
    OUT_BCS_BATCH(batch, offset);
    /* DW3: VME message bits + CbpDcY/U/V + MV-format flags. */
    inter_msg = msg[0] & (0x1F00FFFF);
    inter_msg |= INTER_MV8;
    inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
    if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
        (msg[1] & SUBMB_SHAPE_MASK)) {
        inter_msg |= INTER_MV32;
    }

    OUT_BCS_BATCH(batch, inter_msg);

    OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
    OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */
#if 0
    if ( slice_type == SLICE_TYPE_B) {
        OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
    } else {
        OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
    }
#else
    OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
#endif

    /* DW7: sub-MB shapes / prediction modes from the VME message. */
    inter_msg = msg[1] >> 8;
    /*Stuff for Inter MB*/
    OUT_BCS_BATCH(batch, inter_msg);
    OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
    OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);

    /*MaxSizeInWord and TargetSzieInWord*/
    OUT_BCS_BATCH(batch, (max_mb_size << 24) |
                  (target_mb_size << 16) );

    OUT_BCS_BATCH(batch, 0x0);
    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}
1005
1006 #define         AVC_INTRA_RDO_OFFSET    4
1007 #define         AVC_INTER_RDO_OFFSET    10
1008 #define         AVC_INTER_MSG_OFFSET    8
1009 #define         AVC_INTER_MV_OFFSET             48
1010 #define         AVC_RDO_MASK            0xFFFF
1011
1012 static void
1013 gen9_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1014                                        struct encode_state *encode_state,
1015                                        struct intel_encoder_context *encoder_context,
1016                                        int slice_index,
1017                                        struct intel_batchbuffer *slice_batch)
1018 {
1019     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1020     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1021     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1022     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1023     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1024     unsigned int *msg = NULL, offset = 0;
1025     unsigned char *msg_ptr = NULL;
1026     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1027     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1028     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1029     int i,x,y;
1030     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1031     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1032     unsigned int tail_data[] = { 0x0, 0x0 };
1033     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1034     int is_intra = slice_type == SLICE_TYPE_I;
1035     int qp_slice;
1036     int qp_mb;
1037
1038     qp_slice = qp;
1039     if (rate_control_mode == VA_RC_CBR) {
1040         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1041         if (encode_state->slice_header_index[slice_index] == 0) {
1042             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1043             qp_slice = qp;
1044         }
1045     }
1046
1047     /* only support for 8-bit pixel bit-depth */
1048     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1049     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1050     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1051     assert(qp >= 0 && qp < 52);
1052
1053          gen9_mfc_avc_slice_state(ctx,
1054                                   pPicParameter,
1055                                   pSliceParameter,
1056                                   encode_state, encoder_context,
1057                                   (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
1058
1059         if ( slice_index == 0)
1060             intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1061
1062          intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1063
1064     dri_bo_map(vme_context->vme_output.bo , 1);
1065     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1066
1067     if (is_intra) {
1068         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1069     } else {
1070         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1071     }
1072
1073     for (i = pSliceParameter->macroblock_address;
1074         i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1075         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1076         x = i % width_in_mbs;
1077         y = i / width_in_mbs;
1078         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1079
1080         if (vme_context->roi_enabled) {
1081             qp_mb = *(vme_context->qp_per_mb + i);
1082         } else
1083             qp_mb = qp;
1084
1085         if (is_intra) {
1086             assert(msg);
1087             gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch);
1088         } else {
1089             int inter_rdo, intra_rdo;
1090             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1091             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1092             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1093             if (intra_rdo < inter_rdo) {
1094                 gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch);
1095             } else {
1096                 msg += AVC_INTER_MSG_OFFSET;
1097                 gen9_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp_mb, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1098             }
1099         }
1100     }
1101
1102     dri_bo_unmap(vme_context->vme_output.bo);
1103
1104     if ( last_slice ) {
1105         mfc_context->insert_object(ctx, encoder_context,
1106                                    tail_data, 2, 8,
1107                                    2, 1, 1, 0, slice_batch);
1108     } else {
1109         mfc_context->insert_object(ctx, encoder_context,
1110                                    tail_data, 1, 8,
1111                                    1, 1, 1, 0, slice_batch);
1112     }
1113
1114
1115 }
1116
1117 static dri_bo *
1118 gen9_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1119                                   struct encode_state *encode_state,
1120                                   struct intel_encoder_context *encoder_context)
1121 {
1122     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1123     struct intel_batchbuffer *batch;
1124     dri_bo *batch_bo;
1125     int i;
1126
1127     batch = mfc_context->aux_batchbuffer;
1128     batch_bo = batch->buffer;
1129     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1130         gen9_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1131     }
1132
1133     intel_batchbuffer_align(batch, 8);
1134
1135     BEGIN_BCS_BATCH(batch, 2);
1136     OUT_BCS_BATCH(batch, 0);
1137     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1138     ADVANCE_BCS_BATCH(batch);
1139
1140     dri_bo_reference(batch_bo);
1141     intel_batchbuffer_free(batch);
1142     mfc_context->aux_batchbuffer = NULL;
1143
1144     return batch_bo;
1145 }
1146
1147 static void
1148 gen9_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1149                                     struct encode_state *encode_state,
1150                                     struct intel_encoder_context *encoder_context)
1151
1152 {
1153     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1154     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1155
1156     assert(vme_context->vme_output.bo);
1157     mfc_context->buffer_suface_setup(ctx,
1158                                      &mfc_context->gpe_context,
1159                                      &vme_context->vme_output,
1160                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1161                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1162     assert(mfc_context->aux_batchbuffer_surface.bo);
1163     mfc_context->buffer_suface_setup(ctx,
1164                                      &mfc_context->gpe_context,
1165                                      &mfc_context->aux_batchbuffer_surface,
1166                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1167                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1168 }
1169
1170 static void
1171 gen9_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1172                                      struct encode_state *encode_state,
1173                                      struct intel_encoder_context *encoder_context)
1174
1175 {
1176     struct i965_driver_data *i965 = i965_driver_data(ctx);
1177     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1178     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1179     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1180     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1181     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1182     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1183     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1184     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr,
1185                                                            "MFC batchbuffer",
1186                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1187                                                            0x1000);
1188     mfc_context->buffer_suface_setup(ctx,
1189                                      &mfc_context->gpe_context,
1190                                      &mfc_context->mfc_batchbuffer_surface,
1191                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1192                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1193 }
1194
/*
 * Bind all surfaces used by the hardware batchbuffer kernel: the VME
 * output and slice-header inputs, then the generated-command output.
 */
static void
gen9_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context)
{
    gen9_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
    gen9_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
}
1203
1204 static void
1205 gen9_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
1206                                 struct encode_state *encode_state,
1207                                 struct intel_encoder_context *encoder_context)
1208 {
1209     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1210     struct gen6_interface_descriptor_data *desc;
1211     int i;
1212     dri_bo *bo;
1213
1214     bo = mfc_context->gpe_context.idrt.bo;
1215     dri_bo_map(bo, 1);
1216     assert(bo->virtual);
1217     desc = bo->virtual;
1218
1219     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1220         struct i965_kernel *kernel;
1221
1222         kernel = &mfc_context->gpe_context.kernels[i];
1223         assert(sizeof(*desc) == 32);
1224
1225         /*Setup the descritor table*/
1226         memset(desc, 0, sizeof(*desc));
1227         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1228         desc->desc2.sampler_count = 0;
1229         desc->desc2.sampler_state_pointer = 0;
1230         desc->desc3.binding_table_entry_count = 2;
1231         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1232         desc->desc4.constant_urb_entry_read_offset = 0;
1233         desc->desc4.constant_urb_entry_read_length = 4;
1234
1235         /*kernel start*/
1236         dri_bo_emit_reloc(bo,
1237                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1238                           0,
1239                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1240                           kernel->bo);
1241         desc++;
1242     }
1243
1244     dri_bo_unmap(bo);
1245 }
1246
1247 static void
1248 gen9_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
1249                                     struct encode_state *encode_state,
1250                                     struct intel_encoder_context *encoder_context)
1251 {
1252     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1253
1254     (void)mfc_context;
1255 }
1256
/*
 * Emit one CMD_MEDIA_OBJECT (12 DWORDs) that drives the MFC batchbuffer
 * kernel selected by @index (intra vs. inter).  The six inline-data
 * DWORDs tell the kernel: where the slice head lives (@head_offset),
 * where to write in the output batch (@batchbuffer_offset), the head and
 * tail sizes in owords, how many MB commands to generate, first/last
 * object and last-slice flags, the starting MB coordinate, the frame
 * width in MBs, and the QP to encode with.
 */
static void
gen9_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
                                         int index,
                                         int head_offset,
                                         int batchbuffer_offset,
                                         int head_size,
                                         int tail_size,
                                         int number_mb_cmds,
                                         int first_object,
                                         int last_object,
                                         int last_slice,
                                         int mb_x,
                                         int mb_y,
                                         int width_in_mbs,
                                         int qp)
{
    BEGIN_BATCH(batch, 12);

    OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
    OUT_BATCH(batch, index);    /* interface descriptor index: intra or inter kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    /*inline data */
    OUT_BATCH(batch, head_offset);          /* slice head location, in owords */
    OUT_BATCH(batch, batchbuffer_offset);   /* write position in the output batch */
    OUT_BATCH(batch,
              head_size << 16 |
              tail_size);
    OUT_BATCH(batch,
              number_mb_cmds << 16 |
              first_object << 2 |
              last_object << 1 |
              last_slice);
    OUT_BATCH(batch,
              mb_y << 8 |
              mb_x);                        /* starting MB coordinate of this run */
    OUT_BATCH(batch,
              qp << 16 |
              width_in_mbs);

    ADVANCE_BATCH(batch);
}
1302
/*
 * Queue the CMD_MEDIA_OBJECTs that make the hardware batchbuffer kernel
 * expand PAK commands for every macroblock of one slice.
 *
 * The slice is processed in runs of up to 128 MBs per media object.  The
 * first object also copies the slice head (@head_size owords of slice
 * state/headers starting at @head_offset) and the last object appends
 * @tail_size owords of tail data; @batchbuffer_offset tracks where the
 * generated commands land in the output batch (CMD_LEN_IN_OWORD owords
 * per MB).
 */
static void
gen9_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
                                       struct intel_encoder_context *encoder_context,
                                       VAEncSliceParameterBufferH264 *slice_param,
                                       int head_offset,
                                       unsigned short head_size,
                                       unsigned short tail_size,
                                       int batchbuffer_offset,
                                       int qp,
                                       int last_slice)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int total_mbs = slice_param->num_macroblocks;
    int number_mb_cmds = 128;   /* MBs handled per media object */
    int starting_mb = 0;
    int last_object = 0;
    int first_object = 1;
    int i;
    int mb_x, mb_y;
    int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;

    /* Full 128-MB runs. */
    for (i = 0; i < total_mbs / number_mb_cmds; i++) {
        /* This run is the last object iff it exactly consumes the slice. */
        last_object = (total_mbs - starting_mb) == number_mb_cmds;
        mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
        mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
        assert(mb_x <= 255 && mb_y <= 255);

        starting_mb += number_mb_cmds;

        gen9_mfc_batchbuffer_emit_object_command(batch,
                                                 index,
                                                 head_offset,
                                                 batchbuffer_offset,
                                                 head_size,
                                                 tail_size,
                                                 number_mb_cmds,
                                                 first_object,
                                                 last_object,
                                                 last_slice,
                                                 mb_x,
                                                 mb_y,
                                                 width_in_mbs,
                                                 qp);

        /* The first object also emits the slice head into the output batch. */
        if (first_object) {
            head_offset += head_size;
            batchbuffer_offset += head_size;
        }

        /* The last object also emits the tail. */
        if (last_object) {
            head_offset += tail_size;
            batchbuffer_offset += tail_size;
        }

        batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;

        first_object = 0;
    }

    /* Remainder run (total_mbs % 128 MBs).  Skipped when the loop above
     * already emitted the last object, i.e. total_mbs is a non-zero
     * multiple of 128. */
    if (!last_object) {
        last_object = 1;
        number_mb_cmds = total_mbs % number_mb_cmds;
        mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
        mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
        assert(mb_x <= 255 && mb_y <= 255);
        starting_mb += number_mb_cmds;

        gen9_mfc_batchbuffer_emit_object_command(batch,
                                                 index,
                                                 head_offset,
                                                 batchbuffer_offset,
                                                 head_size,
                                                 tail_size,
                                                 number_mb_cmds,
                                                 first_object,
                                                 last_object,
                                                 last_slice,
                                                 mb_x,
                                                 mb_y,
                                                 width_in_mbs,
                                                 qp);
    }
}
1388
/*
 * Program one slice into the auxiliary (CPU-built) batch buffer and queue
 * the matching per-slice command for the hardware MFC batch buffer.
 *
 * return size in Owords (16bytes)
 */
static int
gen9_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context,
                               int slice_index,
                               int batchbuffer_offset)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
    /* true when this slice's MBs reach the end of the frame */
    int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
    int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    unsigned int tail_data[] = { 0x0, 0x0 };
    long head_offset;
    int old_used = intel_batchbuffer_used_size(slice_batch), used;
    unsigned short head_size, tail_size;
    int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
    int qp_slice;

    qp_slice = qp;
    if (rate_control_mode == VA_RC_CBR) {
        /* In CBR mode the BRC-chosen QP overrides the application's value */
        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
        if (encode_state->slice_header_index[slice_index] == 0) {
            /* Driver-generated slice header: propagate the BRC QP into the
             * slice parameter so the packed header matches */
            pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
            qp_slice = qp;
        }
    }

    /* only support for 8-bit pixel bit-depth */
    assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
    assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
    assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
    assert(qp >= 0 && qp < 52);

    /* Start of this slice's header, in Owords, inside the aux batch buffer */
    head_offset = old_used / 16;
    gen9_mfc_avc_slice_state(ctx,
                             pPicParameter,
                             pSliceParameter,
                             encode_state,
                             encoder_context,
                             (rate_control_mode == VA_RC_CBR),
                             qp_slice,
                             slice_batch);

    /* SPS/PPS and other stream-level headers go in front of the first slice only */
    if (slice_index == 0)
        intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);


    intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);


    intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
    used = intel_batchbuffer_used_size(slice_batch);
    head_size = (used - old_used) / 16;
    old_used = used;

    /* tail */
    if (last_slice) {
        /* NOTE(review): two data dwords for the final slice vs. one otherwise —
         * presumably stream-end padding; exact insert_object argument semantics
         * are defined by gen9_mfc_avc_insert_object */
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   tail_data,
                                   2,
                                   8,
                                   2,
                                   1,
                                   1,
                                   0,
                                   slice_batch);
    } else {
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   tail_data,
                                   1,
                                   8,
                                   1,
                                   1,
                                   1,
                                   0,
                                   slice_batch);
    }

    intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
    used = intel_batchbuffer_used_size(slice_batch);
    tail_size = (used - old_used) / 16;

    /* Record head/tail location and size for the hardware batch buffer kernel */
    gen9_mfc_avc_batchbuffer_slice_command(ctx,
                                           encoder_context,
                                           pSliceParameter,
                                           head_offset,
                                           head_size,
                                           tail_size,
                                           batchbuffer_offset,
                                           qp,
                                           last_slice);

    /* Total Owords consumed: headers + tail + one command per macroblock */
    return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
}
1494
1495 static void
1496 gen9_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1497                                   struct encode_state *encode_state,
1498                                   struct intel_encoder_context *encoder_context)
1499 {
1500     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1501     struct intel_batchbuffer *batch = encoder_context->base.batch;
1502     int i, size, offset = 0;
1503
1504     intel_batchbuffer_start_atomic(batch, 0x4000);
1505     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1506
1507     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1508         size = gen9_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1509         offset += size;
1510     }
1511
1512     intel_batchbuffer_end_atomic(batch);
1513     intel_batchbuffer_flush(batch);
1514 }
1515
/*
 * Build the hardware MFC batch buffer with the media (GPE) kernels:
 * bind the surfaces, set up the interface descriptor table and the
 * constant buffer, then run the per-slice pipeline that fills it.
 * The four steps are order-dependent.
 */
static void
gen9_mfc_build_avc_batchbuffer(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context)
{
    gen9_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
    gen9_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
    gen9_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
    gen9_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
}
1526
1527 static dri_bo *
1528 gen9_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1529                                   struct encode_state *encode_state,
1530                                   struct intel_encoder_context *encoder_context)
1531 {
1532     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1533
1534     gen9_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1535     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1536
1537     return mfc_context->mfc_batchbuffer_surface.bo;
1538 }
1539
1540
/*
 * Top-level BCS programming for one frame: obtain the second-level slice
 * batch buffer (software- or hardware-built), emit the picture-level MFC
 * state, then chain into the slice batch via MI_BATCH_BUFFER_START.
 */
static void
gen9_mfc_avc_pipeline_programing(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    dri_bo *slice_batch_bo;

    /* Interlaced encoding is not implemented on this path */
    if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
        fprintf(stderr, "Current VA driver don't support interlace mode!\n");
        assert(0);
        return;
    }

    /* Both helpers return a referenced bo; released at the end of this function */
    if (encoder_context->soft_batch_force)
        slice_batch_bo = gen9_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
    else
        slice_batch_bo = gen9_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);


    // begin programing
    intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
    intel_batchbuffer_emit_mi_flush(batch);

    // picture level programing
    gen9_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);

    /* Chain to the slice batch buffer.
     * NOTE(review): (1 << 8) presumably selects a second-level batch and
     * (1 << 0) a 48-bit address format — confirm against the MI_BATCH_BUFFER_START
     * definition in the PRM. */
    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_BCS_RELOC(batch,
                  slice_batch_bo,
                  I915_GEM_DOMAIN_COMMAND, 0,
                  0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);

    // end programing
    intel_batchbuffer_end_atomic(batch);

    /* Drop the reference taken by the batchbuffer builder */
    dri_bo_unreference(slice_batch_bo);
}
1582
1583
1584 static VAStatus
1585 gen9_mfc_avc_encode_picture(VADriverContextP ctx,
1586                             struct encode_state *encode_state,
1587                             struct intel_encoder_context *encoder_context)
1588 {
1589     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1590     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1591     int current_frame_bits_size;
1592     int sts;
1593
1594     for (;;) {
1595         gen9_mfc_init(ctx, encode_state, encoder_context);
1596         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1597         /*Programing bcs pipeline*/
1598         gen9_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);   //filling the pipeline
1599         gen9_mfc_run(ctx, encode_state, encoder_context);
1600         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1601             gen9_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1602             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1603             if (sts == BRC_NO_HRD_VIOLATION) {
1604                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1605                 break;
1606             }
1607             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1608                 if (!mfc_context->hrd.violation_noted) {
1609                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1610                     mfc_context->hrd.violation_noted = 1;
1611                 }
1612                 return VA_STATUS_SUCCESS;
1613             }
1614         } else {
1615             break;
1616         }
1617     }
1618
1619     return VA_STATUS_SUCCESS;
1620 }
1621
1622 static void
1623 gen9_mfc_context_destroy(void *context)
1624 {
1625     struct gen6_mfc_context *mfc_context = context;
1626     int i;
1627
1628     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1629     mfc_context->post_deblocking_output.bo = NULL;
1630
1631     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1632     mfc_context->pre_deblocking_output.bo = NULL;
1633
1634     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1635     mfc_context->uncompressed_picture_source.bo = NULL;
1636
1637     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
1638     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1639
1640     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1641         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1642         mfc_context->direct_mv_buffers[i].bo = NULL;
1643     }
1644
1645     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1646     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1647
1648     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1649     mfc_context->macroblock_status_buffer.bo = NULL;
1650
1651     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1652     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1653
1654     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1655     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1656
1657
1658     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
1659         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
1660         mfc_context->reference_surfaces[i].bo = NULL;
1661     }
1662
1663     i965_gpe_context_destroy(&mfc_context->gpe_context);
1664
1665     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
1666     mfc_context->mfc_batchbuffer_surface.bo = NULL;
1667
1668     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
1669     mfc_context->aux_batchbuffer_surface.bo = NULL;
1670
1671     if (mfc_context->aux_batchbuffer)
1672         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
1673
1674     mfc_context->aux_batchbuffer = NULL;
1675
1676     free(mfc_context);
1677 }
1678
1679 static VAStatus gen9_mfc_pipeline(VADriverContextP ctx,
1680                                   VAProfile profile,
1681                                   struct encode_state *encode_state,
1682                                   struct intel_encoder_context *encoder_context)
1683 {
1684     VAStatus vaStatus;
1685
1686     switch (profile) {
1687     case VAProfileH264ConstrainedBaseline:
1688     case VAProfileH264Main:
1689     case VAProfileH264High:
1690     case VAProfileH264MultiviewHigh:
1691     case VAProfileH264StereoHigh:
1692         vaStatus = gen9_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
1693         break;
1694
1695     default:
1696         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1697         break;
1698     }
1699
1700     return vaStatus;
1701 }
1702
1703 Bool gen9_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1704 {
1705     struct gen6_mfc_context *mfc_context = NULL;
1706
1707
1708     if ((encoder_context->codec == CODEC_H264) ||
1709         (encoder_context->codec == CODEC_H264_MVC)) {
1710             return gen8_mfc_context_init(ctx, encoder_context);
1711     }
1712
1713
1714     if ((encoder_context->codec == CODEC_VP8) ||
1715         (encoder_context->codec == CODEC_MPEG2))
1716         return gen8_mfc_context_init(ctx, encoder_context);
1717
1718     mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
1719     assert(mfc_context);
1720     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1721
1722     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
1723     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
1724
1725     mfc_context->gpe_context.curbe.length = 32 * 4;
1726
1727     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1728     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
1729     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
1730     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
1731     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
1732
1733     i965_gpe_load_kernels(ctx,
1734                           &mfc_context->gpe_context,
1735                           gen9_mfc_kernels,
1736                           NUM_MFC_KERNEL);
1737
1738     mfc_context->pipe_mode_select = gen9_mfc_pipe_mode_select;
1739     mfc_context->set_surface_state = gen9_mfc_surface_state;
1740     mfc_context->ind_obj_base_addr_state = gen9_mfc_ind_obj_base_addr_state;
1741     mfc_context->avc_img_state = gen9_mfc_avc_img_state;
1742     mfc_context->avc_qm_state = gen9_mfc_avc_qm_state;
1743     mfc_context->avc_fqm_state = gen9_mfc_avc_fqm_state;
1744     mfc_context->insert_object = gen9_mfc_avc_insert_object;
1745     mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
1746
1747     encoder_context->mfc_context = mfc_context;
1748     encoder_context->mfc_context_destroy = gen9_mfc_context_destroy;
1749     encoder_context->mfc_pipeline = gen9_mfc_pipeline;
1750     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
1751
1752     return True;
1753 }