OSDN Git Service

Skl: Add the initial support for media encoding
[android-x86/hardware-intel-common-vaapi.git] / src / gen9_mfc.c
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/* Padded size of one surface-state slot; aliases the Gen8 value. */
#define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
/* Byte offset of surface-state slot `index` (argument parenthesized so expressions expand correctly). */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* Byte offset of binding-table entry `index`; entries follow MAX_MEDIA_SURFACES_GEN6 surface-state slots. */
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define MFC_SOFTWARE_HASWELL    1

/* Revision value at or above which IS_STEPPING_BPLUS() is true. */
#define B0_STEP_REV             2
/* True when the device revision held in (i965)->intel.revision is >= B0_STEP_REV. */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
54
55 static const uint32_t gen9_mfc_batchbuffer_avc_intra[][4] = {
56 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
57 };
58
59 static const uint32_t gen9_mfc_batchbuffer_avc_inter[][4] = {
60 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
61 };
62
63 static struct i965_kernel gen9_mfc_kernels[] = {
64     {
65         "MFC AVC INTRA BATCHBUFFER ",
66         MFC_BATCHBUFFER_AVC_INTRA,
67         gen9_mfc_batchbuffer_avc_intra,
68         sizeof(gen9_mfc_batchbuffer_avc_intra),
69         NULL
70     },
71
72     {
73         "MFC AVC INTER BATCHBUFFER ",
74         MFC_BATCHBUFFER_AVC_INTER,
75         gen9_mfc_batchbuffer_avc_inter,
76         sizeof(gen9_mfc_batchbuffer_avc_inter),
77         NULL
78     },
79 };
80
/* Inter macroblock partition mode: two-bit field and its named values. */
#define INTER_MODE_MASK     0x03
#define INTER_16X8          0x01
#define INTER_8X16          0x02
#define INTER_8X8           0x03
/* Mask selecting bits 8..15; presumably the sub-macroblock shape field - TODO confirm against the VME output layout. */
#define SUBMB_SHAPE_MASK    0x00FF00

/* Values positioned at bit 20; presumably MV-count selectors for the PAK object - TODO confirm. */
#define INTER_MV8           (4 << 20)
#define INTER_MV32          (6 << 20)
89
90
91 static void
92 gen9_mfc_pipe_mode_select(VADriverContextP ctx,
93                           int standard_select,
94                           struct intel_encoder_context *encoder_context)
95 {
96     struct intel_batchbuffer *batch = encoder_context->base.batch;
97     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
98
99     assert(standard_select == MFX_FORMAT_MPEG2 ||
100            standard_select == MFX_FORMAT_AVC);
101
102     BEGIN_BCS_BATCH(batch, 5);
103
104     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
105     OUT_BCS_BATCH(batch,
106                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
107                   (MFD_MODE_VLD << 15) | /* VLD mode */
108                   (0 << 10) | /* Stream-Out Enable */
109                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
110                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
111                   (0 << 5)  | /* not in stitch mode */
112                   (1 << 4)  | /* encoding mode */
113                   (standard_select << 0));  /* standard select: avc or mpeg2 */
114     OUT_BCS_BATCH(batch,
115                   (0 << 7)  | /* expand NOA bus flag */
116                   (0 << 6)  | /* disable slice-level clock gating */
117                   (0 << 5)  | /* disable clock gating for NOA */
118                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
119                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
120                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
121                   (0 << 1)  |
122                   (0 << 0));
123     OUT_BCS_BATCH(batch, 0);
124     OUT_BCS_BATCH(batch, 0);
125
126     ADVANCE_BCS_BATCH(batch);
127 }
128
129 static void
130 gen9_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
131 {
132     struct intel_batchbuffer *batch = encoder_context->base.batch;
133     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
134
135     BEGIN_BCS_BATCH(batch, 6);
136
137     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
138     OUT_BCS_BATCH(batch, 0);
139     OUT_BCS_BATCH(batch,
140                   ((mfc_context->surface_state.height - 1) << 18) |
141                   ((mfc_context->surface_state.width - 1) << 4));
142     OUT_BCS_BATCH(batch,
143                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
144                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
145                   (0 << 22) | /* surface object control state, FIXME??? */
146                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
147                   (0 << 2)  | /* must be 0 for interleave U/V */
148                   (1 << 1)  | /* must be tiled */
149                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
150     OUT_BCS_BATCH(batch,
151                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
152                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
153     OUT_BCS_BATCH(batch, 0);
154
155     ADVANCE_BCS_BATCH(batch);
156 }
157
158 static void
159 gen9_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
160                                  struct intel_encoder_context *encoder_context)
161 {
162     struct intel_batchbuffer *batch = encoder_context->base.batch;
163     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
164     struct gen6_vme_context *vme_context = encoder_context->vme_context;
165     int vme_size;
166
167     BEGIN_BCS_BATCH(batch, 26);
168
169     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
170     /* the DW1-3 is for the MFX indirect bistream offset */
171     OUT_BCS_BATCH(batch, 0);
172     OUT_BCS_BATCH(batch, 0);
173     OUT_BCS_BATCH(batch, 0);
174     /* the DW4-5 is the MFX upper bound */
175     OUT_BCS_BATCH(batch, 0);
176     OUT_BCS_BATCH(batch, 0);
177
178     vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;
179     /* the DW6-10 is for MFX Indirect MV Object Base Address */
180     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size);
184     OUT_BCS_BATCH(batch, 0);
185
186     /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
187     OUT_BCS_BATCH(batch, 0);
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190     OUT_BCS_BATCH(batch, 0);
191     OUT_BCS_BATCH(batch, 0);
192
193     /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
194     OUT_BCS_BATCH(batch, 0);
195     OUT_BCS_BATCH(batch, 0);
196     OUT_BCS_BATCH(batch, 0);
197     OUT_BCS_BATCH(batch, 0);
198     OUT_BCS_BATCH(batch, 0);
199
200     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
201     OUT_BCS_RELOC(batch,
202                   mfc_context->mfc_indirect_pak_bse_object.bo,
203                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
204                   0);
205     OUT_BCS_BATCH(batch, 0);
206     OUT_BCS_BATCH(batch, 0);
207
208     OUT_BCS_RELOC(batch,
209                   mfc_context->mfc_indirect_pak_bse_object.bo,
210                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
211                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
212     OUT_BCS_BATCH(batch, 0);
213
214     ADVANCE_BCS_BATCH(batch);
215 }
216
217 static void
218 gen9_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
219                        struct intel_encoder_context *encoder_context)
220 {
221     struct intel_batchbuffer *batch = encoder_context->base.batch;
222     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
223     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
224
225     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
226     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
227
228     BEGIN_BCS_BATCH(batch, 16);
229
230     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
231     /*DW1. MB setting of frame */
232     OUT_BCS_BATCH(batch,
233                   ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
234     OUT_BCS_BATCH(batch,
235                   ((height_in_mbs - 1) << 16) |
236                   ((width_in_mbs - 1) << 0));
237     /* DW3 QP setting */
238     OUT_BCS_BATCH(batch,
239                   (0 << 24) |   /* Second Chroma QP Offset */
240                   (0 << 16) |   /* Chroma QP Offset */
241                   (0 << 14) |   /* Max-bit conformance Intra flag */
242                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
243                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
244                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
245                   (0 << 8)  |   /* FIXME: Image Structure */
246                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
247     OUT_BCS_BATCH(batch,
248                   (0 << 16) |   /* Mininum Frame size */
249                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
250                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
251                   (0 << 13) |   /* CABAC 0 word insertion test enable */
252                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
253                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
254                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
255                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
256                   (0 << 6)  |   /* Only valid for VLD decoding mode */
257                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
258                   (0 << 4)  |   /* Direct 8x8 inference flag */
259                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
260                   (1 << 2)  |   /* Frame MB only flag */
261                   (0 << 1)  |   /* MBAFF mode is in active */
262                   (0 << 0));    /* Field picture flag */
263     /* DW5 Trellis quantization */
264     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
265     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
266                   (0xBB8 << 16) |       /* InterMbMaxSz */
267                   (0xEE8) );            /* IntraMbMaxSz */
268     OUT_BCS_BATCH(batch, 0);            /* Reserved */
269     /* DW8. QP delta */
270     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
271     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
272     /* DW10. Bit setting for MB */
273     OUT_BCS_BATCH(batch, 0x8C000000);
274     OUT_BCS_BATCH(batch, 0x00010000);
275     /* DW12. */
276     OUT_BCS_BATCH(batch, 0);
277     OUT_BCS_BATCH(batch, 0x02010100);
278     /* DW14. For short format */
279     OUT_BCS_BATCH(batch, 0);
280     OUT_BCS_BATCH(batch, 0);
281
282     ADVANCE_BCS_BATCH(batch);
283 }
284
285 static void
286 gen9_mfc_qm_state(VADriverContextP ctx,
287                   int qm_type,
288                   unsigned int *qm,
289                   int qm_length,
290                   struct intel_encoder_context *encoder_context)
291 {
292     struct intel_batchbuffer *batch = encoder_context->base.batch;
293     unsigned int qm_buffer[16];
294
295     assert(qm_length <= 16);
296     assert(sizeof(*qm) == 4);
297     memcpy(qm_buffer, qm, qm_length * 4);
298
299     BEGIN_BCS_BATCH(batch, 18);
300     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
301     OUT_BCS_BATCH(batch, qm_type << 0);
302     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
303     ADVANCE_BCS_BATCH(batch);
304 }
305
306 static void
307 gen9_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
308 {
309     unsigned int qm[16] = {
310         0x10101010, 0x10101010, 0x10101010, 0x10101010,
311         0x10101010, 0x10101010, 0x10101010, 0x10101010,
312         0x10101010, 0x10101010, 0x10101010, 0x10101010,
313         0x10101010, 0x10101010, 0x10101010, 0x10101010
314     };
315
316     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
317     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
318     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
319     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
320 }
321
322 static void
323 gen9_mfc_fqm_state(VADriverContextP ctx,
324                    int fqm_type,
325                    unsigned int *fqm,
326                    int fqm_length,
327                    struct intel_encoder_context *encoder_context)
328 {
329     struct intel_batchbuffer *batch = encoder_context->base.batch;
330     unsigned int fqm_buffer[32];
331
332     assert(fqm_length <= 32);
333     assert(sizeof(*fqm) == 4);
334     memcpy(fqm_buffer, fqm, fqm_length * 4);
335
336     BEGIN_BCS_BATCH(batch, 34);
337     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
338     OUT_BCS_BATCH(batch, fqm_type << 0);
339     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
340     ADVANCE_BCS_BATCH(batch);
341 }
342
343 static void
344 gen9_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
345 {
346     unsigned int qm[32] = {
347         0x10001000, 0x10001000, 0x10001000, 0x10001000,
348         0x10001000, 0x10001000, 0x10001000, 0x10001000,
349         0x10001000, 0x10001000, 0x10001000, 0x10001000,
350         0x10001000, 0x10001000, 0x10001000, 0x10001000,
351         0x10001000, 0x10001000, 0x10001000, 0x10001000,
352         0x10001000, 0x10001000, 0x10001000, 0x10001000,
353         0x10001000, 0x10001000, 0x10001000, 0x10001000,
354         0x10001000, 0x10001000, 0x10001000, 0x10001000
355     };
356
357     gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
358     gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
359     gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
360     gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
361 }
362
363 static void
364 gen9_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
365                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
366                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
367                            struct intel_batchbuffer *batch)
368 {
369     if (batch == NULL)
370         batch = encoder_context->base.batch;
371
372     if (data_bits_in_last_dw == 0)
373         data_bits_in_last_dw = 32;
374
375     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
376
377     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
378     OUT_BCS_BATCH(batch,
379                   (0 << 16) |   /* always start at offset 0 */
380                   (data_bits_in_last_dw << 8) |
381                   (skip_emul_byte_count << 4) |
382                   (!!emulation_flag << 3) |
383                   ((!!is_last_header) << 2) |
384                   ((!!is_end_of_slice) << 1) |
385                   (0 << 0));    /* FIXME: ??? */
386     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
387
388     ADVANCE_BCS_BATCH(batch);
389 }
390
391
392 static void gen9_mfc_init(VADriverContextP ctx,
393                           struct encode_state *encode_state,
394                           struct intel_encoder_context *encoder_context)
395 {
396     struct i965_driver_data *i965 = i965_driver_data(ctx);
397     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
398     dri_bo *bo;
399     int i;
400     int width_in_mbs = 0;
401     int height_in_mbs = 0;
402     int slice_batchbuffer_size;
403
404     if (encoder_context->codec == CODEC_H264 ||
405         encoder_context->codec == CODEC_H264_MVC) {
406         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
407         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
408         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
409     } else {
410         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
411
412         assert(encoder_context->codec == CODEC_MPEG2);
413
414         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
415         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
416     }
417
418     slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
419                 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
420
421     /*Encode common setup for MFC*/
422     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
423     mfc_context->post_deblocking_output.bo = NULL;
424
425     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
426     mfc_context->pre_deblocking_output.bo = NULL;
427
428     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
429     mfc_context->uncompressed_picture_source.bo = NULL;
430
431     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
432     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
433
434     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
435         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
436         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
437         mfc_context->direct_mv_buffers[i].bo = NULL;
438     }
439
440     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
441         if (mfc_context->reference_surfaces[i].bo != NULL)
442             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
443         mfc_context->reference_surfaces[i].bo = NULL;
444     }
445
446     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
447     bo = dri_bo_alloc(i965->intel.bufmgr,
448                       "Buffer",
449                       width_in_mbs * 64,
450                       64);
451     assert(bo);
452     mfc_context->intra_row_store_scratch_buffer.bo = bo;
453
454     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
455     bo = dri_bo_alloc(i965->intel.bufmgr,
456                       "Buffer",
457                       width_in_mbs * height_in_mbs * 16,
458                       64);
459     assert(bo);
460     mfc_context->macroblock_status_buffer.bo = bo;
461
462     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
463     bo = dri_bo_alloc(i965->intel.bufmgr,
464                       "Buffer",
465                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
466                       64);
467     assert(bo);
468     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
469
470     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
471     bo = dri_bo_alloc(i965->intel.bufmgr,
472                       "Buffer",
473                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
474                       0x1000);
475     assert(bo);
476     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
477
478     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
479     mfc_context->mfc_batchbuffer_surface.bo = NULL;
480
481     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
482     mfc_context->aux_batchbuffer_surface.bo = NULL;
483
484     if (mfc_context->aux_batchbuffer)
485         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
486
487     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
488     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
489     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
490     mfc_context->aux_batchbuffer_surface.pitch = 16;
491     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
492     mfc_context->aux_batchbuffer_surface.size_block = 16;
493
494     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
495 }
496
497 static void
498 gen9_mfc_pipe_buf_addr_state(VADriverContextP ctx,
499                              struct intel_encoder_context *encoder_context)
500 {
501     struct intel_batchbuffer *batch = encoder_context->base.batch;
502     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
503     int i;
504
505     BEGIN_BCS_BATCH(batch, 61);
506
507     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
508
509     /* the DW1-3 is for pre_deblocking */
510     if (mfc_context->pre_deblocking_output.bo)
511         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
512                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
513                       0);
514     else
515         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
516
517     OUT_BCS_BATCH(batch, 0);
518     OUT_BCS_BATCH(batch, 0);
519     /* the DW4-6 is for the post_deblocking */
520
521     /* post output addr  */
522     if (mfc_context->post_deblocking_output.bo)
523         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
524                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
525                       0);
526     else
527         OUT_BCS_BATCH(batch, 0);
528
529     OUT_BCS_BATCH(batch, 0);
530     OUT_BCS_BATCH(batch, 0);
531
532     /* the DW7-9 is for the uncompressed_picture */
533     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
534                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
535                   0); /* uncompressed data */
536
537     OUT_BCS_BATCH(batch, 0);
538     OUT_BCS_BATCH(batch, 0);
539
540     /* the DW10-12 is for the mb status */
541     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
542                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
543                   0); /* StreamOut data*/
544
545     OUT_BCS_BATCH(batch, 0);
546     OUT_BCS_BATCH(batch, 0);
547
548     /* the DW13-15 is for the intra_row_store_scratch */
549     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
550                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
551                   0);
552
553     OUT_BCS_BATCH(batch, 0);
554     OUT_BCS_BATCH(batch, 0);
555
556     /* the DW16-18 is for the deblocking filter */
557     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
558                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
559                   0);
560
561     OUT_BCS_BATCH(batch, 0);
562     OUT_BCS_BATCH(batch, 0);
563
564     /* the DW 19-50 is for Reference pictures*/
565     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
566         if ( mfc_context->reference_surfaces[i].bo != NULL) {
567             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
568                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
569                           0);
570         } else {
571             OUT_BCS_BATCH(batch, 0);
572         }
573
574         OUT_BCS_BATCH(batch, 0);
575     }
576
577     OUT_BCS_BATCH(batch, 0);
578
579     /* The DW 52-54 is for the MB status buffer */
580     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
581                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
582                   0);
583
584     OUT_BCS_BATCH(batch, 0);
585     OUT_BCS_BATCH(batch, 0);
586
587     /* the DW 55-57 is the ILDB buffer */
588     OUT_BCS_BATCH(batch, 0);
589     OUT_BCS_BATCH(batch, 0);
590     OUT_BCS_BATCH(batch, 0);
591
592     /* the DW 58-60 is the second ILDB buffer */
593     OUT_BCS_BATCH(batch, 0);
594     OUT_BCS_BATCH(batch, 0);
595     OUT_BCS_BATCH(batch, 0);
596
597     ADVANCE_BCS_BATCH(batch);
598 }
599
600 static void
601 gen9_mfc_avc_directmode_state(VADriverContextP ctx,
602                               struct intel_encoder_context *encoder_context)
603 {
604     struct intel_batchbuffer *batch = encoder_context->base.batch;
605     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
606
607     int i;
608
609     BEGIN_BCS_BATCH(batch, 71);
610
611     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
612
613     /* Reference frames and Current frames */
614     /* the DW1-32 is for the direct MV for reference */
615     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
616         if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
617             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
618                           I915_GEM_DOMAIN_INSTRUCTION, 0,
619                           0);
620             OUT_BCS_BATCH(batch, 0);
621         } else {
622             OUT_BCS_BATCH(batch, 0);
623             OUT_BCS_BATCH(batch, 0);
624         }
625     }
626
627     OUT_BCS_BATCH(batch, 0);
628
629     /* the DW34-36 is the MV for the current reference */
630     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
631                   I915_GEM_DOMAIN_INSTRUCTION, 0,
632                   0);
633
634     OUT_BCS_BATCH(batch, 0);
635     OUT_BCS_BATCH(batch, 0);
636
637     /* POL list */
638     for(i = 0; i < 32; i++) {
639         OUT_BCS_BATCH(batch, i/2);
640     }
641     OUT_BCS_BATCH(batch, 0);
642     OUT_BCS_BATCH(batch, 0);
643
644     ADVANCE_BCS_BATCH(batch);
645 }
646
647
648 static void
649 gen9_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
650                                  struct intel_encoder_context *encoder_context)
651 {
652     struct intel_batchbuffer *batch = encoder_context->base.batch;
653     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
654
655     BEGIN_BCS_BATCH(batch, 10);
656
657     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
658     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
659                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
660                   0);
661     OUT_BCS_BATCH(batch, 0);
662     OUT_BCS_BATCH(batch, 0);
663
664     /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
665     OUT_BCS_BATCH(batch, 0);
666     OUT_BCS_BATCH(batch, 0);
667     OUT_BCS_BATCH(batch, 0);
668
669     /* the DW7-9 is for Bitplane Read Buffer Base Address */
670     OUT_BCS_BATCH(batch, 0);
671     OUT_BCS_BATCH(batch, 0);
672     OUT_BCS_BATCH(batch, 0);
673
674     ADVANCE_BCS_BATCH(batch);
675 }
676
677
678 static void gen9_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
679                                                       struct encode_state *encode_state,
680                                                       struct intel_encoder_context *encoder_context)
681 {
682     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
683
684     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
685     mfc_context->set_surface_state(ctx, encoder_context);
686     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
687     gen9_mfc_pipe_buf_addr_state(ctx, encoder_context);
688     gen9_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
689     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
690     mfc_context->avc_qm_state(ctx, encoder_context);
691     mfc_context->avc_fqm_state(ctx, encoder_context);
692     gen9_mfc_avc_directmode_state(ctx, encoder_context);
693     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
694 }
695
696
697 static VAStatus gen9_mfc_run(VADriverContextP ctx,
698                              struct encode_state *encode_state,
699                              struct intel_encoder_context *encoder_context)
700 {
701     struct intel_batchbuffer *batch = encoder_context->base.batch;
702
703     intel_batchbuffer_flush(batch);             //run the pipeline
704
705     return VA_STATUS_SUCCESS;
706 }
707
708
709 static VAStatus
710 gen9_mfc_stop(VADriverContextP ctx,
711               struct encode_state *encode_state,
712               struct intel_encoder_context *encoder_context,
713               int *encoded_bits_size)
714 {
715     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
716     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
717     VACodedBufferSegment *coded_buffer_segment;
718
719     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
720     assert(vaStatus == VA_STATUS_SUCCESS);
721     *encoded_bits_size = coded_buffer_segment->size * 8;
722     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
723
724     return VA_STATUS_SUCCESS;
725 }
726
727
/*
 * Emit the 11-dword MFX_AVC_SLICE_STATE command for one H.264 slice.
 *
 * pic_param / slice_param  source of the reference counts, weighted
 *                          prediction settings, deblocking/CABAC fields and
 *                          the slice's macroblock range.
 * rate_control_enable      accepted but not consulted: the two bit positions
 *                          that would carry it are hard-coded to 0 below.
 * qp                       written into the slice quantization field.
 * batch                    destination batch; NULL selects
 *                          encoder_context->base.batch.
 *
 * ctx and encode_state are not referenced in the body.  The picture size in
 * macroblocks is derived from mfc_context->surface_state, and the
 * rate-control tuning values come from
 * mfc_context->bit_rate_control_context[slice_type].
 */
728 static void
729 gen9_mfc_avc_slice_state(VADriverContextP ctx,
730                          VAEncPictureParameterBufferH264 *pic_param,
731                          VAEncSliceParameterBufferH264 *slice_param,
732                          struct encode_state *encode_state,
733                          struct intel_encoder_context *encoder_context,
734                          int rate_control_enable,
735                          int qp,
736                          struct intel_batchbuffer *batch)
737 {
738     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
739     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
740     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
741     int beginmb = slice_param->macroblock_address;
742     int endmb = beginmb + slice_param->num_macroblocks; /* one past the slice's last MB */
743     int beginx = beginmb % width_in_mbs;
744     int beginy = beginmb / width_in_mbs;
745     int nextx =  endmb % width_in_mbs;
746     int nexty = endmb / width_in_mbs;
747     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
748     int last_slice = (endmb == (width_in_mbs * height_in_mbs)); /* slice ends exactly at the last MB of the picture */
749     int maxQpN, maxQpP;
750     unsigned char correct[6], grow, shrink;
751     int i;
752     int weighted_pred_idc = 0;
753     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
754     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
755     int num_ref_l0 = 0, num_ref_l1 = 0;
756
757     if (batch == NULL)
758         batch = encoder_context->base.batch;
759
    /* Per slice type: reference counts (picture-level values, replaced by the
     * slice-level ones when the override flag is set) and weight denominators. */
760     if (slice_type == SLICE_TYPE_I) {
761         luma_log2_weight_denom = 0;
762         chroma_log2_weight_denom = 0;
763     } else if (slice_type == SLICE_TYPE_P) {
764         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
765         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
766
767         if (slice_param->num_ref_idx_active_override_flag)
768             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
769     } else if (slice_type == SLICE_TYPE_B) {
770         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
771         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
772         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
773
774         if (slice_param->num_ref_idx_active_override_flag) {
775             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
776             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
777         }
778
779         if (weighted_pred_idc == 2) {
780             /* 8.4.3 - Derivation process for prediction weights (8-279) */
781             luma_log2_weight_denom = 5;
782             chroma_log2_weight_denom = 5;
783         }
784     }
785
786     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
787     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
788
789     for (i = 0; i < 6; i++)
790         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
791
    /* grow/shrink each combine an "Init" value with a "Resistance" value
     * shifted left by 4; the sum is stored in an unsigned char. */
792     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit +
793         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
794     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit +
795         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
796
797     BEGIN_BCS_BATCH(batch, 11);;
798
799     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
800     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
801
802     OUT_BCS_BATCH(batch,
803                   (num_ref_l0 << 16) |
804                   (num_ref_l1 << 24) |
805                   (chroma_log2_weight_denom << 8) |
806                   (luma_log2_weight_denom << 0));
807
808     OUT_BCS_BATCH(batch,
809                   (weighted_pred_idc << 30) |
810                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
811                   (slice_param->disable_deblocking_filter_idc << 27) |
812                   (slice_param->cabac_init_idc << 24) |
813                   (qp<<16) |                    /*Slice Quantization Parameter*/
814                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
815                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
816     OUT_BCS_BATCH(batch,
817                   (beginy << 24) |                      /*First MB X&Y , the begin position of current slice*/
818                   (beginx << 16) |
819                   slice_param->macroblock_address );
820     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
821     OUT_BCS_BATCH(batch,
822                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
823                   (1 << 30) |           /*ResetRateControlCounter*/
824                   (0 << 28) |           /*RC Trigger Mode = Always Rate Control*/
825                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
826                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/
827                   (0 << 22) |     /*QP mode, don't modify CBP*/
828                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/
829                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/
830                   (last_slice << 19) |     /*IsLastSlice*/
831                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
832                   (1 << 17) |       /*HeaderPresentFlag*/
833                   (1 << 16) |       /*SliceData PresentFlag*/
834                   (1 << 15) |       /*TailPresentFlag*/
835                   (1 << 13) |       /*RBSP NAL TYPE*/
836                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
837     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
838     OUT_BCS_BATCH(batch,
839                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/
840                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
841                   (shrink << 8)  |
842                   (grow << 0));
843     OUT_BCS_BATCH(batch,
844                   (correct[5] << 20) |
845                   (correct[4] << 16) |
846                   (correct[3] << 12) |
847                   (correct[2] << 8) |
848                   (correct[1] << 4) |
849                   (correct[0] << 0));
850     OUT_BCS_BATCH(batch, 0);
851
852     ADVANCE_BCS_BATCH(batch);
853 }
854
855
856 #ifdef MFC_SOFTWARE_HASWELL
857
858 static int
859 gen9_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
860                               int qp,unsigned int *msg,
861                               struct intel_encoder_context *encoder_context,
862                               unsigned char target_mb_size, unsigned char max_mb_size,
863                               struct intel_batchbuffer *batch)
864 {
865     int len_in_dwords = 12;
866     unsigned int intra_msg;
867 #define         INTRA_MSG_FLAG          (1 << 13)
868 #define         INTRA_MBTYPE_MASK       (0x1F0000)
869     if (batch == NULL)
870         batch = encoder_context->base.batch;
871
872     BEGIN_BCS_BATCH(batch, len_in_dwords);
873
874     intra_msg = msg[0] & 0xC0FF;
875     intra_msg |= INTRA_MSG_FLAG;
876     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
877     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
878     OUT_BCS_BATCH(batch, 0);
879     OUT_BCS_BATCH(batch, 0);
880     OUT_BCS_BATCH(batch,
881                   (0 << 24) |           /* PackedMvNum, Debug*/
882                   (0 << 20) |           /* No motion vector */
883                   (1 << 19) |           /* CbpDcY */
884                   (1 << 18) |           /* CbpDcU */
885                   (1 << 17) |           /* CbpDcV */
886                   intra_msg);
887
888     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
889     OUT_BCS_BATCH(batch, 0x000F000F);     /* Code Block Pattern */
890
891     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
892
893     /*Stuff for Intra MB*/
894     OUT_BCS_BATCH(batch, msg[1]);       /* We using Intra16x16 no 4x4 predmode*/
895     OUT_BCS_BATCH(batch, msg[2]);
896     OUT_BCS_BATCH(batch, msg[3]&0xFF);
897
898     /*MaxSizeInWord and TargetSzieInWord*/
899     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
900                   (target_mb_size << 16) );
901
902     OUT_BCS_BATCH(batch, 0);
903
904     ADVANCE_BCS_BATCH(batch);
905
906     return len_in_dwords;
907 }
908
/*
 * Emit one 12-dword MFC_AVC_PAK_OBJECT command for an inter-coded macroblock.
 *
 * x, y            macroblock position, packed into dword 4.
 * end_mb          non-zero for the last macroblock of the slice.
 * qp              quantizer placed in the low bits of dword 6.
 * msg             per-macroblock words for the inter result; msg[0] and
 *                 msg[1] select the partitioning.  NOTE: the motion vectors
 *                 that start at msg + MSG_MV_OFFSET are rearranged IN PLACE
 *                 before the command is written.
 * offset          emitted unchanged as dword 2.
 * target_mb_size,
 * max_mb_size     packed into dword 10.
 * slice_type      only referenced by the disabled (#if 0) branch.
 * batch           destination batch; NULL selects
 *                 encoder_context->base.batch.
 *
 * ctx is not referenced.  Returns the command length in dwords (12).
 */
909 static int
910 gen9_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
911                               unsigned int *msg, unsigned int offset,
912                               struct intel_encoder_context *encoder_context,
913                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
914                               struct intel_batchbuffer *batch)
915 {
916     struct gen6_vme_context *vme_context = encoder_context->vme_context;
917     int len_in_dwords = 12;
918     unsigned int inter_msg = 0;
919     if (batch == NULL)
920         batch = encoder_context->base.batch;
921     {
922 #define MSG_MV_OFFSET   4
923         unsigned int *mv_ptr;
924         mv_ptr = msg + MSG_MV_OFFSET;
925         /* MV of VME output is based on 16 sub-blocks. So it is necessary
926          * to convert them to be compatible with the format of AVC_PAK
927          * command.
928          */
        /* The copies below read and write the same array, so their order
         * matters: each source slot is read before it is overwritten. */
929         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
930             /* MV[0] and MV[2] are replicated */
931             mv_ptr[4] = mv_ptr[0];
932             mv_ptr[5] = mv_ptr[1];
933             mv_ptr[2] = mv_ptr[8];
934             mv_ptr[3] = mv_ptr[9];
935             mv_ptr[6] = mv_ptr[8];
936             mv_ptr[7] = mv_ptr[9];
937         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
938             /* MV[0] and MV[1] are replicated */
939             mv_ptr[2] = mv_ptr[0];
940             mv_ptr[3] = mv_ptr[1];
941             mv_ptr[4] = mv_ptr[16];
942             mv_ptr[5] = mv_ptr[17];
943             mv_ptr[6] = mv_ptr[24];
944             mv_ptr[7] = mv_ptr[25];
945         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
946                    !(msg[1] & SUBMB_SHAPE_MASK)) {
947             /* Don't touch MV[0] or MV[1] */
948             mv_ptr[2] = mv_ptr[8];
949             mv_ptr[3] = mv_ptr[9];
950             mv_ptr[4] = mv_ptr[16];
951             mv_ptr[5] = mv_ptr[17];
952             mv_ptr[6] = mv_ptr[24];
953             mv_ptr[7] = mv_ptr[25];
954         }
955     }
956
957     BEGIN_BCS_BATCH(batch, len_in_dwords);
958
959     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
960
961     inter_msg = 32;
962     /* MV quantity */
963     if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
964         if (msg[1] & SUBMB_SHAPE_MASK)
965             inter_msg = 128; /* 8x8 with a non-zero sub-MB shape uses the larger value */
966     }
967     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
968     OUT_BCS_BATCH(batch, offset);
969     inter_msg = msg[0] & (0x1F00FFFF);
970     inter_msg |= INTER_MV8;
971     inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
972     if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
973         (msg[1] & SUBMB_SHAPE_MASK)) {
974         inter_msg |= INTER_MV32;
975     }
976
977     OUT_BCS_BATCH(batch, inter_msg);
978
979     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
980     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */
981 #if 0
982     if ( slice_type == SLICE_TYPE_B) {
983         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
984     } else {
985         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
986     }
987 #else
988     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
989 #endif
990
991     inter_msg = msg[1] >> 8;
992     /*Stuff for Inter MB*/
993     OUT_BCS_BATCH(batch, inter_msg);
994     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
995     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
996
997     /*MaxSizeInWord and TargetSizeInWord*/
998     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
999                   (target_mb_size << 16) );
1000
1001     OUT_BCS_BATCH(batch, 0x0);
1002     ADVANCE_BCS_BATCH(batch);
1003
1004     return len_in_dwords;
1005 }
1006
1007 #define         AVC_INTRA_RDO_OFFSET    4
1008 #define         AVC_INTER_RDO_OFFSET    10
1009 #define         AVC_INTER_MSG_OFFSET    8
1010 #define         AVC_INTER_MV_OFFSET             48
1011 #define         AVC_RDO_MASK            0xFFFF
1012
1013 static void
1014 gen9_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1015                                        struct encode_state *encode_state,
1016                                        struct intel_encoder_context *encoder_context,
1017                                        int slice_index,
1018                                        struct intel_batchbuffer *slice_batch)
1019 {
1020     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1021     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1022     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1023     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1024     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1025     unsigned int *msg = NULL, offset = 0;
1026     unsigned char *msg_ptr = NULL;
1027     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1028     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1029     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1030     int i,x,y;
1031     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1032     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1033     unsigned int tail_data[] = { 0x0, 0x0 };
1034     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1035     int is_intra = slice_type == SLICE_TYPE_I;
1036     int qp_slice;
1037
1038     qp_slice = qp;
1039     if (rate_control_mode == VA_RC_CBR) {
1040         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1041         if (encode_state->slice_header_index[slice_index] == 0) {
1042             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1043             qp_slice = qp;
1044         }
1045     }
1046
1047     /* only support for 8-bit pixel bit-depth */
1048     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1049     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1050     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1051     assert(qp >= 0 && qp < 52);
1052
1053          gen9_mfc_avc_slice_state(ctx,
1054                                   pPicParameter,
1055                                   pSliceParameter,
1056                                   encode_state, encoder_context,
1057                                   (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
1058
1059         if ( slice_index == 0)
1060             intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1061
1062          intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1063
1064     dri_bo_map(vme_context->vme_output.bo , 1);
1065     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1066
1067     if (is_intra) {
1068         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1069     } else {
1070         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1071     }
1072
1073     for (i = pSliceParameter->macroblock_address;
1074         i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1075         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1076         x = i % width_in_mbs;
1077         y = i / width_in_mbs;
1078         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1079
1080         if (is_intra) {
1081             assert(msg);
1082             gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1083         } else {
1084             int inter_rdo, intra_rdo;
1085             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1086             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1087             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1088             if (intra_rdo < inter_rdo) {
1089                 gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1090             } else {
1091                 msg += AVC_INTER_MSG_OFFSET;
1092                 gen9_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1093             }
1094         }
1095     }
1096
1097     dri_bo_unmap(vme_context->vme_output.bo);
1098
1099     if ( last_slice ) {
1100         mfc_context->insert_object(ctx, encoder_context,
1101                                    tail_data, 2, 8,
1102                                    2, 1, 1, 0, slice_batch);
1103     } else {
1104         mfc_context->insert_object(ctx, encoder_context,
1105                                    tail_data, 1, 8,
1106                                    1, 1, 1, 0, slice_batch);
1107     }
1108
1109
1110 }
1111
1112 static dri_bo *
1113 gen9_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1114                                   struct encode_state *encode_state,
1115                                   struct intel_encoder_context *encoder_context)
1116 {
1117     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1118     struct intel_batchbuffer *batch;
1119     dri_bo *batch_bo;
1120     int i;
1121
1122     batch = mfc_context->aux_batchbuffer;
1123     batch_bo = batch->buffer;
1124     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1125         gen9_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1126     }
1127
1128     intel_batchbuffer_align(batch, 8);
1129
1130     BEGIN_BCS_BATCH(batch, 2);
1131     OUT_BCS_BATCH(batch, 0);
1132     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1133     ADVANCE_BCS_BATCH(batch);
1134
1135     dri_bo_reference(batch_bo);
1136     intel_batchbuffer_free(batch);
1137     mfc_context->aux_batchbuffer = NULL;
1138
1139     return batch_bo;
1140 }
1141
1142 #else
1143
1144 static void
1145 gen9_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1146                                     struct encode_state *encode_state,
1147                                     struct intel_encoder_context *encoder_context)
1148
1149 {
1150     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1151     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1152
1153     assert(vme_context->vme_output.bo);
1154     mfc_context->buffer_suface_setup(ctx,
1155                                      &mfc_context->gpe_context,
1156                                      &vme_context->vme_output,
1157                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1158                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1159     assert(mfc_context->aux_batchbuffer_surface.bo);
1160     mfc_context->buffer_suface_setup(ctx,
1161                                      &mfc_context->gpe_context,
1162                                      &mfc_context->aux_batchbuffer_surface,
1163                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1164                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1165 }
1166
1167 static void
1168 gen9_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1169                                      struct encode_state *encode_state,
1170                                      struct intel_encoder_context *encoder_context)
1171
1172 {
1173     struct i965_driver_data *i965 = i965_driver_data(ctx);
1174     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1175     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1176     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1177     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1178     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1179     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1180     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1181     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr,
1182                                                            "MFC batchbuffer",
1183                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1184                                                            0x1000);
1185     mfc_context->buffer_suface_setup(ctx,
1186                                      &mfc_context->gpe_context,
1187                                      &mfc_context->mfc_batchbuffer_surface,
1188                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1189                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1190 }
1191
1192 static void
1193 gen9_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
1194                                     struct encode_state *encode_state,
1195                                     struct intel_encoder_context *encoder_context)
1196 {
1197     gen9_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1198     gen9_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1199 }
1200
1201 static void
1202 gen9_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
1203                                 struct encode_state *encode_state,
1204                                 struct intel_encoder_context *encoder_context)
1205 {
1206     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1207     struct gen6_interface_descriptor_data *desc;
1208     int i;
1209     dri_bo *bo;
1210
1211     bo = mfc_context->gpe_context.idrt.bo;
1212     dri_bo_map(bo, 1);
1213     assert(bo->virtual);
1214     desc = bo->virtual;
1215
1216     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1217         struct i965_kernel *kernel;
1218
1219         kernel = &mfc_context->gpe_context.kernels[i];
1220         assert(sizeof(*desc) == 32);
1221
1222         /*Setup the descritor table*/
1223         memset(desc, 0, sizeof(*desc));
1224         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1225         desc->desc2.sampler_count = 0;
1226         desc->desc2.sampler_state_pointer = 0;
1227         desc->desc3.binding_table_entry_count = 2;
1228         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1229         desc->desc4.constant_urb_entry_read_offset = 0;
1230         desc->desc4.constant_urb_entry_read_length = 4;
1231
1232         /*kernel start*/
1233         dri_bo_emit_reloc(bo,
1234                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1235                           0,
1236                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1237                           kernel->bo);
1238         desc++;
1239     }
1240
1241     dri_bo_unmap(bo);
1242 }
1243
1244 static void
1245 gen9_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
1246                                     struct encode_state *encode_state,
1247                                     struct intel_encoder_context *encoder_context)
1248 {
1249     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1250
1251     (void)mfc_context;
1252 }
1253
1254 static void
1255 gen9_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1256                                          int index,
1257                                          int head_offset,
1258                                          int batchbuffer_offset,
1259                                          int head_size,
1260                                          int tail_size,
1261                                          int number_mb_cmds,
1262                                          int first_object,
1263                                          int last_object,
1264                                          int last_slice,
1265                                          int mb_x,
1266                                          int mb_y,
1267                                          int width_in_mbs,
1268                                          int qp)
1269 {
1270     BEGIN_BATCH(batch, 12);
1271
1272     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1273     OUT_BATCH(batch, index);
1274     OUT_BATCH(batch, 0);
1275     OUT_BATCH(batch, 0);
1276     OUT_BATCH(batch, 0);
1277     OUT_BATCH(batch, 0);
1278
1279     /*inline data */
1280     OUT_BATCH(batch, head_offset);
1281     OUT_BATCH(batch, batchbuffer_offset);
1282     OUT_BATCH(batch,
1283               head_size << 16 |
1284               tail_size);
1285     OUT_BATCH(batch,
1286               number_mb_cmds << 16 |
1287               first_object << 2 |
1288               last_object << 1 |
1289               last_slice);
1290     OUT_BATCH(batch,
1291               mb_y << 8 |
1292               mb_x);
1293     OUT_BATCH(batch,
1294               qp << 16 |
1295               width_in_mbs);
1296
1297     ADVANCE_BATCH(batch);
1298 }
1299
1300 static void
1301 gen9_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1302                                        struct intel_encoder_context *encoder_context,
1303                                        VAEncSliceParameterBufferH264 *slice_param,
1304                                        int head_offset,
1305                                        unsigned short head_size,
1306                                        unsigned short tail_size,
1307                                        int batchbuffer_offset,
1308                                        int qp,
1309                                        int last_slice)
1310 {
1311     struct intel_batchbuffer *batch = encoder_context->base.batch;
1312     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1313     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1314     int total_mbs = slice_param->num_macroblocks;
1315     int number_mb_cmds = 128;
1316     int starting_mb = 0;
1317     int last_object = 0;
1318     int first_object = 1;
1319     int i;
1320     int mb_x, mb_y;
1321     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1322
1323     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1324         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1325         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1326         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1327         assert(mb_x <= 255 && mb_y <= 255);
1328
1329         starting_mb += number_mb_cmds;
1330
1331         gen9_mfc_batchbuffer_emit_object_command(batch,
1332                                                  index,
1333                                                  head_offset,
1334                                                  batchbuffer_offset,
1335                                                  head_size,
1336                                                  tail_size,
1337                                                  number_mb_cmds,
1338                                                  first_object,
1339                                                  last_object,
1340                                                  last_slice,
1341                                                  mb_x,
1342                                                  mb_y,
1343                                                  width_in_mbs,
1344                                                  qp);
1345
1346         if (first_object) {
1347             head_offset += head_size;
1348             batchbuffer_offset += head_size;
1349         }
1350
1351         if (last_object) {
1352             head_offset += tail_size;
1353             batchbuffer_offset += tail_size;
1354         }
1355
1356         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1357
1358         first_object = 0;
1359     }
1360
1361     if (!last_object) {
1362         last_object = 1;
1363         number_mb_cmds = total_mbs % number_mb_cmds;
1364         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1365         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1366         assert(mb_x <= 255 && mb_y <= 255);
1367         starting_mb += number_mb_cmds;
1368
1369         gen9_mfc_batchbuffer_emit_object_command(batch,
1370                                                  index,
1371                                                  head_offset,
1372                                                  batchbuffer_offset,
1373                                                  head_size,
1374                                                  tail_size,
1375                                                  number_mb_cmds,
1376                                                  first_object,
1377                                                  last_object,
1378                                                  last_slice,
1379                                                  mb_x,
1380                                                  mb_y,
1381                                                  width_in_mbs,
1382                                                  qp);
1383     }
1384 }
1385
1386 /*
1387  * return size in Owords (16bytes)
1388  */
1389 static int
1390 gen9_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1391                                struct encode_state *encode_state,
1392                                struct intel_encoder_context *encoder_context,
1393                                int slice_index,
1394                                int batchbuffer_offset)
1395 {
1396     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1397     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1398     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1399     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1400     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1401     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1402     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1403     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1404     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1405     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1406     unsigned int tail_data[] = { 0x0, 0x0 };
1407     long head_offset;
1408     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1409     unsigned short head_size, tail_size;
1410     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1411     int qp_slice;
1412
1413     qp_slice = qp;
1414     if (rate_control_mode == VA_RC_CBR) {
1415         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1416         if (encode_state->slice_header_index[slice_index] == 0) {
1417             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1418             qp_slice = qp;
1419         }
1420     }
1421
1422     /* only support for 8-bit pixel bit-depth */
1423     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1424     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1425     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1426     assert(qp >= 0 && qp < 52);
1427
1428     head_offset = old_used / 16;
1429     gen9_mfc_avc_slice_state(ctx,
1430                              pPicParameter,
1431                              pSliceParameter,
1432                              encode_state,
1433                              encoder_context,
1434                              (rate_control_mode == VA_RC_CBR),
1435                              qp_slice,
1436                              slice_batch);
1437
1438     if (slice_index == 0)
1439         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1440
1441
1442     intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1443
1444
1445     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1446     used = intel_batchbuffer_used_size(slice_batch);
1447     head_size = (used - old_used) / 16;
1448     old_used = used;
1449
1450     /* tail */
1451     if (last_slice) {
1452         mfc_context->insert_object(ctx,
1453                                    encoder_context,
1454                                    tail_data,
1455                                    2,
1456                                    8,
1457                                    2,
1458                                    1,
1459                                    1,
1460                                    0,
1461                                    slice_batch);
1462     } else {
1463         mfc_context->insert_object(ctx,
1464                                    encoder_context,
1465                                    tail_data,
1466                                    1,
1467                                    8,
1468                                    1,
1469                                    1,
1470                                    1,
1471                                    0,
1472                                    slice_batch);
1473     }
1474
1475     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1476     used = intel_batchbuffer_used_size(slice_batch);
1477     tail_size = (used - old_used) / 16;
1478
1479     gen9_mfc_avc_batchbuffer_slice_command(ctx,
1480                                            encoder_context,
1481                                            pSliceParameter,
1482                                            head_offset,
1483                                            head_size,
1484                                            tail_size,
1485                                            batchbuffer_offset,
1486                                            qp,
1487                                            last_slice);
1488
1489     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1490 }
1491
1492 static void
1493 gen9_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1494                                   struct encode_state *encode_state,
1495                                   struct intel_encoder_context *encoder_context)
1496 {
1497     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1498     struct intel_batchbuffer *batch = encoder_context->base.batch;
1499     int i, size, offset = 0;
1500
1501     intel_batchbuffer_start_atomic(batch, 0x4000);
1502     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1503
1504     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1505         size = gen9_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1506         offset += size;
1507     }
1508
1509     intel_batchbuffer_end_atomic(batch);
1510     intel_batchbuffer_flush(batch);
1511 }
1512
1513 static void
1514 gen9_mfc_build_avc_batchbuffer(VADriverContextP ctx,
1515                                struct encode_state *encode_state,
1516                                struct intel_encoder_context *encoder_context)
1517 {
1518     gen9_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1519     gen9_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1520     gen9_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1521     gen9_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1522 }
1523
1524 static dri_bo *
1525 gen9_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1526                                   struct encode_state *encode_state,
1527                                   struct intel_encoder_context *encoder_context)
1528 {
1529     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1530
1531     gen9_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1532     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1533
1534     return mfc_context->mfc_batchbuffer_surface.bo;
1535 }
1536
1537 #endif
1538
1539 static void
1540 gen9_mfc_avc_pipeline_programing(VADriverContextP ctx,
1541                                  struct encode_state *encode_state,
1542                                  struct intel_encoder_context *encoder_context)
1543 {
1544     struct intel_batchbuffer *batch = encoder_context->base.batch;
1545     dri_bo *slice_batch_bo;
1546
1547     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1548         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1549         assert(0);
1550         return;
1551     }
1552
1553 #ifdef MFC_SOFTWARE_HASWELL
1554     slice_batch_bo = gen9_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1555 #else
1556     slice_batch_bo = gen9_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1557 #endif
1558
1559     // begin programing
1560     intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1561     intel_batchbuffer_emit_mi_flush(batch);
1562
1563     // picture level programing
1564     gen9_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1565
1566     BEGIN_BCS_BATCH(batch, 3);
1567     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1568     OUT_BCS_RELOC(batch,
1569                   slice_batch_bo,
1570                   I915_GEM_DOMAIN_COMMAND, 0,
1571                   0);
1572     OUT_BCS_BATCH(batch, 0);
1573     ADVANCE_BCS_BATCH(batch);
1574
1575     // end programing
1576     intel_batchbuffer_end_atomic(batch);
1577
1578     dri_bo_unreference(slice_batch_bo);
1579 }
1580
1581
1582 static VAStatus
1583 gen9_mfc_avc_encode_picture(VADriverContextP ctx,
1584                             struct encode_state *encode_state,
1585                             struct intel_encoder_context *encoder_context)
1586 {
1587     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1588     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1589     int current_frame_bits_size;
1590     int sts;
1591
1592     for (;;) {
1593         gen9_mfc_init(ctx, encode_state, encoder_context);
1594         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1595         /*Programing bcs pipeline*/
1596         gen9_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);   //filling the pipeline
1597         gen9_mfc_run(ctx, encode_state, encoder_context);
1598         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1599             gen9_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1600             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1601             if (sts == BRC_NO_HRD_VIOLATION) {
1602                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1603                 break;
1604             }
1605             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1606                 if (!mfc_context->hrd.violation_noted) {
1607                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1608                     mfc_context->hrd.violation_noted = 1;
1609                 }
1610                 return VA_STATUS_SUCCESS;
1611             }
1612         } else {
1613             break;
1614         }
1615     }
1616
1617     return VA_STATUS_SUCCESS;
1618 }
1619
1620 /*
1621  * MPEG-2
1622  */
1623
/*
 * Picture coding type written into MFX_MPEG2_PIC_STATE, indexed by the
 * picture_type field of the MPEG-2 picture parameter buffer.
 */
static const int
va_to_gen9_mpeg2_picture_type[3] = {
    [0] = 1,    /* I */
    [1] = 2,    /* P */
    [2] = 3,    /* B */
};
1630
1631 static void
1632 gen9_mfc_mpeg2_pic_state(VADriverContextP ctx,
1633                          struct intel_encoder_context *encoder_context,
1634                          struct encode_state *encode_state)
1635 {
1636     struct intel_batchbuffer *batch = encoder_context->base.batch;
1637     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1638     VAEncPictureParameterBufferMPEG2 *pic_param;
1639     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1640     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1641     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1642
1643     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1644     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1645     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1646
1647     BEGIN_BCS_BATCH(batch, 13);
1648     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1649     OUT_BCS_BATCH(batch,
1650                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1651                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1652                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1653                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1654                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1655                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1656                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1657                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1658                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1659                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1660                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1661                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1662     OUT_BCS_BATCH(batch,
1663                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1664                   va_to_gen9_mpeg2_picture_type[pic_param->picture_type] << 9 |
1665                   0);
1666     OUT_BCS_BATCH(batch,
1667                   1 << 31 |     /* slice concealment */
1668                   (height_in_mbs - 1) << 16 |
1669                   (width_in_mbs - 1));
1670
1671     if (slice_param && slice_param->quantiser_scale_code >= 14)
1672         OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1673     else
1674         OUT_BCS_BATCH(batch, 0);
1675
1676     OUT_BCS_BATCH(batch, 0);
1677     OUT_BCS_BATCH(batch,
1678                   0xFFF << 16 | /* InterMBMaxSize */
1679                   0xFFF << 0 |  /* IntraMBMaxSize */
1680                   0);
1681     OUT_BCS_BATCH(batch, 0);
1682     OUT_BCS_BATCH(batch, 0);
1683     OUT_BCS_BATCH(batch, 0);
1684     OUT_BCS_BATCH(batch, 0);
1685     OUT_BCS_BATCH(batch, 0);
1686     OUT_BCS_BATCH(batch, 0);
1687     ADVANCE_BCS_BATCH(batch);
1688 }
1689
1690 static void
1691 gen9_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1692 {
1693     unsigned char intra_qm[64] = {
1694         8, 16, 19, 22, 26, 27, 29, 34,
1695         16, 16, 22, 24, 27, 29, 34, 37,
1696         19, 22, 26, 27, 29, 34, 34, 38,
1697         22, 22, 26, 27, 29, 34, 37, 40,
1698         22, 26, 27, 29, 32, 35, 40, 48,
1699         26, 27, 29, 32, 35, 40, 48, 58,
1700         26, 27, 29, 34, 38, 46, 56, 69,
1701         27, 29, 35, 38, 46, 56, 69, 83
1702     };
1703
1704     unsigned char non_intra_qm[64] = {
1705         16, 16, 16, 16, 16, 16, 16, 16,
1706         16, 16, 16, 16, 16, 16, 16, 16,
1707         16, 16, 16, 16, 16, 16, 16, 16,
1708         16, 16, 16, 16, 16, 16, 16, 16,
1709         16, 16, 16, 16, 16, 16, 16, 16,
1710         16, 16, 16, 16, 16, 16, 16, 16,
1711         16, 16, 16, 16, 16, 16, 16, 16,
1712         16, 16, 16, 16, 16, 16, 16, 16
1713     };
1714
1715     gen9_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1716     gen9_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1717 }
1718
1719 static void
1720 gen9_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1721 {
1722     unsigned short intra_fqm[64] = {
1723         65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1724         65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1725         65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1726         65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1727         65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1728         65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1729         65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1730         65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1731     };
1732
1733     unsigned short non_intra_fqm[64] = {
1734         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1735         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1736         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1737         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1738         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1739         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1740         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1741         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1742     };
1743
1744     gen9_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1745     gen9_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1746 }
1747
1748 static void
1749 gen9_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1750                                 struct intel_encoder_context *encoder_context,
1751                                 int x, int y,
1752                                 int next_x, int next_y,
1753                                 int is_fisrt_slice_group,
1754                                 int is_last_slice_group,
1755                                 int intra_slice,
1756                                 int qp,
1757                                 struct intel_batchbuffer *batch)
1758 {
1759     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1760
1761     if (batch == NULL)
1762         batch = encoder_context->base.batch;
1763
1764     BEGIN_BCS_BATCH(batch, 8);
1765
1766     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1767     OUT_BCS_BATCH(batch,
1768                   0 << 31 |                             /* MbRateCtrlFlag */
1769                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1770                   1 << 17 |                             /* Insert Header before the first slice group data */
1771                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1772                   1 << 15 |                             /* TailPresentFlag: always 1 */
1773                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1774                   !!intra_slice << 13 |                 /* IntraSlice */
1775                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1776                   0);
1777     OUT_BCS_BATCH(batch,
1778                   next_y << 24 |
1779                   next_x << 16 |
1780                   y << 8 |
1781                   x << 0 |
1782                   0);
1783     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1784     /* bitstream pointer is only loaded once for the first slice of a frame when
1785      * LoadSlicePointerFlag is 0
1786      */
1787     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1788     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1789     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1790     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1791
1792     ADVANCE_BCS_BATCH(batch);
1793 }
1794
1795 static int
1796 gen9_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1797                                 struct intel_encoder_context *encoder_context,
1798                                 int x, int y,
1799                                 int first_mb_in_slice,
1800                                 int last_mb_in_slice,
1801                                 int first_mb_in_slice_group,
1802                                 int last_mb_in_slice_group,
1803                                 int mb_type,
1804                                 int qp_scale_code,
1805                                 int coded_block_pattern,
1806                                 unsigned char target_size_in_word,
1807                                 unsigned char max_size_in_word,
1808                                 struct intel_batchbuffer *batch)
1809 {
1810     int len_in_dwords = 9;
1811
1812     if (batch == NULL)
1813         batch = encoder_context->base.batch;
1814
1815     BEGIN_BCS_BATCH(batch, len_in_dwords);
1816
1817     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1818     OUT_BCS_BATCH(batch,
1819                   0 << 24 |     /* PackedMvNum */
1820                   0 << 20 |     /* MvFormat */
1821                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1822                   0 << 15 |     /* TransformFlag: frame DCT */
1823                   0 << 14 |     /* FieldMbFlag */
1824                   1 << 13 |     /* IntraMbFlag */
1825                   mb_type << 8 |   /* MbType: Intra */
1826                   0 << 2 |      /* SkipMbFlag */
1827                   0 << 0 |      /* InterMbMode */
1828                   0);
1829     OUT_BCS_BATCH(batch, y << 16 | x);
1830     OUT_BCS_BATCH(batch,
1831                   max_size_in_word << 24 |
1832                   target_size_in_word << 16 |
1833                   coded_block_pattern << 6 |      /* CBP */
1834                   0);
1835     OUT_BCS_BATCH(batch,
1836                   last_mb_in_slice << 31 |
1837                   first_mb_in_slice << 30 |
1838                   0 << 27 |     /* EnableCoeffClamp */
1839                   last_mb_in_slice_group << 26 |
1840                   0 << 25 |     /* MbSkipConvDisable */
1841                   first_mb_in_slice_group << 24 |
1842                   0 << 16 |     /* MvFieldSelect */
1843                   qp_scale_code << 0 |
1844                   0);
1845     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1846     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1847     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1848     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1849
1850     ADVANCE_BCS_BATCH(batch);
1851
1852     return len_in_dwords;
1853 }
1854
/* Byte offset of the motion vectors inside a VME output message */
#define MPEG2_INTER_MV_OFFSET   48

/*
 * Motion vector range selected by each f_code, indexed by f_code.
 * Entry 0 is a placeholder; entries 1..9 hold the usable ranges.
 */
static struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {     0,    0 },
    {   -16,   15 },
    {   -32,   31 },
    {   -64,   63 },
    {  -128,  127 },
    {  -256,  255 },
    {  -512,  511 },
    { -1024, 1023 },
    { -2048, 2047 },
    { -4096, 4095 }
};

/*
 * Sanitise one motion vector component (half-pel units).
 *
 * mv          - the component to check
 * pos         - macroblock index along the same axis
 * display_max - picture extent along that axis, in pixels
 * f_code      - selects the allowed range from mv_ranges
 *
 * The vector is reset to 0 when it would move the 16-pixel macroblock
 * outside [0, display_max].  For f_code 1..9 the result is then clamped to
 * the matching mv_ranges entry; any other f_code leaves it unclamped.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    const int table_size = (int)(sizeof(mv_ranges) / sizeof(mv_ranges[0]));
    const int mb_start = pos * 16 * 2;          /* half-pel */
    const int mb_end = (pos + 1) * 16 * 2;      /* half-pel */
    const int picture_end = display_max * 2;    /* half-pel */
    int result = mv;

    if (mv + mb_start < 0 || mv + mb_end > picture_end)
        result = 0;

    if (f_code <= 0 || f_code >= table_size)
        return result;

    if (result < mv_ranges[f_code].low)
        result = mv_ranges[f_code].low;

    if (result > mv_ranges[f_code].high)
        result = mv_ranges[f_code].high;

    return result;
}
1892
1893 static int
1894 gen9_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
1895                                 struct encode_state *encode_state,
1896                                 struct intel_encoder_context *encoder_context,
1897                                 unsigned int *msg,
1898                                 int width_in_mbs, int height_in_mbs,
1899                                 int x, int y,
1900                                 int first_mb_in_slice,
1901                                 int last_mb_in_slice,
1902                                 int first_mb_in_slice_group,
1903                                 int last_mb_in_slice_group,
1904                                 int qp_scale_code,
1905                                 unsigned char target_size_in_word,
1906                                 unsigned char max_size_in_word,
1907                                 struct intel_batchbuffer *batch)
1908 {
1909     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1910     int len_in_dwords = 9;
1911     short *mvptr, mvx0, mvy0, mvx1, mvy1;
1912
1913     if (batch == NULL)
1914         batch = encoder_context->base.batch;
1915
1916     mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);;
1917     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
1918     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
1919     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
1920     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
1921
1922     BEGIN_BCS_BATCH(batch, len_in_dwords);
1923
1924     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1925     OUT_BCS_BATCH(batch,
1926                   2 << 24 |     /* PackedMvNum */
1927                   7 << 20 |     /* MvFormat */
1928                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1929                   0 << 15 |     /* TransformFlag: frame DCT */
1930                   0 << 14 |     /* FieldMbFlag */
1931                   0 << 13 |     /* IntraMbFlag */
1932                   1 << 8 |      /* MbType: Frame-based */
1933                   0 << 2 |      /* SkipMbFlag */
1934                   0 << 0 |      /* InterMbMode */
1935                   0);
1936     OUT_BCS_BATCH(batch, y << 16 | x);
1937     OUT_BCS_BATCH(batch,
1938                   max_size_in_word << 24 |
1939                   target_size_in_word << 16 |
1940                   0x3f << 6 |   /* CBP */
1941                   0);
1942     OUT_BCS_BATCH(batch,
1943                   last_mb_in_slice << 31 |
1944                   first_mb_in_slice << 30 |
1945                   0 << 27 |     /* EnableCoeffClamp */
1946                   last_mb_in_slice_group << 26 |
1947                   0 << 25 |     /* MbSkipConvDisable */
1948                   first_mb_in_slice_group << 24 |
1949                   0 << 16 |     /* MvFieldSelect */
1950                   qp_scale_code << 0 |
1951                   0);
1952
1953     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
1954     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
1955     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1956     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1957
1958     ADVANCE_BCS_BATCH(batch);
1959
1960     return len_in_dwords;
1961 }
1962
1963 static void
1964 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
1965                                            struct encode_state *encode_state,
1966                                            struct intel_encoder_context *encoder_context,
1967                                            struct intel_batchbuffer *slice_batch)
1968 {
1969     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1970     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
1971
1972     if (encode_state->packed_header_data[idx]) {
1973         VAEncPackedHeaderParameterBuffer *param = NULL;
1974         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1975         unsigned int length_in_bits;
1976
1977         assert(encode_state->packed_header_param[idx]);
1978         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1979         length_in_bits = param->bit_length;
1980
1981         mfc_context->insert_object(ctx,
1982                                    encoder_context,
1983                                    header_data,
1984                                    ALIGN(length_in_bits, 32) >> 5,
1985                                    length_in_bits & 0x1f,
1986                                    5,   /* FIXME: check it */
1987                                    0,
1988                                    0,
1989                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
1990                                    slice_batch);
1991     }
1992
1993     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
1994
1995     if (encode_state->packed_header_data[idx]) {
1996         VAEncPackedHeaderParameterBuffer *param = NULL;
1997         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1998         unsigned int length_in_bits;
1999
2000         assert(encode_state->packed_header_param[idx]);
2001         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2002         length_in_bits = param->bit_length;
2003
2004         mfc_context->insert_object(ctx,
2005                                    encoder_context,
2006                                    header_data,
2007                                    ALIGN(length_in_bits, 32) >> 5,
2008                                    length_in_bits & 0x1f,
2009                                    5,   /* FIXME: check it */
2010                                    0,
2011                                    0,
2012                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2013                                    slice_batch);
2014     }
2015 }
2016
2017 static void
2018 gen9_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2019                                     struct encode_state *encode_state,
2020                                     struct intel_encoder_context *encoder_context,
2021                                     int slice_index,
2022                                     VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2023                                     struct intel_batchbuffer *slice_batch)
2024 {
2025     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2026     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2027     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2028     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2029     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2030     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2031     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2032     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2033     int i, j;
2034     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2035     unsigned int *msg = NULL;
2036     unsigned char *msg_ptr = NULL;
2037
2038     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2039     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2040     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2041     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2042
2043     dri_bo_map(vme_context->vme_output.bo , 0);
2044     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2045
2046     if (next_slice_group_param) {
2047         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2048         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2049     } else {
2050         h_next_start_pos = 0;
2051         v_next_start_pos = height_in_mbs;
2052     }
2053
2054     gen9_mfc_mpeg2_slicegroup_state(ctx,
2055                                     encoder_context,
2056                                     h_start_pos,
2057                                     v_start_pos,
2058                                     h_next_start_pos,
2059                                     v_next_start_pos,
2060                                     slice_index == 0,
2061                                     next_slice_group_param == NULL,
2062                                     slice_param->is_intra_slice,
2063                                     slice_param->quantiser_scale_code,
2064                                     slice_batch);
2065
2066     if (slice_index == 0)
2067         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2068
2069     /* Insert '00' to make sure the header is valid */
2070     mfc_context->insert_object(ctx,
2071                                encoder_context,
2072                                (unsigned int*)section_delimiter,
2073                                1,
2074                                8,   /* 8bits in the last DWORD */
2075                                1,   /* 1 byte */
2076                                1,
2077                                0,
2078                                0,
2079                                slice_batch);
2080
2081     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2082         /* PAK for each macroblocks */
2083         for (j = 0; j < slice_param->num_macroblocks; j++) {
2084             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2085             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2086             int first_mb_in_slice = (j == 0);
2087             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2088             int first_mb_in_slice_group = (i == 0 && j == 0);
2089             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2090                                           j == slice_param->num_macroblocks - 1);
2091
2092             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2093
2094             if (slice_param->is_intra_slice) {
2095                 gen9_mfc_mpeg2_pak_object_intra(ctx,
2096                                                 encoder_context,
2097                                                 h_pos, v_pos,
2098                                                 first_mb_in_slice,
2099                                                 last_mb_in_slice,
2100                                                 first_mb_in_slice_group,
2101                                                 last_mb_in_slice_group,
2102                                                 0x1a,
2103                                                 slice_param->quantiser_scale_code,
2104                                                 0x3f,
2105                                                 0,
2106                                                 0xff,
2107                                                 slice_batch);
2108             } else {
2109                 int inter_rdo, intra_rdo;
2110                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2111                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2112
2113                 if (intra_rdo < inter_rdo)
2114                     gen9_mfc_mpeg2_pak_object_intra(ctx,
2115                                                      encoder_context,
2116                                                      h_pos, v_pos,
2117                                                      first_mb_in_slice,
2118                                                      last_mb_in_slice,
2119                                                      first_mb_in_slice_group,
2120                                                      last_mb_in_slice_group,
2121                                                      0x1a,
2122                                                      slice_param->quantiser_scale_code,
2123                                                      0x3f,
2124                                                      0,
2125                                                      0xff,
2126                                                      slice_batch);
2127                 else
2128                     gen9_mfc_mpeg2_pak_object_inter(ctx,
2129                                                 encode_state,
2130                                                 encoder_context,
2131                                                 msg,
2132                                                 width_in_mbs, height_in_mbs,
2133                                                 h_pos, v_pos,
2134                                                 first_mb_in_slice,
2135                                                 last_mb_in_slice,
2136                                                 first_mb_in_slice_group,
2137                                                 last_mb_in_slice_group,
2138                                                 slice_param->quantiser_scale_code,
2139                                                 0,
2140                                                 0xff,
2141                                                 slice_batch);
2142             }
2143         }
2144
2145         slice_param++;
2146     }
2147
2148     dri_bo_unmap(vme_context->vme_output.bo);
2149
2150     /* tail data */
2151     if (next_slice_group_param == NULL) { /* end of a picture */
2152         mfc_context->insert_object(ctx,
2153                                    encoder_context,
2154                                    (unsigned int *)tail_delimiter,
2155                                    2,
2156                                    8,   /* 8bits in the last DWORD */
2157                                    5,   /* 5 bytes */
2158                                    1,
2159                                    1,
2160                                    0,
2161                                    slice_batch);
2162     } else {        /* end of a lsice group */
2163         mfc_context->insert_object(ctx,
2164                                    encoder_context,
2165                                    (unsigned int *)section_delimiter,
2166                                    1,
2167                                    8,   /* 8bits in the last DWORD */
2168                                    1,   /* 1 byte */
2169                                    1,
2170                                    1,
2171                                    0,
2172                                    slice_batch);
2173     }
2174 }
2175
2176 /*
2177  * A batch buffer for all slices, including slice state
2178  * slice insert object and slice pak object commands
2179  */
2180 static dri_bo *
2181 gen9_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2182                                           struct encode_state *encode_state,
2183                                           struct intel_encoder_context *encoder_context)
2184 {
2185     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2186     struct intel_batchbuffer *batch;
2187     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2188     dri_bo *batch_bo;
2189     int i;
2190
2191     batch = mfc_context->aux_batchbuffer;
2192     batch_bo = batch->buffer;
2193
2194     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2195         if (i == encode_state->num_slice_params_ext - 1)
2196             next_slice_group_param = NULL;
2197         else
2198             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2199
2200         gen9_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2201     }
2202
2203     intel_batchbuffer_align(batch, 8);
2204
2205     BEGIN_BCS_BATCH(batch, 2);
2206     OUT_BCS_BATCH(batch, 0);
2207     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2208     ADVANCE_BCS_BATCH(batch);
2209
2210     dri_bo_reference(batch_bo);
2211     intel_batchbuffer_free(batch);
2212     mfc_context->aux_batchbuffer = NULL;
2213
2214     return batch_bo;
2215 }
2216
2217 static void
2218 gen9_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2219                                            struct encode_state *encode_state,
2220                                            struct intel_encoder_context *encoder_context)
2221 {
2222     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2223
2224     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2225     mfc_context->set_surface_state(ctx, encoder_context);
2226     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2227     gen9_mfc_pipe_buf_addr_state(ctx, encoder_context);
2228     gen9_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2229     gen9_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2230     gen9_mfc_mpeg2_qm_state(ctx, encoder_context);
2231     gen9_mfc_mpeg2_fqm_state(ctx, encoder_context);
2232 }
2233
2234 static void
2235 gen9_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2236                                    struct encode_state *encode_state,
2237                                    struct intel_encoder_context *encoder_context)
2238 {
2239     struct intel_batchbuffer *batch = encoder_context->base.batch;
2240     dri_bo *slice_batch_bo;
2241
2242     slice_batch_bo = gen9_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2243
2244     // begin programing
2245     intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
2246     intel_batchbuffer_emit_mi_flush(batch);
2247
2248     // picture level programing
2249     gen9_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2250
2251     BEGIN_BCS_BATCH(batch, 4);
2252     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
2253     OUT_BCS_RELOC(batch,
2254                   slice_batch_bo,
2255                   I915_GEM_DOMAIN_COMMAND, 0,
2256                   0);
2257     OUT_BCS_BATCH(batch, 0);
2258     OUT_BCS_BATCH(batch, 0);
2259     ADVANCE_BCS_BATCH(batch);
2260
2261     // end programing
2262     intel_batchbuffer_end_atomic(batch);
2263
2264     dri_bo_unreference(slice_batch_bo);
2265 }
2266
2267 static VAStatus
2268 intel_mfc_mpeg2_prepare(VADriverContextP ctx,
2269                         struct encode_state *encode_state,
2270                         struct intel_encoder_context *encoder_context)
2271 {
2272     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2273     struct object_surface *obj_surface;
2274     struct object_buffer *obj_buffer;
2275     struct i965_coded_buffer_segment *coded_buffer_segment;
2276     VAStatus vaStatus = VA_STATUS_SUCCESS;
2277     dri_bo *bo;
2278     int i;
2279
2280     /* reconstructed surface */
2281     obj_surface = encode_state->reconstructed_object;
2282     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2283     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2284     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2285     mfc_context->surface_state.width = obj_surface->orig_width;
2286     mfc_context->surface_state.height = obj_surface->orig_height;
2287     mfc_context->surface_state.w_pitch = obj_surface->width;
2288     mfc_context->surface_state.h_pitch = obj_surface->height;
2289
2290     /* forward reference */
2291     obj_surface = encode_state->reference_objects[0];
2292
2293     if (obj_surface && obj_surface->bo) {
2294         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2295         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2296     } else
2297         mfc_context->reference_surfaces[0].bo = NULL;
2298
2299     /* backward reference */
2300     obj_surface = encode_state->reference_objects[1];
2301
2302     if (obj_surface && obj_surface->bo) {
2303         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2304         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2305     } else {
2306         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2307
2308         if (mfc_context->reference_surfaces[1].bo)
2309             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2310     }
2311
2312     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2313         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2314
2315         if (mfc_context->reference_surfaces[i].bo)
2316             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2317     }
2318
2319     /* input YUV surface */
2320     obj_surface = encode_state->input_yuv_object;
2321     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2322     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2323
2324     /* coded buffer */
2325     obj_buffer = encode_state->coded_buf_object;
2326     bo = obj_buffer->buffer_store->bo;
2327     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2328     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2329     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2330     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2331
2332     /* set the internal flag to 0 to indicate the coded size is unknown */
2333     dri_bo_map(bo, 1);
2334     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2335     coded_buffer_segment->mapped = 0;
2336     coded_buffer_segment->codec = encoder_context->codec;
2337     dri_bo_unmap(bo);
2338
2339     return vaStatus;
2340 }
2341
2342 static VAStatus
2343 gen9_mfc_mpeg2_encode_picture(VADriverContextP ctx,
2344                               struct encode_state *encode_state,
2345                               struct intel_encoder_context *encoder_context)
2346 {
2347     gen9_mfc_init(ctx, encode_state, encoder_context);
2348     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2349     /*Programing bcs pipeline*/
2350     gen9_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2351     gen9_mfc_run(ctx, encode_state, encoder_context);
2352
2353     return VA_STATUS_SUCCESS;
2354 }
2355
2356 static void
2357 gen9_mfc_context_destroy(void *context)
2358 {
2359     struct gen6_mfc_context *mfc_context = context;
2360     int i;
2361
2362     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2363     mfc_context->post_deblocking_output.bo = NULL;
2364
2365     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2366     mfc_context->pre_deblocking_output.bo = NULL;
2367
2368     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2369     mfc_context->uncompressed_picture_source.bo = NULL;
2370
2371     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
2372     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2373
2374     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2375         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2376         mfc_context->direct_mv_buffers[i].bo = NULL;
2377     }
2378
2379     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2380     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2381
2382     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2383     mfc_context->macroblock_status_buffer.bo = NULL;
2384
2385     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2386     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2387
2388     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2389     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2390
2391
2392     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2393         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2394         mfc_context->reference_surfaces[i].bo = NULL;
2395     }
2396
2397     i965_gpe_context_destroy(&mfc_context->gpe_context);
2398
2399     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2400     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2401
2402     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2403     mfc_context->aux_batchbuffer_surface.bo = NULL;
2404
2405     if (mfc_context->aux_batchbuffer)
2406         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2407
2408     mfc_context->aux_batchbuffer = NULL;
2409
2410     free(mfc_context);
2411 }
2412
2413 static VAStatus gen9_mfc_pipeline(VADriverContextP ctx,
2414                                   VAProfile profile,
2415                                   struct encode_state *encode_state,
2416                                   struct intel_encoder_context *encoder_context)
2417 {
2418     VAStatus vaStatus;
2419
2420     switch (profile) {
2421     case VAProfileH264ConstrainedBaseline:
2422     case VAProfileH264Main:
2423     case VAProfileH264High:
2424     case VAProfileH264MultiviewHigh:
2425     case VAProfileH264StereoHigh:
2426         vaStatus = gen9_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2427         break;
2428
2429         /* FIXME: add for other profile */
2430     case VAProfileMPEG2Simple:
2431     case VAProfileMPEG2Main:
2432         vaStatus = gen9_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2433         break;
2434
2435     default:
2436         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2437         break;
2438     }
2439
2440     return vaStatus;
2441 }
2442
2443 Bool gen9_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2444 {
2445     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2446
2447     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2448
2449     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2450     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2451
2452     mfc_context->gpe_context.curbe.length = 32 * 4;
2453
2454     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2455     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2456     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2457     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2458     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2459
2460     i965_gpe_load_kernels(ctx,
2461                           &mfc_context->gpe_context,
2462                           gen9_mfc_kernels,
2463                           NUM_MFC_KERNEL);
2464
2465     mfc_context->pipe_mode_select = gen9_mfc_pipe_mode_select;
2466     mfc_context->set_surface_state = gen9_mfc_surface_state;
2467     mfc_context->ind_obj_base_addr_state = gen9_mfc_ind_obj_base_addr_state;
2468     mfc_context->avc_img_state = gen9_mfc_avc_img_state;
2469     mfc_context->avc_qm_state = gen9_mfc_avc_qm_state;
2470     mfc_context->avc_fqm_state = gen9_mfc_avc_fqm_state;
2471     mfc_context->insert_object = gen9_mfc_avc_insert_object;
2472     mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
2473
2474     encoder_context->mfc_context = mfc_context;
2475     encoder_context->mfc_context_destroy = gen9_mfc_context_destroy;
2476     encoder_context->mfc_pipeline = gen9_mfc_pipeline;
2477     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2478
2479     return True;
2480 }