1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45 #include "vp8_probs.h"
46
47 #define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
48 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
49 #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
50
51 #define MFC_SOFTWARE_HASWELL    1
52
53 #define B0_STEP_REV             2
54 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
55
56 static const uint32_t gen9_mfc_batchbuffer_avc_intra[][4] = {
57 #include "shaders/utils/mfc_batchbuffer_avc_intra.g9b"
58 };
59
60 static const uint32_t gen9_mfc_batchbuffer_avc_inter[][4] = {
61 #include "shaders/utils/mfc_batchbuffer_avc_inter.g9b"
62 };
63
64 static struct i965_kernel gen9_mfc_kernels[] = {
65     {
66         "MFC AVC INTRA BATCHBUFFER ",
67         MFC_BATCHBUFFER_AVC_INTRA,
68         gen9_mfc_batchbuffer_avc_intra,
69         sizeof(gen9_mfc_batchbuffer_avc_intra),
70         NULL
71     },
72
73     {
74         "MFC AVC INTER BATCHBUFFER ",
75         MFC_BATCHBUFFER_AVC_INTER,
76         gen9_mfc_batchbuffer_avc_inter,
77         sizeof(gen9_mfc_batchbuffer_avc_inter),
78         NULL
79     },
80 };
81
82 #define         INTER_MODE_MASK         0x03
83 #define         INTER_8X8               0x03
84 #define         INTER_16X8              0x01
85 #define         INTER_8X16              0x02
86 #define         SUBMB_SHAPE_MASK        0x00FF00
87
88 #define         INTER_MV8               (4 << 20)
89 #define         INTER_MV32              (6 << 20)
90
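/*
 * Program MFX_PIPE_MODE_SELECT for encoding: long-format mode, VLD,
 * pre-/post-deblocking outputs enabled according to which buffers are
 * present in the MFC context, and the codec chosen by standard_select
 * (AVC, MPEG-2 or VP8).
 */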
91 static void
92 gen9_mfc_pipe_mode_select(VADriverContextP ctx,
93                           int standard_select,
94                           struct intel_encoder_context *encoder_context)
95 {
96     struct intel_batchbuffer *batch = encoder_context->base.batch;
97     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
98
99     assert(standard_select == MFX_FORMAT_MPEG2 ||
100            standard_select == MFX_FORMAT_AVC  ||
101            standard_select == MFX_FORMAT_VP8);
102
103     BEGIN_BCS_BATCH(batch, 5);
104
105     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
106     OUT_BCS_BATCH(batch,
107                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
108                   (MFD_MODE_VLD << 15) | /* VLD mode */
109                   (0 << 10) | /* Stream-Out Enable */
110                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
111                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
112                   (0 << 6)  | /* frame statistics stream-out enable*/
113                   (0 << 5)  | /* not in stitch mode */
114                   (1 << 4)  | /* encoding mode */
115                   (standard_select << 0));  /* standard select: AVC, MPEG-2 or VP8 */
116     OUT_BCS_BATCH(batch,
117                   (0 << 7)  | /* expand NOA bus flag */
118                   (0 << 6)  | /* disable slice-level clock gating */
119                   (0 << 5)  | /* disable clock gating for NOA */
120                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
121                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
122                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
123                   (0 << 1)  |
124                   (0 << 0));
125     OUT_BCS_BATCH(batch, 0);
126     OUT_BCS_BATCH(batch, 0);
127
128     ADVANCE_BCS_BATCH(batch);
129 }
130
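/*
 * Program MFX_SURFACE_STATE: planar 4:2:0 with interleaved U/V (NV12),
 * Y-major tiling; width, height, pitch and the U(Cb) offset are taken
 * from mfc_context->surface_state.
 */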
131 static void
132 gen9_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
133 {
134     struct intel_batchbuffer *batch = encoder_context->base.batch;
135     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
136
137     BEGIN_BCS_BATCH(batch, 6);
138
139     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
140     OUT_BCS_BATCH(batch, 0);
141     OUT_BCS_BATCH(batch,
142                   ((mfc_context->surface_state.height - 1) << 18) |
143                   ((mfc_context->surface_state.width - 1) << 4));
144     OUT_BCS_BATCH(batch,
145                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
146                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
147                   (0 << 22) | /* surface object control state, FIXME??? */
148                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
149                   (0 << 2)  | /* must be 0 for interleave U/V */
150                   (1 << 1)  | /* must be tiled */
151                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
152     OUT_BCS_BATCH(batch,
153                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
154                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
155     OUT_BCS_BATCH(batch, 0);
156
157     ADVANCE_BCS_BATCH(batch);
158 }
159
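/*
 * Program MFX_IND_OBJ_BASE_ADDR_STATE.  For VP8 the indirect bitstream
 * upper bound (DW4-5) is taken from the end_offset of the internal
 * PAK-BSE (coded data) buffer; for the other codecs the VME output
 * buffer is programmed as the indirect MV object (DW6-10).  DW21-25
 * always point at the PAK-BSE object used by the encoder.
 */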
160 static void
161 gen9_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
162                                  struct intel_encoder_context *encoder_context)
163 {
164     struct intel_batchbuffer *batch = encoder_context->base.batch;
165     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
166     struct gen6_vme_context *vme_context = encoder_context->vme_context;
167     int vme_size;
168
169     BEGIN_BCS_BATCH(batch, 26);
170
171     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
172     /* the DW1-3 is for the MFX indirect bitstream offset */
173     OUT_BCS_BATCH(batch, 0);
174     OUT_BCS_BATCH(batch, 0);
175     OUT_BCS_BATCH(batch, 0);
176
177     vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;
178
179     /* the DW4-5 is the MFX upper bound */
180     if (encoder_context->codec == CODEC_VP8) {
181         OUT_BCS_RELOC(batch,
182                 mfc_context->mfc_indirect_pak_bse_object.bo,
183                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
184                 mfc_context->mfc_indirect_pak_bse_object.end_offset);
185         OUT_BCS_BATCH(batch, 0);
186         /* the DW6-10 is for MFX Indirect MV Object Base Address */
187         OUT_BCS_BATCH(batch, 0);
188         OUT_BCS_BATCH(batch, 0);
189         OUT_BCS_BATCH(batch, 0);
190         OUT_BCS_BATCH(batch, 0);
191         OUT_BCS_BATCH(batch, 0);
192     } else {
193         OUT_BCS_BATCH(batch, 0);
194         OUT_BCS_BATCH(batch, 0);
195         /* the DW6-10 is for MFX Indirect MV Object Base Address */
196         OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
197         OUT_BCS_BATCH(batch, 0);
198         OUT_BCS_BATCH(batch, 0);
199         OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size);
200         OUT_BCS_BATCH(batch, 0);
201     }
202
203     /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
204     OUT_BCS_BATCH(batch, 0);
205     OUT_BCS_BATCH(batch, 0);
206     OUT_BCS_BATCH(batch, 0);
207     OUT_BCS_BATCH(batch, 0);
208     OUT_BCS_BATCH(batch, 0);
209
210     /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
211     OUT_BCS_BATCH(batch, 0);
212     OUT_BCS_BATCH(batch, 0);
213     OUT_BCS_BATCH(batch, 0);
214     OUT_BCS_BATCH(batch, 0);
215     OUT_BCS_BATCH(batch, 0);
216
217     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
218     OUT_BCS_RELOC(batch,
219                   mfc_context->mfc_indirect_pak_bse_object.bo,
220                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
221                   0);
222     OUT_BCS_BATCH(batch, 0);
223     OUT_BCS_BATCH(batch, 0);
224
225     OUT_BCS_RELOC(batch,
226                   mfc_context->mfc_indirect_pak_bse_object.bo,
227                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
228                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
229     OUT_BCS_BATCH(batch, 0);
230
231     ADVANCE_BCS_BATCH(batch);
232 }
233
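/*
 * Program MFX_AVC_IMG_STATE: frame size in MBs, chroma QP offsets,
 * weighted-prediction flags, CABAC/CAVLC selection and the 8x8
 * transform flag from the picture parameters, plus fixed intra/inter
 * MB conformance size limits.
 */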
234 static void
235 gen9_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
236                        struct intel_encoder_context *encoder_context)
237 {
238     struct intel_batchbuffer *batch = encoder_context->base.batch;
239     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
240     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
241
242     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
243     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
244
245     BEGIN_BCS_BATCH(batch, 16);
246
247     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
248     /*DW1. MB setting of frame */
249     OUT_BCS_BATCH(batch,
250                   ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
251     OUT_BCS_BATCH(batch,
252                   ((height_in_mbs - 1) << 16) |
253                   ((width_in_mbs - 1) << 0));
254     /* DW3 QP setting */
255     OUT_BCS_BATCH(batch,
256                   (0 << 24) |   /* Second Chroma QP Offset */
257                   (0 << 16) |   /* Chroma QP Offset */
258                   (0 << 14) |   /* Max-bit conformance Intra flag */
259                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
260                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
261                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
262                   (0 << 8)  |   /* FIXME: Image Structure */
263                   (0 << 0) );   /* Current Decoded Image Frame Store ID, reserved in Encode mode */
264     OUT_BCS_BATCH(batch,
265                   (0 << 16) |   /* Minimum Frame size */
266                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
267                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slice 1 frame */
268                   (0 << 13) |   /* CABAC 0 word insertion test enable */
269                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
270                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
271                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
272                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
273                   (0 << 6)  |   /* Only valid for VLD decoding mode */
274                   (0 << 5)  |   /* Constrained Intra Prediction Flag, from PPS */
275                   (0 << 4)  |   /* Direct 8x8 inference flag */
276                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
277                   (1 << 2)  |   /* Frame MB only flag */
278                   (0 << 1)  |   /* MBAFF mode is not active */
279                   (0 << 0));    /* Field picture flag */
280     /* DW5 Trellis quantization */
281     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
282     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
283                   (0xBB8 << 16) |       /* InterMbMaxSz */
284                   (0xEE8) );            /* IntraMbMaxSz */
285     OUT_BCS_BATCH(batch, 0);            /* Reserved */
286     /* DW8. QP delta */
287     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
288     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
289     /* DW10. Bit setting for MB */
290     OUT_BCS_BATCH(batch, 0x8C000000);
291     OUT_BCS_BATCH(batch, 0x00010000);
292     /* DW12. */
293     OUT_BCS_BATCH(batch, 0);
294     OUT_BCS_BATCH(batch, 0x02010100);
295     /* DW14. For short format */
296     OUT_BCS_BATCH(batch, 0);
297     OUT_BCS_BATCH(batch, 0);
298
299     ADVANCE_BCS_BATCH(batch);
300 }
301
302 static void
303 gen9_mfc_qm_state(VADriverContextP ctx,
304                   int qm_type,
305                   unsigned int *qm,
306                   int qm_length,
307                   struct intel_encoder_context *encoder_context)
308 {
309     struct intel_batchbuffer *batch = encoder_context->base.batch;
310     unsigned int qm_buffer[16];
311
312     assert(qm_length <= 16);
313     assert(sizeof(*qm) == 4);
314     memcpy(qm_buffer, qm, qm_length * 4);
315
316     BEGIN_BCS_BATCH(batch, 18);
317     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
318     OUT_BCS_BATCH(batch, qm_type << 0);
319     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
320     ADVANCE_BCS_BATCH(batch);
321 }
322
323 static void
324 gen9_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
325 {
326     unsigned int qm[16] = {
327         0x10101010, 0x10101010, 0x10101010, 0x10101010,
328         0x10101010, 0x10101010, 0x10101010, 0x10101010,
329         0x10101010, 0x10101010, 0x10101010, 0x10101010,
330         0x10101010, 0x10101010, 0x10101010, 0x10101010
331     };
332
333     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
334     gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
335     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
336     gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
337 }
338
339 static void
340 gen9_mfc_fqm_state(VADriverContextP ctx,
341                    int fqm_type,
342                    unsigned int *fqm,
343                    int fqm_length,
344                    struct intel_encoder_context *encoder_context)
345 {
346     struct intel_batchbuffer *batch = encoder_context->base.batch;
347     unsigned int fqm_buffer[32];
348
349     assert(fqm_length <= 32);
350     assert(sizeof(*fqm) == 4);
351     memcpy(fqm_buffer, fqm, fqm_length * 4);
352
353     BEGIN_BCS_BATCH(batch, 34);
354     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
355     OUT_BCS_BATCH(batch, fqm_type << 0);
356     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
357     ADVANCE_BCS_BATCH(batch);
358 }
359
360 static void
361 gen9_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
362 {
363     unsigned int qm[32] = {
364         0x10001000, 0x10001000, 0x10001000, 0x10001000,
365         0x10001000, 0x10001000, 0x10001000, 0x10001000,
366         0x10001000, 0x10001000, 0x10001000, 0x10001000,
367         0x10001000, 0x10001000, 0x10001000, 0x10001000,
368         0x10001000, 0x10001000, 0x10001000, 0x10001000,
369         0x10001000, 0x10001000, 0x10001000, 0x10001000,
370         0x10001000, 0x10001000, 0x10001000, 0x10001000,
371         0x10001000, 0x10001000, 0x10001000, 0x10001000
372     };
373
374     gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
375     gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
376     gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
377     gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
378 }
379
380 static void
381 gen9_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
382                            unsigned int *insert_data, int length_in_dws, int data_bits_in_last_dw,
383                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
384                            struct intel_batchbuffer *batch)
385 {
386     if (batch == NULL)
387         batch = encoder_context->base.batch;
388
389     if (data_bits_in_last_dw == 0)
390         data_bits_in_last_dw = 32;
391
392     BEGIN_BCS_BATCH(batch, length_in_dws + 2);
393
394     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (length_in_dws + 2 - 2));
395     OUT_BCS_BATCH(batch,
396                   (0 << 16) |   /* always start at offset 0 */
397                   (data_bits_in_last_dw << 8) |
398                   (skip_emul_byte_count << 4) |
399                   (!!emulation_flag << 3) |
400                   ((!!is_last_header) << 2) |
401                   ((!!is_end_of_slice) << 1) |
402                   (0 << 0));    /* FIXME: ??? */
403     intel_batchbuffer_data(batch, insert_data, length_in_dws * 4);
404
405     ADVANCE_BCS_BATCH(batch);
406 }
407
408
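/*
 * Per-frame (re)initialization of the MFC context: drop references to
 * the buffers left over from the previous frame and allocate the
 * scratch buffers (intra row store, macroblock status, deblocking
 * filter row store, BSD/MPC row store) sized from the frame dimensions
 * in macroblocks.  The aux batchbuffer is also recreated and exposed as
 * a surface so the slice-level commands can be built into it.
 */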
409 static void gen9_mfc_init(VADriverContextP ctx,
410                           struct encode_state *encode_state,
411                           struct intel_encoder_context *encoder_context)
412 {
413     struct i965_driver_data *i965 = i965_driver_data(ctx);
414     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
415     dri_bo *bo;
416     int i;
417     int width_in_mbs = 0;
418     int height_in_mbs = 0;
419     int slice_batchbuffer_size;
420
421     if (encoder_context->codec == CODEC_H264 ||
422         encoder_context->codec == CODEC_H264_MVC) {
423         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
424         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
425         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
426     } else {
427         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
428
429         assert(encoder_context->codec == CODEC_MPEG2);
430
431         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
432         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
433     }
434
435     slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
436                 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
437
438     /*Encode common setup for MFC*/
439     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
440     mfc_context->post_deblocking_output.bo = NULL;
441
442     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
443     mfc_context->pre_deblocking_output.bo = NULL;
444
445     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
446     mfc_context->uncompressed_picture_source.bo = NULL;
447
448     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
449     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
450
451     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
452         if (mfc_context->direct_mv_buffers[i].bo != NULL)
453             dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
454         mfc_context->direct_mv_buffers[i].bo = NULL;
455     }
456
457     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
458         if (mfc_context->reference_surfaces[i].bo != NULL)
459             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
460         mfc_context->reference_surfaces[i].bo = NULL;
461     }
462
463     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
464     bo = dri_bo_alloc(i965->intel.bufmgr,
465                       "Buffer",
466                       width_in_mbs * 64,
467                       64);
468     assert(bo);
469     mfc_context->intra_row_store_scratch_buffer.bo = bo;
470
471     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
472     bo = dri_bo_alloc(i965->intel.bufmgr,
473                       "Buffer",
474                       width_in_mbs * height_in_mbs * 16,
475                       64);
476     assert(bo);
477     mfc_context->macroblock_status_buffer.bo = bo;
478
479     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
480     bo = dri_bo_alloc(i965->intel.bufmgr,
481                       "Buffer",
482                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
483                       64);
484     assert(bo);
485     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
486
487     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
488     bo = dri_bo_alloc(i965->intel.bufmgr,
489                       "Buffer",
490                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
491                       0x1000);
492     assert(bo);
493     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
494
495     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
496     mfc_context->mfc_batchbuffer_surface.bo = NULL;
497
498     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
499     mfc_context->aux_batchbuffer_surface.bo = NULL;
500
501     if (mfc_context->aux_batchbuffer)
502         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
503
504     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
505     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
506     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
507     mfc_context->aux_batchbuffer_surface.pitch = 16;
508     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
509     mfc_context->aux_batchbuffer_surface.size_block = 16;
510
511     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
512 }
513
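/*
 * Program MFX_PIPE_BUF_ADDR_STATE: pre-/post-deblocking outputs, the
 * uncompressed source picture, the macroblock status buffer, the intra
 * and deblocking-filter row-store scratch buffers, and the reference
 * picture list; the ILDB buffers are left unused.
 */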
514 static void
515 gen9_mfc_pipe_buf_addr_state(VADriverContextP ctx,
516                              struct intel_encoder_context *encoder_context)
517 {
518     struct intel_batchbuffer *batch = encoder_context->base.batch;
519     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
520     int i;
521
522     BEGIN_BCS_BATCH(batch, 61);
523
524     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
525
526     /* the DW1-3 is for pre_deblocking */
527     if (mfc_context->pre_deblocking_output.bo)
528         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
529                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
530                       0);
531     else
532         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
533
534     OUT_BCS_BATCH(batch, 0);
535     OUT_BCS_BATCH(batch, 0);
536     /* the DW4-6 is for the post_deblocking */
537
538     /* post output addr  */
539     if (mfc_context->post_deblocking_output.bo)
540         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
541                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
542                       0);
543     else
544         OUT_BCS_BATCH(batch, 0);
545
546     OUT_BCS_BATCH(batch, 0);
547     OUT_BCS_BATCH(batch, 0);
548
549     /* the DW7-9 is for the uncompressed_picture */
550     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
551                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
552                   0); /* uncompressed data */
553
554     OUT_BCS_BATCH(batch, 0);
555     OUT_BCS_BATCH(batch, 0);
556
557     /* the DW10-12 is for the mb status */
558     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
559                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
560                   0); /* StreamOut data*/
561
562     OUT_BCS_BATCH(batch, 0);
563     OUT_BCS_BATCH(batch, 0);
564
565     /* the DW13-15 is for the intra_row_store_scratch */
566     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
567                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
568                   0);
569
570     OUT_BCS_BATCH(batch, 0);
571     OUT_BCS_BATCH(batch, 0);
572
573     /* the DW16-18 is for the deblocking filter */
574     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
575                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
576                   0);
577
578     OUT_BCS_BATCH(batch, 0);
579     OUT_BCS_BATCH(batch, 0);
580
581     /* the DW 19-50 is for Reference pictures*/
582     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
583         if ( mfc_context->reference_surfaces[i].bo != NULL) {
584             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
585                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
586                           0);
587         } else {
588             OUT_BCS_BATCH(batch, 0);
589         }
590
591         OUT_BCS_BATCH(batch, 0);
592     }
593
594     OUT_BCS_BATCH(batch, 0);
595
596     /* The DW 52-54 is for the MB status buffer */
597     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
598                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
599                   0);
600
601     OUT_BCS_BATCH(batch, 0);
602     OUT_BCS_BATCH(batch, 0);
603
604     /* the DW 55-57 is the ILDB buffer */
605     OUT_BCS_BATCH(batch, 0);
606     OUT_BCS_BATCH(batch, 0);
607     OUT_BCS_BATCH(batch, 0);
608
609     /* the DW 58-60 is the second ILDB buffer */
610     OUT_BCS_BATCH(batch, 0);
611     OUT_BCS_BATCH(batch, 0);
612     OUT_BCS_BATCH(batch, 0);
613
614     ADVANCE_BCS_BATCH(batch);
615 }
616
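/*
 * Program MFX_AVC_DIRECTMODE_STATE: direct-MV buffers for the reference
 * frames, the MV write buffer for the current frame, followed by the
 * POC list entries.
 */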
617 static void
618 gen9_mfc_avc_directmode_state(VADriverContextP ctx,
619                               struct intel_encoder_context *encoder_context)
620 {
621     struct intel_batchbuffer *batch = encoder_context->base.batch;
622     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
623
624     int i;
625
626     BEGIN_BCS_BATCH(batch, 71);
627
628     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
629
630     /* Reference frames and Current frames */
631     /* the DW1-32 is for the direct MV for reference */
632     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
633         if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
634             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
635                           I915_GEM_DOMAIN_INSTRUCTION, 0,
636                           0);
637             OUT_BCS_BATCH(batch, 0);
638         } else {
639             OUT_BCS_BATCH(batch, 0);
640             OUT_BCS_BATCH(batch, 0);
641         }
642     }
643
644     OUT_BCS_BATCH(batch, 0);
645
646     /* the DW34-36 is the MV for the current reference */
647     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
648                   I915_GEM_DOMAIN_INSTRUCTION, 0,
649                   0);
650
651     OUT_BCS_BATCH(batch, 0);
652     OUT_BCS_BATCH(batch, 0);
653
654     /* POC list */
655     for(i = 0; i < 32; i++) {
656         OUT_BCS_BATCH(batch, i/2);
657     }
658     OUT_BCS_BATCH(batch, 0);
659     OUT_BCS_BATCH(batch, 0);
660
661     ADVANCE_BCS_BATCH(batch);
662 }
663
664
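/*
 * Program MFX_BSP_BUF_BASE_ADDR_STATE: only the BSD/MPC row-store
 * scratch buffer is programmed; the MPR row store and bitplane read
 * buffers are not used by the encoder.
 */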
665 static void
666 gen9_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
667                                  struct intel_encoder_context *encoder_context)
668 {
669     struct intel_batchbuffer *batch = encoder_context->base.batch;
670     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
671
672     BEGIN_BCS_BATCH(batch, 10);
673
674     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
675     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
676                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
677                   0);
678     OUT_BCS_BATCH(batch, 0);
679     OUT_BCS_BATCH(batch, 0);
680
681     /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
682     OUT_BCS_BATCH(batch, 0);
683     OUT_BCS_BATCH(batch, 0);
684     OUT_BCS_BATCH(batch, 0);
685
686     /* the DW7-9 is for Bitplane Read Buffer Base Address */
687     OUT_BCS_BATCH(batch, 0);
688     OUT_BCS_BATCH(batch, 0);
689     OUT_BCS_BATCH(batch, 0);
690
691     ADVANCE_BCS_BATCH(batch);
692 }
693
694
695 static void gen9_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
696                                                       struct encode_state *encode_state,
697                                                       struct intel_encoder_context *encoder_context)
698 {
699     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
700
701     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
702     mfc_context->set_surface_state(ctx, encoder_context);
703     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
704     gen9_mfc_pipe_buf_addr_state(ctx, encoder_context);
705     gen9_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
706     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
707     mfc_context->avc_qm_state(ctx, encoder_context);
708     mfc_context->avc_fqm_state(ctx, encoder_context);
709     gen9_mfc_avc_directmode_state(ctx, encoder_context);
710     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
711 }
712
713
714 static VAStatus gen9_mfc_run(VADriverContextP ctx,
715                              struct encode_state *encode_state,
716                              struct intel_encoder_context *encoder_context)
717 {
718     struct intel_batchbuffer *batch = encoder_context->base.batch;
719
720     intel_batchbuffer_flush(batch);             //run the pipeline
721
722     return VA_STATUS_SUCCESS;
723 }
724
725
726 static VAStatus
727 gen9_mfc_stop(VADriverContextP ctx,
728               struct encode_state *encode_state,
729               struct intel_encoder_context *encoder_context,
730               int *encoded_bits_size)
731 {
732     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
733     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
734     VACodedBufferSegment *coded_buffer_segment;
735
736     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
737     assert(vaStatus == VA_STATUS_SUCCESS);
738     *encoded_bits_size = coded_buffer_segment->size * 8;
739     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
740
741     return VA_STATUS_SUCCESS;
742 }
743
744
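/*
 * Emit MFX_AVC_SLICE_STATE for one slice: slice type, active reference
 * counts and weighted-prediction parameters, slice QP and deblocking
 * offsets, the first-MB position of this slice and of the next one,
 * plus the rate-control grow/shrink/correct values taken from the
 * per-slice-type bit_rate_control_context.
 */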
745 static void
746 gen9_mfc_avc_slice_state(VADriverContextP ctx,
747                          VAEncPictureParameterBufferH264 *pic_param,
748                          VAEncSliceParameterBufferH264 *slice_param,
749                          struct encode_state *encode_state,
750                          struct intel_encoder_context *encoder_context,
751                          int rate_control_enable,
752                          int qp,
753                          struct intel_batchbuffer *batch)
754 {
755     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
756     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
757     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
758     int beginmb = slice_param->macroblock_address;
759     int endmb = beginmb + slice_param->num_macroblocks;
760     int beginx = beginmb % width_in_mbs;
761     int beginy = beginmb / width_in_mbs;
762     int nextx =  endmb % width_in_mbs;
763     int nexty = endmb / width_in_mbs;
764     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
765     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
766     int maxQpN, maxQpP;
767     unsigned char correct[6], grow, shrink;
768     int i;
769     int weighted_pred_idc = 0;
770     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
771     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
772     int num_ref_l0 = 0, num_ref_l1 = 0;
773
774     if (batch == NULL)
775         batch = encoder_context->base.batch;
776
777     if (slice_type == SLICE_TYPE_I) {
778         luma_log2_weight_denom = 0;
779         chroma_log2_weight_denom = 0;
780     } else if (slice_type == SLICE_TYPE_P) {
781         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
782         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
783
784         if (slice_param->num_ref_idx_active_override_flag)
785             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
786     } else if (slice_type == SLICE_TYPE_B) {
787         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
788         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
789         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
790
791         if (slice_param->num_ref_idx_active_override_flag) {
792             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
793             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
794         }
795
796         if (weighted_pred_idc == 2) {
797             /* 8.4.3 - Derivation process for prediction weights (8-279) */
798             luma_log2_weight_denom = 5;
799             chroma_log2_weight_denom = 5;
800         }
801     }
802
803     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
804     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
805
806     for (i = 0; i < 6; i++)
807         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
808
809     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit +
810         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
811     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit +
812         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
813
814     BEGIN_BCS_BATCH(batch, 11);
815
816     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
817     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
818
819     OUT_BCS_BATCH(batch,
820                   (num_ref_l0 << 16) |
821                   (num_ref_l1 << 24) |
822                   (chroma_log2_weight_denom << 8) |
823                   (luma_log2_weight_denom << 0));
824
825     OUT_BCS_BATCH(batch,
826                   (weighted_pred_idc << 30) |
827                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
828                   (slice_param->disable_deblocking_filter_idc << 27) |
829                   (slice_param->cabac_init_idc << 24) |
830                   (qp<<16) |                    /*Slice Quantization Parameter*/
831                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
832                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
833     OUT_BCS_BATCH(batch,
834                   (beginy << 24) |                      /* First MB X&Y, the begin position of the current slice */
835                   (beginx << 16) |
836                   slice_param->macroblock_address );
837     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
838     OUT_BCS_BATCH(batch,
839                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
840                   (1 << 30) |           /*ResetRateControlCounter*/
841                   (0 << 28) |           /*RC Trigger Mode = Always Rate Control*/
842                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
843                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/
844                   (0 << 22) |     /*QP mode, don't modify CBP*/
845                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/
846                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/
847                   (last_slice << 19) |     /*IsLastSlice*/
848                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
849                   (1 << 17) |       /*HeaderPresentFlag*/
850                   (1 << 16) |       /*SliceData PresentFlag*/
851                   (1 << 15) |       /*TailPresentFlag*/
852                   (1 << 13) |       /*RBSP NAL TYPE*/
853                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
854     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
855     OUT_BCS_BATCH(batch,
856                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/
857                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
858                   (shrink << 8)  |
859                   (grow << 0));
860     OUT_BCS_BATCH(batch,
861                   (correct[5] << 20) |
862                   (correct[4] << 16) |
863                   (correct[3] << 12) |
864                   (correct[2] << 8) |
865                   (correct[1] << 4) |
866                   (correct[0] << 0));
867     OUT_BCS_BATCH(batch, 0);
868
869     ADVANCE_BCS_BATCH(batch);
870 }
871
872
873 #ifdef MFC_SOFTWARE_HASWELL
874
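/*
 * Emit one MFC_AVC_PAK_OBJECT for an intra macroblock.  The MB type and
 * flags are repacked from the first dword of the VME output record into
 * intra_msg; msg[1..3] carry the intra prediction modes.
 */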
875 static int
876 gen9_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
877                               int qp,unsigned int *msg,
878                               struct intel_encoder_context *encoder_context,
879                               unsigned char target_mb_size, unsigned char max_mb_size,
880                               struct intel_batchbuffer *batch)
881 {
882     int len_in_dwords = 12;
883     unsigned int intra_msg;
884 #define         INTRA_MSG_FLAG          (1 << 13)
885 #define         INTRA_MBTYPE_MASK       (0x1F0000)
886     if (batch == NULL)
887         batch = encoder_context->base.batch;
888
889     BEGIN_BCS_BATCH(batch, len_in_dwords);
890
891     intra_msg = msg[0] & 0xC0FF;
892     intra_msg |= INTRA_MSG_FLAG;
893     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
894     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
895     OUT_BCS_BATCH(batch, 0);
896     OUT_BCS_BATCH(batch, 0);
897     OUT_BCS_BATCH(batch,
898                   (0 << 24) |           /* PackedMvNum, Debug*/
899                   (0 << 20) |           /* No motion vector */
900                   (1 << 19) |           /* CbpDcY */
901                   (1 << 18) |           /* CbpDcU */
902                   (1 << 17) |           /* CbpDcV */
903                   intra_msg);
904
905     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
906     OUT_BCS_BATCH(batch, 0x000F000F);     /* Code Block Pattern */
907
908     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
909
910     /*Stuff for Intra MB*/
911     OUT_BCS_BATCH(batch, msg[1]);       /* Using Intra16x16, not 4x4 pred mode */
912     OUT_BCS_BATCH(batch, msg[2]);
913     OUT_BCS_BATCH(batch, msg[3]&0xFF);
914
915     /*MaxSizeInWord and TargetSizeInWord*/
916     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
917                   (target_mb_size << 16) );
918
919     OUT_BCS_BATCH(batch, 0);
920
921     ADVANCE_BCS_BATCH(batch);
922
923     return len_in_dwords;
924 }
925
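/*
 * Emit one MFC_AVC_PAK_OBJECT for an inter macroblock.  The motion
 * vectors produced by VME are stored per sub-block, so for 16x8, 8x16
 * and non-subdivided 8x8 partitions they are first rearranged into the
 * layout the PAK command expects, and a larger MV payload is selected
 * when an 8x8 MB is further sub-partitioned.
 */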
926 static int
927 gen9_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
928                               unsigned int *msg, unsigned int offset,
929                               struct intel_encoder_context *encoder_context,
930                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
931                               struct intel_batchbuffer *batch)
932 {
933     struct gen6_vme_context *vme_context = encoder_context->vme_context;
934     int len_in_dwords = 12;
935     unsigned int inter_msg = 0;
936     if (batch == NULL)
937         batch = encoder_context->base.batch;
938     {
939 #define MSG_MV_OFFSET   4
940         unsigned int *mv_ptr;
941         mv_ptr = msg + MSG_MV_OFFSET;
942         /* MV of VME output is based on 16 sub-blocks. So it is necessary
943          * to convert them to be compatible with the format of AVC_PAK
944          * command.
945          */
946         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
947             /* MV[0] and MV[2] are replicated */
948             mv_ptr[4] = mv_ptr[0];
949             mv_ptr[5] = mv_ptr[1];
950             mv_ptr[2] = mv_ptr[8];
951             mv_ptr[3] = mv_ptr[9];
952             mv_ptr[6] = mv_ptr[8];
953             mv_ptr[7] = mv_ptr[9];
954         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
955             /* MV[0] and MV[1] are replicated */
956             mv_ptr[2] = mv_ptr[0];
957             mv_ptr[3] = mv_ptr[1];
958             mv_ptr[4] = mv_ptr[16];
959             mv_ptr[5] = mv_ptr[17];
960             mv_ptr[6] = mv_ptr[24];
961             mv_ptr[7] = mv_ptr[25];
962         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
963                    !(msg[1] & SUBMB_SHAPE_MASK)) {
964             /* Don't touch MV[0] or MV[1] */
965             mv_ptr[2] = mv_ptr[8];
966             mv_ptr[3] = mv_ptr[9];
967             mv_ptr[4] = mv_ptr[16];
968             mv_ptr[5] = mv_ptr[17];
969             mv_ptr[6] = mv_ptr[24];
970             mv_ptr[7] = mv_ptr[25];
971         }
972     }
973
974     BEGIN_BCS_BATCH(batch, len_in_dwords);
975
976     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
977
978     inter_msg = 32;
979     /* MV quantity */
980     if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
981         if (msg[1] & SUBMB_SHAPE_MASK)
982             inter_msg = 128;
983     }
984     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
985     OUT_BCS_BATCH(batch, offset);
986     inter_msg = msg[0] & (0x1F00FFFF);
987     inter_msg |= INTER_MV8;
988     inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
989     if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
990         (msg[1] & SUBMB_SHAPE_MASK)) {
991         inter_msg |= INTER_MV32;
992     }
993
994     OUT_BCS_BATCH(batch, inter_msg);
995
996     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
997     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */
998 #if 0
999     if ( slice_type == SLICE_TYPE_B) {
1000         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1001     } else {
1002         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1003     }
1004 #else
1005     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1006 #endif
1007
1008     inter_msg = msg[1] >> 8;
1009     /*Stuff for Inter MB*/
1010     OUT_BCS_BATCH(batch, inter_msg);
1011     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
1012     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
1013
1014     /*MaxSizeInWord and TargetSizeInWord*/
1015     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1016                   (target_mb_size << 16) );
1017
1018     OUT_BCS_BATCH(batch, 0x0);
1019     ADVANCE_BCS_BATCH(batch);
1020
1021     return len_in_dwords;
1022 }
1023
1024 #define         AVC_INTRA_RDO_OFFSET    4
1025 #define         AVC_INTER_RDO_OFFSET    10
1026 #define         AVC_INTER_MSG_OFFSET    8
1027 #define         AVC_INTER_MV_OFFSET             48
1028 #define         AVC_RDO_MASK            0xFFFF
1029
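/*
 * Build the PAK commands for one slice into slice_batch: emit the slice
 * state, the packed headers for the first slice, then walk every MB of
 * the slice reading its VME output record and choosing an intra or
 * inter PAK object by comparing the intra/inter RDO costs stored in the
 * record; finally append the tail data (two dwords for the last slice
 * of the frame, one otherwise).
 */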
1030 static void
1031 gen9_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1032                                        struct encode_state *encode_state,
1033                                        struct intel_encoder_context *encoder_context,
1034                                        int slice_index,
1035                                        struct intel_batchbuffer *slice_batch)
1036 {
1037     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1038     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1039     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1040     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1041     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1042     unsigned int *msg = NULL, offset = 0;
1043     unsigned char *msg_ptr = NULL;
1044     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1045     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1046     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1047     int i,x,y;
1048     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1049     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1050     unsigned int tail_data[] = { 0x0, 0x0 };
1051     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1052     int is_intra = slice_type == SLICE_TYPE_I;
1053     int qp_slice;
1054
1055     qp_slice = qp;
1056     if (rate_control_mode == VA_RC_CBR) {
1057         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1058         if (encode_state->slice_header_index[slice_index] == 0) {
1059             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1060             qp_slice = qp;
1061         }
1062     }
1063
1064     /* only support for 8-bit pixel bit-depth */
1065     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1066     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1067     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1068     assert(qp >= 0 && qp < 52);
1069
1070     gen9_mfc_avc_slice_state(ctx,
1071                              pPicParameter,
1072                              pSliceParameter,
1073                              encode_state, encoder_context,
1074                              (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
1075
1076     if (slice_index == 0)
1077         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1078
1079     intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1080
1081     dri_bo_map(vme_context->vme_output.bo , 1);
1082     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1083
1084     msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1089
1090     for (i = pSliceParameter->macroblock_address;
1091         i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1092         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1093         x = i % width_in_mbs;
1094         y = i / width_in_mbs;
1095         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1096
1097         if (is_intra) {
1098             assert(msg);
1099             gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1100         } else {
1101             int inter_rdo, intra_rdo;
1102             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1103             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1104             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1105             if (intra_rdo < inter_rdo) {
1106                 gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1107             } else {
1108                 msg += AVC_INTER_MSG_OFFSET;
1109                 gen9_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1110             }
1111         }
1112     }
1113
1114     dri_bo_unmap(vme_context->vme_output.bo);
1115
1116     if ( last_slice ) {
1117         mfc_context->insert_object(ctx, encoder_context,
1118                                    tail_data, 2, 8,
1119                                    2, 1, 1, 0, slice_batch);
1120     } else {
1121         mfc_context->insert_object(ctx, encoder_context,
1122                                    tail_data, 1, 8,
1123                                    1, 1, 1, 0, slice_batch);
1124     }
1125
1126
1127 }
1128
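/*
 * CPU path (MFC_SOFTWARE_HASWELL): the slice-level PAK commands for all
 * slices are written directly into the aux batchbuffer, which is then
 * terminated with MI_BATCH_BUFFER_END and returned as a dri_bo
 * (presumably chained from the main BCS batch by the caller).
 */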
1129 static dri_bo *
1130 gen9_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1131                                   struct encode_state *encode_state,
1132                                   struct intel_encoder_context *encoder_context)
1133 {
1134     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1135     struct intel_batchbuffer *batch;
1136     dri_bo *batch_bo;
1137     int i;
1138
1139     batch = mfc_context->aux_batchbuffer;
1140     batch_bo = batch->buffer;
1141     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1142         gen9_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1143     }
1144
1145     intel_batchbuffer_align(batch, 8);
1146
1147     BEGIN_BCS_BATCH(batch, 2);
1148     OUT_BCS_BATCH(batch, 0);
1149     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1150     ADVANCE_BCS_BATCH(batch);
1151
1152     dri_bo_reference(batch_bo);
1153     intel_batchbuffer_free(batch);
1154     mfc_context->aux_batchbuffer = NULL;
1155
1156     return batch_bo;
1157 }
1158
1159 #else
1160
1161 static void
1162 gen9_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1163                                     struct encode_state *encode_state,
1164                                     struct intel_encoder_context *encoder_context)
1165
1166 {
1167     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1168     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1169
1170     assert(vme_context->vme_output.bo);
1171     mfc_context->buffer_suface_setup(ctx,
1172                                      &mfc_context->gpe_context,
1173                                      &vme_context->vme_output,
1174                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1175                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1176     assert(mfc_context->aux_batchbuffer_surface.bo);
1177     mfc_context->buffer_suface_setup(ctx,
1178                                      &mfc_context->gpe_context,
1179                                      &mfc_context->aux_batchbuffer_surface,
1180                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1181                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1182 }
1183
1184 static void
1185 gen9_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1186                                      struct encode_state *encode_state,
1187                                      struct intel_encoder_context *encoder_context)
1188
1189 {
1190     struct i965_driver_data *i965 = i965_driver_data(ctx);
1191     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1192     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1193     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1194     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1195     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1196     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1197     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1198     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr,
1199                                                            "MFC batchbuffer",
1200                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1201                                                            0x1000);
1202     mfc_context->buffer_suface_setup(ctx,
1203                                      &mfc_context->gpe_context,
1204                                      &mfc_context->mfc_batchbuffer_surface,
1205                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1206                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1207 }
1208
1209 static void
1210 gen9_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
1211                                     struct encode_state *encode_state,
1212                                     struct intel_encoder_context *encoder_context)
1213 {
1214     gen9_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1215     gen9_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1216 }
1217
1218 static void
1219 gen9_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
1220                                 struct encode_state *encode_state,
1221                                 struct intel_encoder_context *encoder_context)
1222 {
1223     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1224     struct gen6_interface_descriptor_data *desc;
1225     int i;
1226     dri_bo *bo;
1227
1228     bo = mfc_context->gpe_context.idrt.bo;
1229     dri_bo_map(bo, 1);
1230     assert(bo->virtual);
1231     desc = bo->virtual;
1232
1233     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1234         struct i965_kernel *kernel;
1235
1236         kernel = &mfc_context->gpe_context.kernels[i];
1237         assert(sizeof(*desc) == 32);
1238
1239         /* Set up the descriptor table */
1240         memset(desc, 0, sizeof(*desc));
1241         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1242         desc->desc2.sampler_count = 0;
1243         desc->desc2.sampler_state_pointer = 0;
1244         desc->desc3.binding_table_entry_count = 2;
1245         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1246         desc->desc4.constant_urb_entry_read_offset = 0;
1247         desc->desc4.constant_urb_entry_read_length = 4;
1248
1249         /*kernel start*/
1250         dri_bo_emit_reloc(bo,
1251                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1252                           0,
1253                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1254                           kernel->bo);
1255         desc++;
1256     }
1257
1258     dri_bo_unmap(bo);
1259 }
1260
1261 static void
1262 gen9_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
1263                                     struct encode_state *encode_state,
1264                                     struct intel_encoder_context *encoder_context)
1265 {
1266     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1267
1268     (void)mfc_context;
1269 }
1270
1271 static void
1272 gen9_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1273                                          int index,
1274                                          int head_offset,
1275                                          int batchbuffer_offset,
1276                                          int head_size,
1277                                          int tail_size,
1278                                          int number_mb_cmds,
1279                                          int first_object,
1280                                          int last_object,
1281                                          int last_slice,
1282                                          int mb_x,
1283                                          int mb_y,
1284                                          int width_in_mbs,
1285                                          int qp)
1286 {
1287     BEGIN_BATCH(batch, 12);
1288
1289     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1290     OUT_BATCH(batch, index);
1291     OUT_BATCH(batch, 0);
1292     OUT_BATCH(batch, 0);
1293     OUT_BATCH(batch, 0);
1294     OUT_BATCH(batch, 0);
1295
1296     /* inline data */
1297     OUT_BATCH(batch, head_offset);
1298     OUT_BATCH(batch, batchbuffer_offset);
1299     OUT_BATCH(batch,
1300               head_size << 16 |
1301               tail_size);
1302     OUT_BATCH(batch,
1303               number_mb_cmds << 16 |
1304               first_object << 2 |
1305               last_object << 1 |
1306               last_slice);
1307     OUT_BATCH(batch,
1308               mb_y << 8 |
1309               mb_x);
1310     OUT_BATCH(batch,
1311               qp << 16 |
1312               width_in_mbs);
1313
1314     ADVANCE_BATCH(batch);
1315 }
1316
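/*
 * Split one slice into MEDIA_OBJECT commands of at most 128 macroblocks
 * each. The head data (slice header etc.) is charged to the first object
 * and the tail data to the last one; any remainder smaller than 128 MBs
 * is emitted by the trailing block after the loop.
 */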
1317 static void
1318 gen9_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1319                                        struct intel_encoder_context *encoder_context,
1320                                        VAEncSliceParameterBufferH264 *slice_param,
1321                                        int head_offset,
1322                                        unsigned short head_size,
1323                                        unsigned short tail_size,
1324                                        int batchbuffer_offset,
1325                                        int qp,
1326                                        int last_slice)
1327 {
1328     struct intel_batchbuffer *batch = encoder_context->base.batch;
1329     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1330     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1331     int total_mbs = slice_param->num_macroblocks;
1332     int number_mb_cmds = 128;
1333     int starting_mb = 0;
1334     int last_object = 0;
1335     int first_object = 1;
1336     int i;
1337     int mb_x, mb_y;
1338     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1339
1340     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1341         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1342         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1343         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1344         assert(mb_x <= 255 && mb_y <= 255);
1345
1346         starting_mb += number_mb_cmds;
1347
1348         gen9_mfc_batchbuffer_emit_object_command(batch,
1349                                                  index,
1350                                                  head_offset,
1351                                                  batchbuffer_offset,
1352                                                  head_size,
1353                                                  tail_size,
1354                                                  number_mb_cmds,
1355                                                  first_object,
1356                                                  last_object,
1357                                                  last_slice,
1358                                                  mb_x,
1359                                                  mb_y,
1360                                                  width_in_mbs,
1361                                                  qp);
1362
1363         if (first_object) {
1364             head_offset += head_size;
1365             batchbuffer_offset += head_size;
1366         }
1367
1368         if (last_object) {
1369             head_offset += tail_size;
1370             batchbuffer_offset += tail_size;
1371         }
1372
1373         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1374
1375         first_object = 0;
1376     }
1377
1378     if (!last_object) {
1379         last_object = 1;
1380         number_mb_cmds = total_mbs % number_mb_cmds;
1381         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1382         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1383         assert(mb_x <= 255 && mb_y <= 255);
1384         starting_mb += number_mb_cmds;
1385
1386         gen9_mfc_batchbuffer_emit_object_command(batch,
1387                                                  index,
1388                                                  head_offset,
1389                                                  batchbuffer_offset,
1390                                                  head_size,
1391                                                  tail_size,
1392                                                  number_mb_cmds,
1393                                                  first_object,
1394                                                  last_object,
1395                                                  last_slice,
1396                                                  mb_x,
1397                                                  mb_y,
1398                                                  width_in_mbs,
1399                                                  qp);
1400     }
1401 }
1402
1403 /*
1404  * Return the size in OWords (16 bytes each)
1405  */
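/*
 * A rough illustration with assumed numbers: for a 1920x1080 frame
 * (8160 MBs) encoded as a single slice, a 20-OWord head and a 2-OWord
 * tail, the slice occupies 20 + 2 + 8160 * CMD_LEN_IN_OWORD
 * = 20 + 2 + 8160 * 3 = 24502 OWords of the hardware batchbuffer.
 */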
1406 static int
1407 gen9_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1408                                struct encode_state *encode_state,
1409                                struct intel_encoder_context *encoder_context,
1410                                int slice_index,
1411                                int batchbuffer_offset)
1412 {
1413     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1414     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1415     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1416     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1417     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1418     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1419     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1420     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1421     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1422     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1423     unsigned int tail_data[] = { 0x0, 0x0 };
1424     long head_offset;
1425     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1426     unsigned short head_size, tail_size;
1427     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1428     int qp_slice;
1429
1430     qp_slice = qp;
1431     if (rate_control_mode == VA_RC_CBR) {
1432         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1433         if (encode_state->slice_header_index[slice_index] == 0) {
1434             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1435             qp_slice = qp;
1436         }
1437     }
1438
1439     /* only 8-bit pixel bit-depth is supported */
1440     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1441     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1442     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1443     assert(qp >= 0 && qp < 52);
1444
1445     head_offset = old_used / 16;
1446     gen9_mfc_avc_slice_state(ctx,
1447                              pPicParameter,
1448                              pSliceParameter,
1449                              encode_state,
1450                              encoder_context,
1451                              (rate_control_mode == VA_RC_CBR),
1452                              qp_slice,
1453                              slice_batch);
1454
1455     if (slice_index == 0)
1456         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1457
1458
1459     intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1460
1461
1462     intel_batchbuffer_align(slice_batch, 16); /* align to an OWord boundary */
1463     used = intel_batchbuffer_used_size(slice_batch);
1464     head_size = (used - old_used) / 16;
1465     old_used = used;
1466
1467     /* tail */
1468     if (last_slice) {
1469         mfc_context->insert_object(ctx,
1470                                    encoder_context,
1471                                    tail_data,
1472                                    2,
1473                                    8,
1474                                    2,
1475                                    1,
1476                                    1,
1477                                    0,
1478                                    slice_batch);
1479     } else {
1480         mfc_context->insert_object(ctx,
1481                                    encoder_context,
1482                                    tail_data,
1483                                    1,
1484                                    8,
1485                                    1,
1486                                    1,
1487                                    1,
1488                                    0,
1489                                    slice_batch);
1490     }
1491
1492     intel_batchbuffer_align(slice_batch, 16); /* align to an OWord boundary */
1493     used = intel_batchbuffer_used_size(slice_batch);
1494     tail_size = (used - old_used) / 16;
1495
1496     gen9_mfc_avc_batchbuffer_slice_command(ctx,
1497                                            encoder_context,
1498                                            pSliceParameter,
1499                                            head_offset,
1500                                            head_size,
1501                                            tail_size,
1502                                            batchbuffer_offset,
1503                                            qp,
1504                                            last_slice);
1505
1506     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1507 }
1508
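/*
 * Run the media (GPE) pipeline that expands the slice data into the
 * hardware batchbuffer: set up the pipeline state, then emit one group
 * of MEDIA_OBJECT commands per slice, accumulating the OWord offset
 * into the output surface.
 */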
1509 static void
1510 gen9_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1511                                   struct encode_state *encode_state,
1512                                   struct intel_encoder_context *encoder_context)
1513 {
1514     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1515     struct intel_batchbuffer *batch = encoder_context->base.batch;
1516     int i, size, offset = 0;
1517
1518     intel_batchbuffer_start_atomic(batch, 0x4000);
1519     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1520
1521     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1522         size = gen9_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1523         offset += size;
1524     }
1525
1526     intel_batchbuffer_end_atomic(batch);
1527     intel_batchbuffer_flush(batch);
1528 }
1529
1530 static void
1531 gen9_mfc_build_avc_batchbuffer(VADriverContextP ctx,
1532                                struct encode_state *encode_state,
1533                                struct intel_encoder_context *encoder_context)
1534 {
1535     gen9_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1536     gen9_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1537     gen9_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1538     gen9_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1539 }
1540
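/*
 * Build the MFC batchbuffer on the GPU (surfaces, IDRT, constants and
 * the media pipeline) and return a new reference to the batchbuffer BO
 * that the BCS ring will chain to.
 */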
1541 static dri_bo *
1542 gen9_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1543                                   struct encode_state *encode_state,
1544                                   struct intel_encoder_context *encoder_context)
1545 {
1546     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1547
1548     gen9_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1549     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1550
1551     return mfc_context->mfc_batchbuffer_surface.bo;
1552 }
1553
1554 #endif
1555
1556 static void
1557 gen9_mfc_avc_pipeline_programing(VADriverContextP ctx,
1558                                  struct encode_state *encode_state,
1559                                  struct intel_encoder_context *encoder_context)
1560 {
1561     struct intel_batchbuffer *batch = encoder_context->base.batch;
1562     dri_bo *slice_batch_bo;
1563
1564     if (intel_mfc_interlace_check(ctx, encode_state, encoder_context)) {
1565         fprintf(stderr, "The current VA driver doesn't support interlaced mode!\n");
1566         assert(0);
1567         return;
1568     }
1569
1570 #ifdef MFC_SOFTWARE_HASWELL
1571     slice_batch_bo = gen9_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1572 #else
1573     slice_batch_bo = gen9_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1574 #endif
1575
1576     // begin programming
1577     intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1578     intel_batchbuffer_emit_mi_flush(batch);
1579
1580     // picture level programming
1581     gen9_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1582
1583     BEGIN_BCS_BATCH(batch, 3);
1584     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1585     OUT_BCS_RELOC(batch,
1586                   slice_batch_bo,
1587                   I915_GEM_DOMAIN_COMMAND, 0,
1588                   0);
1589     OUT_BCS_BATCH(batch, 0);
1590     ADVANCE_BCS_BATCH(batch);
1591
1592     // end programming
1593     intel_batchbuffer_end_atomic(batch);
1594
1595     dri_bo_unreference(slice_batch_bo);
1596 }
1597
1598
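/*
 * Encode one AVC frame. In CBR mode the frame may be packed several
 * times: after each pass intel_mfc_brc_postpack() checks the coded size
 * against the HRD buffer and adjusts the QP; the loop ends when there is
 * no HRD violation (the HRD context is then updated) or when the
 * violation cannot be repaired even at the minimum/maximum QP. Other
 * rate-control modes encode the frame exactly once.
 */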
1599 static VAStatus
1600 gen9_mfc_avc_encode_picture(VADriverContextP ctx,
1601                             struct encode_state *encode_state,
1602                             struct intel_encoder_context *encoder_context)
1603 {
1604     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1605     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1606     int current_frame_bits_size;
1607     int sts;
1608
1609     for (;;) {
1610         gen9_mfc_init(ctx, encode_state, encoder_context);
1611         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1612         /* Program the BCS pipeline */
1613         gen9_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);   // fill the pipeline
1614         gen9_mfc_run(ctx, encode_state, encoder_context);
1615         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1616             gen9_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1617             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1618             if (sts == BRC_NO_HRD_VIOLATION) {
1619                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1620                 break;
1621             }
1622             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1623                 if (!mfc_context->hrd.violation_noted) {
1624                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1625                     mfc_context->hrd.violation_noted = 1;
1626                 }
1627                 return VA_STATUS_SUCCESS;
1628             }
1629         } else {
1630             break;
1631         }
1632     }
1633
1634     return VA_STATUS_SUCCESS;
1635 }
1636
1637 /*
1638  * MPEG-2
1639  */
1640
1641 static const int
1642 va_to_gen9_mpeg2_picture_type[3] = {
1643     1,  /* I */
1644     2,  /* P */
1645     3   /* B */
1646 };
1647
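/*
 * Program MFX_MPEG2_PIC_STATE: DW1 packs the four f_code values and the
 * picture_coding_extension flags, DW2 the picture coding type, DW3 the
 * frame size in macroblocks (with slice concealment enabled), and the
 * per-MB size limits are left at their 0xFFF maximum.
 */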
1648 static void
1649 gen9_mfc_mpeg2_pic_state(VADriverContextP ctx,
1650                          struct intel_encoder_context *encoder_context,
1651                          struct encode_state *encode_state)
1652 {
1653     struct intel_batchbuffer *batch = encoder_context->base.batch;
1654     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1655     VAEncPictureParameterBufferMPEG2 *pic_param;
1656     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1657     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1658     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1659
1660     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1661     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1662     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1663
1664     BEGIN_BCS_BATCH(batch, 13);
1665     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1666     OUT_BCS_BATCH(batch,
1667                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1668                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1669                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1670                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1671                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1672                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1673                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1674                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1675                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1676                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1677                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1678                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1679     OUT_BCS_BATCH(batch,
1680                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1681                   va_to_gen9_mpeg2_picture_type[pic_param->picture_type] << 9 |
1682                   0);
1683     OUT_BCS_BATCH(batch,
1684                   1 << 31 |     /* slice concealment */
1685                   (height_in_mbs - 1) << 16 |
1686                   (width_in_mbs - 1));
1687
1688     if (slice_param && slice_param->quantiser_scale_code >= 14)
1689         OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1690     else
1691         OUT_BCS_BATCH(batch, 0);
1692
1693     OUT_BCS_BATCH(batch, 0);
1694     OUT_BCS_BATCH(batch,
1695                   0xFFF << 16 | /* InterMBMaxSize */
1696                   0xFFF << 0 |  /* IntraMBMaxSize */
1697                   0);
1698     OUT_BCS_BATCH(batch, 0);
1699     OUT_BCS_BATCH(batch, 0);
1700     OUT_BCS_BATCH(batch, 0);
1701     OUT_BCS_BATCH(batch, 0);
1702     OUT_BCS_BATCH(batch, 0);
1703     OUT_BCS_BATCH(batch, 0);
1704     ADVANCE_BCS_BATCH(batch);
1705 }
1706
1707 static void
1708 gen9_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1709 {
1710     unsigned char intra_qm[64] = {
1711         8, 16, 19, 22, 26, 27, 29, 34,
1712         16, 16, 22, 24, 27, 29, 34, 37,
1713         19, 22, 26, 27, 29, 34, 34, 38,
1714         22, 22, 26, 27, 29, 34, 37, 40,
1715         22, 26, 27, 29, 32, 35, 40, 48,
1716         26, 27, 29, 32, 35, 40, 48, 58,
1717         26, 27, 29, 34, 38, 46, 56, 69,
1718         27, 29, 35, 38, 46, 56, 69, 83
1719     };
1720
1721     unsigned char non_intra_qm[64] = {
1722         16, 16, 16, 16, 16, 16, 16, 16,
1723         16, 16, 16, 16, 16, 16, 16, 16,
1724         16, 16, 16, 16, 16, 16, 16, 16,
1725         16, 16, 16, 16, 16, 16, 16, 16,
1726         16, 16, 16, 16, 16, 16, 16, 16,
1727         16, 16, 16, 16, 16, 16, 16, 16,
1728         16, 16, 16, 16, 16, 16, 16, 16,
1729         16, 16, 16, 16, 16, 16, 16, 16
1730     };
1731
1732     gen9_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1733     gen9_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16, encoder_context);
1734 }
1735
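/*
 * The forward quantizer matrices are the element-wise reciprocals of the
 * quantizer matrices in 16-bit fixed point (65536 / q), so the flat
 * non-intra matrix of 16 becomes 0x1000 for every coefficient.
 */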
1736 static void
1737 gen9_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1738 {
1739     unsigned short intra_fqm[64] = {
1740         65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1741         65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1742         65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1743         65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1744         65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1745         65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1746         65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1747         65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1748     };
1749
1750     unsigned short non_intra_fqm[64] = {
1751         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1752         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1753         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1754         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1755         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1756         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1757         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1758         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1759     };
1760
1761     gen9_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1762     gen9_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1763 }
1764
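/*
 * Emit MFC_MPEG2_SLICEGROUP_STATE: start and next slice-group MB
 * coordinates, the intra/last-group flags and the slice-group QP. With
 * LoadSlicePointerFlag set to 0 the bitstream pointer is loaded only
 * once, for the first slice group of the frame.
 */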
1765 static void
1766 gen9_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1767                                 struct intel_encoder_context *encoder_context,
1768                                 int x, int y,
1769                                 int next_x, int next_y,
1770                                 int is_first_slice_group,
1771                                 int is_last_slice_group,
1772                                 int intra_slice,
1773                                 int qp,
1774                                 struct intel_batchbuffer *batch)
1775 {
1776     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1777
1778     if (batch == NULL)
1779         batch = encoder_context->base.batch;
1780
1781     BEGIN_BCS_BATCH(batch, 8);
1782
1783     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1784     OUT_BCS_BATCH(batch,
1785                   0 << 31 |                             /* MbRateCtrlFlag */
1786                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1787                   1 << 17 |                             /* Insert Header before the first slice group data */
1788                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1789                   1 << 15 |                             /* TailPresentFlag: always 1 */
1790                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1791                   !!intra_slice << 13 |                 /* IntraSlice */
1792                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1793                   0);
1794     OUT_BCS_BATCH(batch,
1795                   next_y << 24 |
1796                   next_x << 16 |
1797                   y << 8 |
1798                   x << 0 |
1799                   0);
1800     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1801     /* bitstream pointer is only loaded once for the first slice of a frame when
1802      * LoadSlicePointerFlag is 0
1803      */
1804     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1805     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1806     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1807     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1808
1809     ADVANCE_BCS_BATCH(batch);
1810 }
1811
1812 static int
1813 gen9_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1814                                 struct intel_encoder_context *encoder_context,
1815                                 int x, int y,
1816                                 int first_mb_in_slice,
1817                                 int last_mb_in_slice,
1818                                 int first_mb_in_slice_group,
1819                                 int last_mb_in_slice_group,
1820                                 int mb_type,
1821                                 int qp_scale_code,
1822                                 int coded_block_pattern,
1823                                 unsigned char target_size_in_word,
1824                                 unsigned char max_size_in_word,
1825                                 struct intel_batchbuffer *batch)
1826 {
1827     int len_in_dwords = 9;
1828
1829     if (batch == NULL)
1830         batch = encoder_context->base.batch;
1831
1832     BEGIN_BCS_BATCH(batch, len_in_dwords);
1833
1834     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1835     OUT_BCS_BATCH(batch,
1836                   0 << 24 |     /* PackedMvNum */
1837                   0 << 20 |     /* MvFormat */
1838                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1839                   0 << 15 |     /* TransformFlag: frame DCT */
1840                   0 << 14 |     /* FieldMbFlag */
1841                   1 << 13 |     /* IntraMbFlag */
1842                   mb_type << 8 |   /* MbType: Intra */
1843                   0 << 2 |      /* SkipMbFlag */
1844                   0 << 0 |      /* InterMbMode */
1845                   0);
1846     OUT_BCS_BATCH(batch, y << 16 | x);
1847     OUT_BCS_BATCH(batch,
1848                   max_size_in_word << 24 |
1849                   target_size_in_word << 16 |
1850                   coded_block_pattern << 6 |      /* CBP */
1851                   0);
1852     OUT_BCS_BATCH(batch,
1853                   last_mb_in_slice << 31 |
1854                   first_mb_in_slice << 30 |
1855                   0 << 27 |     /* EnableCoeffClamp */
1856                   last_mb_in_slice_group << 26 |
1857                   0 << 25 |     /* MbSkipConvDisable */
1858                   first_mb_in_slice_group << 24 |
1859                   0 << 16 |     /* MvFieldSelect */
1860                   qp_scale_code << 0 |
1861                   0);
1862     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1863     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1864     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1865     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1866
1867     ADVANCE_BCS_BATCH(batch);
1868
1869     return len_in_dwords;
1870 }
1871
1872 /* Byte offset */
1873 #define MPEG2_INTER_MV_OFFSET   48
1874
1875 static struct _mv_ranges
1876 {
1877     int low;    /* in the unit of 1/2 pixel */
1878     int high;   /* in the unit of 1/2 pixel */
1879 } mv_ranges[] = {
1880     {0, 0},
1881     {-16, 15},
1882     {-32, 31},
1883     {-64, 63},
1884     {-128, 127},
1885     {-256, 255},
1886     {-512, 511},
1887     {-1024, 1023},
1888     {-2048, 2047},
1889     {-4096, 4095}
1890 };
1891
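/*
 * Clamp one motion-vector component (in half-pel units): drop the vector
 * to zero if the referenced 16-pel block would fall outside the picture,
 * then clamp it to the [low, high] range allowed by the given f_code.
 */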
1892 static int
1893 mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
1894 {
1895     if (mv + pos * 16 * 2 < 0 ||
1896         mv + (pos + 1) * 16 * 2 > display_max * 2)
1897         mv = 0;
1898
1899     if (f_code > 0 && f_code < 10) {
1900         if (mv < mv_ranges[f_code].low)
1901             mv = mv_ranges[f_code].low;
1902
1903         if (mv > mv_ranges[f_code].high)
1904             mv = mv_ranges[f_code].high;
1905     }
1906
1907     return mv;
1908 }
1909
1910 static int
1911 gen9_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
1912                                 struct encode_state *encode_state,
1913                                 struct intel_encoder_context *encoder_context,
1914                                 unsigned int *msg,
1915                                 int width_in_mbs, int height_in_mbs,
1916                                 int x, int y,
1917                                 int first_mb_in_slice,
1918                                 int last_mb_in_slice,
1919                                 int first_mb_in_slice_group,
1920                                 int last_mb_in_slice_group,
1921                                 int qp_scale_code,
1922                                 unsigned char target_size_in_word,
1923                                 unsigned char max_size_in_word,
1924                                 struct intel_batchbuffer *batch)
1925 {
1926     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1927     int len_in_dwords = 9;
1928     short *mvptr, mvx0, mvy0, mvx1, mvy1;
1929
1930     if (batch == NULL)
1931         batch = encoder_context->base.batch;
1932
1933     mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);
1934     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
1935     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
1936     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
1937     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
1938
1939     BEGIN_BCS_BATCH(batch, len_in_dwords);
1940
1941     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1942     OUT_BCS_BATCH(batch,
1943                   2 << 24 |     /* PackedMvNum */
1944                   7 << 20 |     /* MvFormat */
1945                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1946                   0 << 15 |     /* TransformFlag: frame DCT */
1947                   0 << 14 |     /* FieldMbFlag */
1948                   0 << 13 |     /* IntraMbFlag */
1949                   1 << 8 |      /* MbType: Frame-based */
1950                   0 << 2 |      /* SkipMbFlag */
1951                   0 << 0 |      /* InterMbMode */
1952                   0);
1953     OUT_BCS_BATCH(batch, y << 16 | x);
1954     OUT_BCS_BATCH(batch,
1955                   max_size_in_word << 24 |
1956                   target_size_in_word << 16 |
1957                   0x3f << 6 |   /* CBP */
1958                   0);
1959     OUT_BCS_BATCH(batch,
1960                   last_mb_in_slice << 31 |
1961                   first_mb_in_slice << 30 |
1962                   0 << 27 |     /* EnableCoeffClamp */
1963                   last_mb_in_slice_group << 26 |
1964                   0 << 25 |     /* MbSkipConvDisable */
1965                   first_mb_in_slice_group << 24 |
1966                   0 << 16 |     /* MvFieldSelect */
1967                   qp_scale_code << 0 |
1968                   0);
1969
1970     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
1971     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
1972     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1973     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1974
1975     ADVANCE_BCS_BATCH(batch);
1976
1977     return len_in_dwords;
1978 }
1979
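/*
 * Insert the application-supplied packed sequence and picture headers
 * (if any) into the slice batch ahead of the first slice group. MPEG-2
 * needs no emulation prevention bytes, so the data is copied verbatim.
 */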
1980 static void
1981 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
1982                                            struct encode_state *encode_state,
1983                                            struct intel_encoder_context *encoder_context,
1984                                            struct intel_batchbuffer *slice_batch)
1985 {
1986     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1987     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
1988
1989     if (encode_state->packed_header_data[idx]) {
1990         VAEncPackedHeaderParameterBuffer *param = NULL;
1991         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1992         unsigned int length_in_bits;
1993
1994         assert(encode_state->packed_header_param[idx]);
1995         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1996         length_in_bits = param->bit_length;
1997
1998         mfc_context->insert_object(ctx,
1999                                    encoder_context,
2000                                    header_data,
2001                                    ALIGN(length_in_bits, 32) >> 5,
2002                                    length_in_bits & 0x1f,
2003                                    5,   /* FIXME: check it */
2004                                    0,
2005                                    0,
2006                                    0,   /* no emulation prevention bytes are needed for MPEG-2 */
2007                                    slice_batch);
2008     }
2009
2010     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2011
2012     if (encode_state->packed_header_data[idx]) {
2013         VAEncPackedHeaderParameterBuffer *param = NULL;
2014         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2015         unsigned int length_in_bits;
2016
2017         assert(encode_state->packed_header_param[idx]);
2018         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2019         length_in_bits = param->bit_length;
2020
2021         mfc_context->insert_object(ctx,
2022                                    encoder_context,
2023                                    header_data,
2024                                    ALIGN(length_in_bits, 32) >> 5,
2025                                    length_in_bits & 0x1f,
2026                                    5,   /* FIXME: check it */
2027                                    0,
2028                                    0,
2029                                    0,   /* no emulation prevention bytes are needed for MPEG-2 */
2030                                    slice_batch);
2031     }
2032 }
2033
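/*
 * PAK one slice group: emit the slice-group state, the packed headers
 * for the first group and a section delimiter, then one PAK object per
 * macroblock. For non-intra slices the intra/inter decision is made per
 * MB by comparing the intra and inter RDO costs produced by the VME
 * stage, and the inter motion vectors are read from the VME output.
 */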
2034 static void
2035 gen9_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2036                                     struct encode_state *encode_state,
2037                                     struct intel_encoder_context *encoder_context,
2038                                     int slice_index,
2039                                     VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2040                                     struct intel_batchbuffer *slice_batch)
2041 {
2042     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2043     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2044     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2045     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2046     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2047     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2048     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2049     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2050     int i, j;
2051     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2052     unsigned int *msg = NULL;
2053     unsigned char *msg_ptr = NULL;
2054
2055     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2056     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2057     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2058     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2059
2060     dri_bo_map(vme_context->vme_output.bo , 0);
2061     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2062
2063     if (next_slice_group_param) {
2064         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2065         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2066     } else {
2067         h_next_start_pos = 0;
2068         v_next_start_pos = height_in_mbs;
2069     }
2070
2071     gen9_mfc_mpeg2_slicegroup_state(ctx,
2072                                     encoder_context,
2073                                     h_start_pos,
2074                                     v_start_pos,
2075                                     h_next_start_pos,
2076                                     v_next_start_pos,
2077                                     slice_index == 0,
2078                                     next_slice_group_param == NULL,
2079                                     slice_param->is_intra_slice,
2080                                     slice_param->quantiser_scale_code,
2081                                     slice_batch);
2082
2083     if (slice_index == 0)
2084         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2085
2086     /* Insert '00' to make sure the header is valid */
2087     mfc_context->insert_object(ctx,
2088                                encoder_context,
2089                                (unsigned int*)section_delimiter,
2090                                1,
2091                                8,   /* 8 bits in the last DWORD */
2092                                1,   /* 1 byte */
2093                                1,
2094                                0,
2095                                0,
2096                                slice_batch);
2097
2098     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2099         /* PAK for each macroblocks */
2100         for (j = 0; j < slice_param->num_macroblocks; j++) {
2101             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2102             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2103             int first_mb_in_slice = (j == 0);
2104             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2105             int first_mb_in_slice_group = (i == 0 && j == 0);
2106             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2107                                           j == slice_param->num_macroblocks - 1);
2108
2109             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2110
2111             if (slice_param->is_intra_slice) {
2112                 gen9_mfc_mpeg2_pak_object_intra(ctx,
2113                                                 encoder_context,
2114                                                 h_pos, v_pos,
2115                                                 first_mb_in_slice,
2116                                                 last_mb_in_slice,
2117                                                 first_mb_in_slice_group,
2118                                                 last_mb_in_slice_group,
2119                                                 0x1a,
2120                                                 slice_param->quantiser_scale_code,
2121                                                 0x3f,
2122                                                 0,
2123                                                 0xff,
2124                                                 slice_batch);
2125             } else {
2126                 int inter_rdo, intra_rdo;
2127                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2128                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2129
2130                 if (intra_rdo < inter_rdo)
2131                     gen9_mfc_mpeg2_pak_object_intra(ctx,
2132                                                      encoder_context,
2133                                                      h_pos, v_pos,
2134                                                      first_mb_in_slice,
2135                                                      last_mb_in_slice,
2136                                                      first_mb_in_slice_group,
2137                                                      last_mb_in_slice_group,
2138                                                      0x1a,
2139                                                      slice_param->quantiser_scale_code,
2140                                                      0x3f,
2141                                                      0,
2142                                                      0xff,
2143                                                      slice_batch);
2144                 else
2145                     gen9_mfc_mpeg2_pak_object_inter(ctx,
2146                                                 encode_state,
2147                                                 encoder_context,
2148                                                 msg,
2149                                                 width_in_mbs, height_in_mbs,
2150                                                 h_pos, v_pos,
2151                                                 first_mb_in_slice,
2152                                                 last_mb_in_slice,
2153                                                 first_mb_in_slice_group,
2154                                                 last_mb_in_slice_group,
2155                                                 slice_param->quantiser_scale_code,
2156                                                 0,
2157                                                 0xff,
2158                                                 slice_batch);
2159             }
2160         }
2161
2162         slice_param++;
2163     }
2164
2165     dri_bo_unmap(vme_context->vme_output.bo);
2166
2167     /* tail data */
2168     if (next_slice_group_param == NULL) { /* end of a picture */
2169         mfc_context->insert_object(ctx,
2170                                    encoder_context,
2171                                    (unsigned int *)tail_delimiter,
2172                                    2,
2173                                    8,   /* 8 bits in the last DWORD */
2174                                    5,   /* 5 bytes */
2175                                    1,
2176                                    1,
2177                                    0,
2178                                    slice_batch);
2179     } else {        /* end of a slice group */
2180         mfc_context->insert_object(ctx,
2181                                    encoder_context,
2182                                    (unsigned int *)section_delimiter,
2183                                    1,
2184                                    8,   /* 8 bits in the last DWORD */
2185                                    1,   /* 1 byte */
2186                                    1,
2187                                    1,
2188                                    0,
2189                                    slice_batch);
2190     }
2191 }
2192
2193 /*
2194  * A batch buffer for all slices, including slice state
2195  * slice insert object and slice pak object commands
2196  */
2197 static dri_bo *
2198 gen9_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2199                                           struct encode_state *encode_state,
2200                                           struct intel_encoder_context *encoder_context)
2201 {
2202     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2203     struct intel_batchbuffer *batch;
2204     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2205     dri_bo *batch_bo;
2206     int i;
2207
2208     batch = mfc_context->aux_batchbuffer;
2209     batch_bo = batch->buffer;
2210
2211     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2212         if (i == encode_state->num_slice_params_ext - 1)
2213             next_slice_group_param = NULL;
2214         else
2215             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2216
2217         gen9_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2218     }
2219
2220     intel_batchbuffer_align(batch, 8);
2221
2222     BEGIN_BCS_BATCH(batch, 2);
2223     OUT_BCS_BATCH(batch, 0);
2224     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2225     ADVANCE_BCS_BATCH(batch);
2226
2227     dri_bo_reference(batch_bo);
2228     intel_batchbuffer_free(batch);
2229     mfc_context->aux_batchbuffer = NULL;
2230
2231     return batch_bo;
2232 }
2233
2234 static void
2235 gen9_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2236                                            struct encode_state *encode_state,
2237                                            struct intel_encoder_context *encoder_context)
2238 {
2239     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2240
2241     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2242     mfc_context->set_surface_state(ctx, encoder_context);
2243     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2244     gen9_mfc_pipe_buf_addr_state(ctx, encoder_context);
2245     gen9_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2246     gen9_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2247     gen9_mfc_mpeg2_qm_state(ctx, encoder_context);
2248     gen9_mfc_mpeg2_fqm_state(ctx, encoder_context);
2249 }
2250
2251 static void
2252 gen9_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2253                                    struct encode_state *encode_state,
2254                                    struct intel_encoder_context *encoder_context)
2255 {
2256     struct intel_batchbuffer *batch = encoder_context->base.batch;
2257     dri_bo *slice_batch_bo;
2258
2259     slice_batch_bo = gen9_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2260
2261     // begin programming
2262     intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
2263     intel_batchbuffer_emit_mi_flush(batch);
2264
2265     // picture level programming
2266     gen9_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2267
2268     BEGIN_BCS_BATCH(batch, 4);
2269     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
2270     OUT_BCS_RELOC(batch,
2271                   slice_batch_bo,
2272                   I915_GEM_DOMAIN_COMMAND, 0,
2273                   0);
2274     OUT_BCS_BATCH(batch, 0);
2275     OUT_BCS_BATCH(batch, 0);
2276     ADVANCE_BCS_BATCH(batch);
2277
2278     // end programming
2279     intel_batchbuffer_end_atomic(batch);
2280
2281     dri_bo_unreference(slice_batch_bo);
2282 }
2283
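/*
 * Bind the per-frame surfaces and buffers: the reconstructed surface,
 * the forward/backward references (a missing backward reference falls
 * back to the forward one), the input YUV surface and the coded buffer.
 * The coded-buffer segment header is reset so the coded size reads as
 * unknown until the frame has been packed.
 */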
2284 static VAStatus
2285 intel_mfc_mpeg2_prepare(VADriverContextP ctx,
2286                         struct encode_state *encode_state,
2287                         struct intel_encoder_context *encoder_context)
2288 {
2289     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2290     struct object_surface *obj_surface;
2291     struct object_buffer *obj_buffer;
2292     struct i965_coded_buffer_segment *coded_buffer_segment;
2293     VAStatus vaStatus = VA_STATUS_SUCCESS;
2294     dri_bo *bo;
2295     int i;
2296
2297     /* reconstructed surface */
2298     obj_surface = encode_state->reconstructed_object;
2299     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2300     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2301     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2302     mfc_context->surface_state.width = obj_surface->orig_width;
2303     mfc_context->surface_state.height = obj_surface->orig_height;
2304     mfc_context->surface_state.w_pitch = obj_surface->width;
2305     mfc_context->surface_state.h_pitch = obj_surface->height;
2306
2307     /* forward reference */
2308     obj_surface = encode_state->reference_objects[0];
2309
2310     if (obj_surface && obj_surface->bo) {
2311         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2312         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2313     } else
2314         mfc_context->reference_surfaces[0].bo = NULL;
2315
2316     /* backward reference */
2317     obj_surface = encode_state->reference_objects[1];
2318
2319     if (obj_surface && obj_surface->bo) {
2320         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2321         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2322     } else {
2323         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2324
2325         if (mfc_context->reference_surfaces[1].bo)
2326             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2327     }
2328
2329     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2330         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2331
2332         if (mfc_context->reference_surfaces[i].bo)
2333             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2334     }
2335
2336     /* input YUV surface */
2337     obj_surface = encode_state->input_yuv_object;
2338     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2339     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2340
2341     /* coded buffer */
2342     obj_buffer = encode_state->coded_buf_object;
2343     bo = obj_buffer->buffer_store->bo;
2344     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2345     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2346     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2347     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2348
2349     /* set the internal flag to 0 to indicate the coded size is unknown */
2350     dri_bo_map(bo, 1);
2351     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2352     coded_buffer_segment->mapped = 0;
2353     coded_buffer_segment->codec = encoder_context->codec;
2354     dri_bo_unmap(bo);
2355
2356     return vaStatus;
2357 }
2358
2359 static VAStatus
2360 gen9_mfc_mpeg2_encode_picture(VADriverContextP ctx,
2361                               struct encode_state *encode_state,
2362                               struct intel_encoder_context *encoder_context)
2363 {
2364     gen9_mfc_init(ctx, encode_state, encoder_context);
2365     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2366     /* Program the BCS pipeline */
2367     gen9_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2368     gen9_mfc_run(ctx, encode_state, encoder_context);
2369
2370     return VA_STATUS_SUCCESS;
2371 }
2372
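/*
 * Initialize the per-frame VP8 entropy state: default MV contexts, key
 * frame or inter frame mode probabilities, the skip-false probability
 * derived from the base quantization index, and the default coefficient
 * probabilities copied into the stream-in BO consumed by the PAK.
 */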
2373 static void vp8_enc_state_init(struct gen6_mfc_context *mfc_context,
2374                                VAEncPictureParameterBufferVP8 *pic_param,
2375                                VAQMatrixBufferVP8 *q_matrix)
2376 {
2377
2378     int is_key_frame = !pic_param->pic_flags.bits.frame_type;
2379     unsigned char *coeff_probs_stream_in_buffer;
2380     
2381     mfc_context->vp8_state.frame_header_lf_update_pos = 0;
2382     mfc_context->vp8_state.frame_header_qindex_update_pos = 0;
2383     mfc_context->vp8_state.frame_header_token_update_pos = 0;
2384     mfc_context->vp8_state.frame_header_bin_mv_upate_pos = 0;
2385
2386     mfc_context->vp8_state.prob_skip_false = 255;
2387     memset(mfc_context->vp8_state.mb_segment_tree_probs, 0, sizeof(mfc_context->vp8_state.mb_segment_tree_probs));
2388     memcpy(mfc_context->vp8_state.mv_probs, vp8_default_mv_context, sizeof(mfc_context->vp8_state.mv_probs));
2389     
2390     if (is_key_frame) {
2391         memcpy(mfc_context->vp8_state.y_mode_probs, vp8_kf_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
2392         memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_kf_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
2393
2394         mfc_context->vp8_state.prob_intra = 255;
2395         mfc_context->vp8_state.prob_last = 128;
2396         mfc_context->vp8_state.prob_gf = 128;
2397     } else {
2398         memcpy(mfc_context->vp8_state.y_mode_probs, vp8_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
2399         memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
2400
2401         mfc_context->vp8_state.prob_intra = 63;
2402         mfc_context->vp8_state.prob_last = 128;
2403         mfc_context->vp8_state.prob_gf = 128;
2404     }
2405     
2406     mfc_context->vp8_state.prob_skip_false = vp8_base_skip_false_prob[q_matrix->quantization_index[0]];
2407   
2408     dri_bo_map(mfc_context->vp8_state.coeff_probs_stream_in_bo, 1);
2409     coeff_probs_stream_in_buffer = (unsigned char *)mfc_context->vp8_state.coeff_probs_stream_in_bo->virtual;
2410     assert(coeff_probs_stream_in_buffer);
2411     memcpy(coeff_probs_stream_in_buffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
2412     dri_bo_unmap(mfc_context->vp8_state.coeff_probs_stream_in_bo);
2413 }
2414
2415 static void vp8_enc_state_update(struct gen6_mfc_context *mfc_context,
2416                                  VAQMatrixBufferVP8 *q_matrix)
2417 {
2418
2419     /* some other probabilities need to be updated */
2420 }
2421
2422 extern void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param,
2423                            VAEncPictureParameterBufferVP8 *pic_param,
2424                            VAQMatrixBufferVP8 *q_matrix,
2425                            struct gen6_mfc_context *mfc_context);
2426
2427 static void vp8_enc_frame_header_binarize(struct encode_state *encode_state, 
2428                                           struct gen6_mfc_context *mfc_context)
2429 {
2430     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
2431     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2432     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
2433     unsigned char *frame_header_buffer;
2434
2435     binarize_vp8_frame_header(seq_param, pic_param, q_matrix, mfc_context);
2436  
2437     dri_bo_map(mfc_context->vp8_state.frame_header_bo, 1);
2438     frame_header_buffer = (unsigned char *)mfc_context->vp8_state.frame_header_bo->virtual;
2439     assert(frame_header_buffer);
2440     memcpy(frame_header_buffer, mfc_context->vp8_state.vp8_frame_header, (mfc_context->vp8_state.frame_header_bit_count + 7) / 8);
2441     dri_bo_unmap(mfc_context->vp8_state.frame_header_bo);
2442 }
2443
2444 #define MAX_VP8_FRAME_HEADER_SIZE              0x2000
2445 #define VP8_TOKEN_STATISTICS_BUFFER_SIZE       0x2000
2446
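/*
 * Per-frame VP8 setup: release the buffers from the previous frame and
 * (re)allocate the row-store, macroblock-status and scratch buffers
 * sized from the frame dimensions, plus a fresh aux batchbuffer for the
 * slice data.
 */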
2447 static void gen9_mfc_vp8_init(VADriverContextP ctx,
2448                           struct encode_state *encode_state,
2449                           struct intel_encoder_context *encoder_context)
2450 {
2451     struct i965_driver_data *i965 = i965_driver_data(ctx);
2452     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2453     dri_bo *bo;
2454     int i;
2455     int width_in_mbs = 0;
2456     int height_in_mbs = 0;
2457     int slice_batchbuffer_size;
2458
2459     VAEncSequenceParameterBufferVP8 *pSequenceParameter = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
2460     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2461     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
2462
2463     width_in_mbs = ALIGN(pSequenceParameter->frame_width, 16) / 16;
2464     height_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
2465
2466     slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
2467         (SLICE_HEADER + SLICE_TAIL);
2468
2469     /*Encode common setup for MFC*/
2470     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2471     mfc_context->post_deblocking_output.bo = NULL;
2472
2473     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2474     mfc_context->pre_deblocking_output.bo = NULL;
2475
2476     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2477     mfc_context->uncompressed_picture_source.bo = NULL;
2478
2479     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
2480     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2481
2482     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2483         if ( mfc_context->direct_mv_buffers[i].bo != NULL)
2484             dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2485         mfc_context->direct_mv_buffers[i].bo = NULL;
2486     }
2487
2488     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2489         if (mfc_context->reference_surfaces[i].bo != NULL)
2490             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2491         mfc_context->reference_surfaces[i].bo = NULL;
2492     }
2493
2494     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2495     bo = dri_bo_alloc(i965->intel.bufmgr,
2496                       "Buffer",
2497                       width_in_mbs * 64,
2498                       64);
2499     assert(bo);
2500     mfc_context->intra_row_store_scratch_buffer.bo = bo;
2501
2502     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2503     bo = dri_bo_alloc(i965->intel.bufmgr,
2504                       "Buffer",
2505                       width_in_mbs * height_in_mbs * 16,
2506                       64);
2507     assert(bo);
2508     mfc_context->macroblock_status_buffer.bo = bo;
2509
2510     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2511     bo = dri_bo_alloc(i965->intel.bufmgr,
2512                       "Buffer",
2513                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
2514                       64);
2515     assert(bo);
2516     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2517
2518     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2519     bo = dri_bo_alloc(i965->intel.bufmgr,
2520                       "Buffer",
2521                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
2522                       0x1000);
2523     assert(bo);
2524     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2525
2526     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2527     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2528
2529     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2530     mfc_context->aux_batchbuffer_surface.bo = NULL;
2531
2532     if (mfc_context->aux_batchbuffer)
2533         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2534
2535     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
2536     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
2537     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
2538     mfc_context->aux_batchbuffer_surface.pitch = 16;
2539     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
2540     mfc_context->aux_batchbuffer_surface.size_block = 16;
2541
2542     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
2543
2544     /* alloc vp8 encoding buffers*/
2545     dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
2546     bo = dri_bo_alloc(i965->intel.bufmgr,
2547                       "Buffer",
2548                       MAX_VP8_FRAME_HEADER_SIZE,
2549                       0x1000);
2550     assert(bo);
2551     mfc_context->vp8_state.frame_header_bo = bo;
2552
2553     mfc_context->vp8_state.intermediate_buffer_max_size = width_in_mbs * height_in_mbs * 256 * 9;
2554     for(i = 0; i < 8; i++) {
2555         mfc_context->vp8_state.intermediate_partition_offset[i] = width_in_mbs * height_in_mbs * 256 * (i + 1);
2556     }
2557     dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
2558     bo = dri_bo_alloc(i965->intel.bufmgr,
2559                       "Buffer",
2560                       mfc_context->vp8_state.intermediate_buffer_max_size,
2561                       0x1000);
2562     assert(bo);
2563     mfc_context->vp8_state.intermediate_bo = bo;
2564
2565     dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
2566     bo = dri_bo_alloc(i965->intel.bufmgr,
2567                       "Buffer",
2568                       width_in_mbs * height_in_mbs * 16,
2569                       0x1000);
2570     assert(bo);
2571     mfc_context->vp8_state.stream_out_bo = bo;
2572
2573     dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
2574     bo = dri_bo_alloc(i965->intel.bufmgr,
2575                       "Buffer",
2576                       sizeof(vp8_default_coef_probs),
2577                       0x1000);
2578     assert(bo);
2579     mfc_context->vp8_state.coeff_probs_stream_in_bo = bo;
2580
2581     dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
2582     bo = dri_bo_alloc(i965->intel.bufmgr,
2583                       "Buffer",
2584                       VP8_TOKEN_STATISTICS_BUFFER_SIZE,
2585                       0x1000);
2586     assert(bo);
2587     mfc_context->vp8_state.token_statistics_bo = bo;
2588
2589     dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
2590     bo = dri_bo_alloc(i965->intel.bufmgr,
2591                       "Buffer",
2592                       width_in_mbs * 16 * 64,
2593                       0x1000);
2594     assert(bo);
2595     mfc_context->vp8_state.mpc_row_store_bo = bo;
2596
2597     vp8_enc_state_init(mfc_context, pic_param, q_matrix);
2598     vp8_enc_frame_header_binarize(encode_state, mfc_context);
2599 }
2600
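/*
 * Bind the per-frame surfaces: the reconstructed surface goes to the pre- or
 * post-deblocking output depending on the loop filter level, the reference
 * frames and the input YUV surface are referenced, and the coded buffer becomes
 * the final bitstream target (final_frame_bo) with its status header reset.
 */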
2601 static VAStatus
2602 intel_mfc_vp8_prepare(VADriverContextP ctx,
2603                         struct encode_state *encode_state,
2604                         struct intel_encoder_context *encoder_context)
2605 {
2606     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2607     struct object_surface *obj_surface;
2608     struct object_buffer *obj_buffer;
2609     struct i965_coded_buffer_segment *coded_buffer_segment;
2610     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2611     VAStatus vaStatus = VA_STATUS_SUCCESS;
2612     dri_bo *bo;
2613     int i;
2614
2615     /* reconstructed surface */
2616     obj_surface = encode_state->reconstructed_object;
2617     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2618     if (pic_param->loop_filter_level[0] == 0) {
2619         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2620         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2621     } else {
2622         mfc_context->post_deblocking_output.bo = obj_surface->bo;
2623         dri_bo_reference(mfc_context->post_deblocking_output.bo);
2624     }
2625
2626     mfc_context->surface_state.width = obj_surface->orig_width;
2627     mfc_context->surface_state.height = obj_surface->orig_height;
2628     mfc_context->surface_state.w_pitch = obj_surface->width;
2629     mfc_context->surface_state.h_pitch = obj_surface->height;
2630
2631     /* set vp8 reference frames */
2632     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2633         obj_surface = encode_state->reference_objects[i];
2634
2635         if (obj_surface && obj_surface->bo) {
2636             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
2637             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2638         } else {
2639             mfc_context->reference_surfaces[i].bo = NULL;
2640         }
2641     }
2642
2643     /* input YUV surface */
2644     obj_surface = encode_state->input_yuv_object;
2645     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2646     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2647
2648     /* coded buffer */
2649     obj_buffer = encode_state->coded_buf_object;
2650     bo = obj_buffer->buffer_store->bo;
2651     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2652     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2653     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2654     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2655
2656     dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
2657     mfc_context->vp8_state.final_frame_bo = mfc_context->mfc_indirect_pak_bse_object.bo;
2658     mfc_context->vp8_state.final_frame_byte_offset = I965_CODEDBUFFER_HEADER_SIZE;
2659     dri_bo_reference(mfc_context->vp8_state.final_frame_bo);
2660
2661     /* set the internal flag to 0 to indicate the coded size is unknown */
2662     dri_bo_map(bo, 1);
2663     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2664     coded_buffer_segment->mapped = 0;
2665     coded_buffer_segment->codec = encoder_context->codec;
2666     dri_bo_unmap(bo);
2667
2668     return vaStatus;
2669 }
2670
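/*
 * Emit MFX_VP8_ENCODER_CFG: rate-control/statistics flags, per-MB bit-count
 * limits, frame size/scale, and the frame header bit count together with the
 * qindex/loop-filter/token/MV update positions inside the binarized header.
 */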
2671 static void
2672 gen9_mfc_vp8_encoder_cfg(VADriverContextP ctx, 
2673                          struct encode_state *encode_state,
2674                          struct intel_encoder_context *encoder_context)
2675 {
2676     struct intel_batchbuffer *batch = encoder_context->base.batch;
2677     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2678     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
2679     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2680
2681     BEGIN_BCS_BATCH(batch, 30);
2682     OUT_BCS_BATCH(batch, MFX_VP8_ENCODER_CFG | (30 - 2)); /* SKL should be 31-2 ? */
2683
2684     OUT_BCS_BATCH(batch,
2685                   0 << 9 | /* compressed bitstream output disable */
2686                   1 << 7 | /* disable per-segment delta qindex and loop filter in RC */
2687                   0 << 6 | /* RC initial pass */
2688                   0 << 4 | /* update segment feature data flag */
2689                   1 << 3 | /* bitstream statistics output enable */
2690                   1 << 2 | /* token statistics output enable */
2691                   0 << 1 | /* final bitstream output disable */
2692                   0 << 0); /*DW1*/
2693     
2694     OUT_BCS_BATCH(batch, 0); /*DW2*/
2695
2696     OUT_BCS_BATCH(batch, 
2697                   0xfff << 16 | /* max intra mb bit count limit */
2698                   0xfff << 0  /* max inter mb bit count limit */
2699                   ); /*DW3*/
2700
2701     OUT_BCS_BATCH(batch, 0); /*DW4*/
2702     OUT_BCS_BATCH(batch, 0); /*DW5*/
2703     OUT_BCS_BATCH(batch, 0); /*DW6*/
2704     OUT_BCS_BATCH(batch, 0); /*DW7*/
2705     OUT_BCS_BATCH(batch, 0); /*DW8*/
2706     OUT_BCS_BATCH(batch, 0); /*DW9*/
2707     OUT_BCS_BATCH(batch, 0); /*DW10*/
2708     OUT_BCS_BATCH(batch, 0); /*DW11*/
2709     OUT_BCS_BATCH(batch, 0); /*DW12*/
2710     OUT_BCS_BATCH(batch, 0); /*DW13*/
2711     OUT_BCS_BATCH(batch, 0); /*DW14*/
2712     OUT_BCS_BATCH(batch, 0); /*DW15*/
2713     OUT_BCS_BATCH(batch, 0); /*DW16*/
2714     OUT_BCS_BATCH(batch, 0); /*DW17*/
2715     OUT_BCS_BATCH(batch, 0); /*DW18*/
2716     OUT_BCS_BATCH(batch, 0); /*DW19*/
2717     OUT_BCS_BATCH(batch, 0); /*DW20*/
2718     OUT_BCS_BATCH(batch, 0); /*DW21*/
2719
2720     OUT_BCS_BATCH(batch, 
2721                  pic_param->pic_flags.bits.show_frame << 23 |
2722                  pic_param->pic_flags.bits.version << 20
2723                  ); /*DW22*/
2724
2725     OUT_BCS_BATCH(batch,
2726                  (seq_param->frame_height_scale << 14 | seq_param->frame_height) << 16 |
2727                  (seq_param->frame_width_scale << 14 | seq_param->frame_width) << 0
2728                  );
2729
2730     /*DW24*/
2731     OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bit_count); /* frame header bit count */
2732
2733     /*DW25*/
2734     OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_qindex_update_pos); /* frame header bin buffer qindex update pointer */
2735
2736     /*DW26*/
2737     OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_lf_update_pos); /* frame header bin buffer loop filter update pointer*/
2738
2739     /*DW27*/
2740     OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_token_update_pos); /* frame header bin buffer token update pointer */
2741
2742     /*DW28*/
2743     OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bin_mv_upate_pos); /*frame header bin buffer mv update pointer */
2744
2745     /*DW29*/
2746     OUT_BCS_BATCH(batch, 0);
2747
2748     ADVANCE_BCS_BATCH(batch);
2749 }
2750
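/*
 * Emit MFX_VP8_PIC_STATE: frame size in MBs, token partition count, loop
 * filter and quantization settings, and the mode/MV/segment probability
 * tables kept in vp8_state.
 */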
2751 static void
2752 gen9_mfc_vp8_pic_state(VADriverContextP ctx,
2753                        struct encode_state *encode_state,
2754                        struct intel_encoder_context *encoder_context)
2755 {
2756     struct intel_batchbuffer *batch = encoder_context->base.batch;
2757     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2758     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
2759     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
2760     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
2761     int i, j, log2num;
2762
2763     assert(pic_param->pic_flags.bits.num_token_partitions > 0);
2764     assert(pic_param->pic_flags.bits.num_token_partitions < 9);
2765     log2num = (int)log2(pic_param->pic_flags.bits.num_token_partitions);
2766
2767     /*update mode and token probs*/
2768     vp8_enc_state_update(mfc_context, q_matrix);
2769  
2770     BEGIN_BCS_BATCH(batch, 38);
2771     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2772     OUT_BCS_BATCH(batch,
2773                   (ALIGN(seq_param->frame_height, 16) / 16 - 1) << 16 |
2774                   (ALIGN(seq_param->frame_width, 16) / 16 - 1) << 0);
2775  
2776     OUT_BCS_BATCH(batch,
2777                   log2num << 24 |
2778                   pic_param->sharpness_level << 16 |
2779                   pic_param->pic_flags.bits.sign_bias_alternate << 13 |
2780                   pic_param->pic_flags.bits.sign_bias_golden << 12 |
2781                   pic_param->pic_flags.bits.loop_filter_adj_enable << 11 |
2782                   pic_param->pic_flags.bits.mb_no_coeff_skip << 10 |
2783                   pic_param->pic_flags.bits.update_mb_segmentation_map << 9 |
2784                   pic_param->pic_flags.bits.segmentation_enabled << 8 |
2785                   !pic_param->pic_flags.bits.frame_type << 5 | /* 0 indicates an intra frame in the VP8 stream/spec (section 9.1) */
2786                   (pic_param->pic_flags.bits.version / 2) << 4 |
2787                   (pic_param->pic_flags.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2788                   !!pic_param->pic_flags.bits.version << 0); /* version 0: 6 tap */
2789  
2790     OUT_BCS_BATCH(batch,
2791                   pic_param->loop_filter_level[3] << 24 |
2792                   pic_param->loop_filter_level[2] << 16 |
2793                   pic_param->loop_filter_level[1] <<  8 |
2794                   pic_param->loop_filter_level[0] <<  0);
2795
2796     OUT_BCS_BATCH(batch,
2797                   q_matrix->quantization_index[3] << 24 |
2798                   q_matrix->quantization_index[2] << 16 |
2799                   q_matrix->quantization_index[1] <<  8 |
2800                   q_matrix->quantization_index[0] << 0);
2801
2802     OUT_BCS_BATCH(batch,
2803                  ((unsigned short)(q_matrix->quantization_index_delta[4]) >> 15) << 28 | 
2804                  abs(q_matrix->quantization_index_delta[4]) << 24 |
2805                  ((unsigned short)(q_matrix->quantization_index_delta[3]) >> 15) << 20 | 
2806                  abs(q_matrix->quantization_index_delta[3]) << 16 |
2807                  ((unsigned short)(q_matrix->quantization_index_delta[2]) >> 15) << 12 | 
2808                  abs(q_matrix->quantization_index_delta[2]) << 8 |
2809                  ((unsigned short)(q_matrix->quantization_index_delta[1]) >> 15) << 4 | 
2810                  abs(q_matrix->quantization_index_delta[1]) << 0);
2811
2812     OUT_BCS_BATCH(batch,
2813                  ((unsigned short)(q_matrix->quantization_index_delta[0]) >> 15) << 4 |
2814                  abs(q_matrix->quantization_index_delta[0]) << 0);
2815     
2816     OUT_BCS_BATCH(batch,
2817                  pic_param->clamp_qindex_high << 8 |
2818                  pic_param->clamp_qindex_low << 0);
2819
2820     for (i = 8; i < 19; i++) {
2821          OUT_BCS_BATCH(batch, 0xffffffff);
2822     }
2823
2824     OUT_BCS_BATCH(batch,
2825                   mfc_context->vp8_state.mb_segment_tree_probs[2] << 16 |
2826                   mfc_context->vp8_state.mb_segment_tree_probs[1] <<  8 |
2827                   mfc_context->vp8_state.mb_segment_tree_probs[0] <<  0);
2828
2829     OUT_BCS_BATCH(batch,
2830                   mfc_context->vp8_state.prob_skip_false << 24 |
2831                   mfc_context->vp8_state.prob_intra      << 16 |
2832                   mfc_context->vp8_state.prob_last       <<  8 |
2833                   mfc_context->vp8_state.prob_gf         <<  0);
2834
2835     OUT_BCS_BATCH(batch,
2836                   mfc_context->vp8_state.y_mode_probs[3] << 24 |
2837                   mfc_context->vp8_state.y_mode_probs[2] << 16 |
2838                   mfc_context->vp8_state.y_mode_probs[1] <<  8 |
2839                   mfc_context->vp8_state.y_mode_probs[0] <<  0);
2840
2841     OUT_BCS_BATCH(batch,
2842                   mfc_context->vp8_state.uv_mode_probs[2] << 16 |
2843                   mfc_context->vp8_state.uv_mode_probs[1] <<  8 |
2844                   mfc_context->vp8_state.uv_mode_probs[0] <<  0);
2845     
2846     /* MV update value, DW23-DW32 */
2847     for (i = 0; i < 2; i++) {
2848         for (j = 0; j < 20; j += 4) {
2849             OUT_BCS_BATCH(batch,
2850                           (j + 3 == 19 ? 0 : mfc_context->vp8_state.mv_probs[i][j + 3]) << 24 |
2851                           mfc_context->vp8_state.mv_probs[i][j + 2] << 16 |
2852                           mfc_context->vp8_state.mv_probs[i][j + 1] <<  8 |
2853                           mfc_context->vp8_state.mv_probs[i][j + 0] <<  0);
2854         }
2855     }
2856
2857     OUT_BCS_BATCH(batch,
2858                   (pic_param->ref_lf_delta[3] & 0x7f) << 24 |
2859                   (pic_param->ref_lf_delta[2] & 0x7f) << 16 |
2860                   (pic_param->ref_lf_delta[1] & 0x7f) <<  8 |
2861                   (pic_param->ref_lf_delta[0] & 0x7f) <<  0);
2862
2863     OUT_BCS_BATCH(batch,
2864                   (pic_param->mode_lf_delta[3] & 0x7f) << 24 |
2865                   (pic_param->mode_lf_delta[2] & 0x7f) << 16 |
2866                   (pic_param->mode_lf_delta[1] & 0x7f) <<  8 |
2867                   (pic_param->mode_lf_delta[0] & 0x7f) <<  0);
2868
2869     OUT_BCS_BATCH(batch, 0);
2870     OUT_BCS_BATCH(batch, 0);
2871     OUT_BCS_BATCH(batch, 0);
2872
2873     ADVANCE_BCS_BATCH(batch);
2874 }
2875
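/*
 * Emit one buffer address entry (3 DWs): a relocation for the bo when present
 * (or 0), followed by two zero DWs (presumably the upper address bits and
 * memory attribute fields on gen9).
 */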
2876 #define OUT_VP8_BUFFER(bo, offset)                                      \
2877     if (bo)                                                             \
2878         OUT_BCS_RELOC(batch,                                            \
2879                       bo,                                               \
2880                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, \
2881                       offset);                                           \
2882     else                                                                \
2883         OUT_BCS_BATCH(batch, 0);                                        \
2884     OUT_BCS_BATCH(batch, 0);                                            \
2885     OUT_BCS_BATCH(batch, 0);
2886
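/*
 * Emit MFX_VP8_BSP_BUF_BASE_ADDR_STATE: point the bitstream processor at the
 * frame header, the intermediate partition buffer (with per-partition offsets
 * and max size), the final coded buffer, stream-out, coefficient probability
 * stream-in, token statistics and MPC row-store buffers.
 */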
2887 static void 
2888 gen9_mfc_vp8_bsp_buf_base_addr_state(VADriverContextP ctx, 
2889                                      struct encode_state *encode_state,
2890                                      struct intel_encoder_context *encoder_context)
2891 {
2892     struct intel_batchbuffer *batch = encoder_context->base.batch;
2893     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2894
2895     BEGIN_BCS_BATCH(batch, 32);
2896     OUT_BCS_BATCH(batch, MFX_VP8_BSP_BUF_BASE_ADDR_STATE | (32 - 2));
2897
2898     OUT_VP8_BUFFER(mfc_context->vp8_state.frame_header_bo, 0);
2899
2900     OUT_VP8_BUFFER(mfc_context->vp8_state.intermediate_bo, 0);
2901     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[0]);
2902     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[1]);
2903     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[2]);
2904     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[3]);
2905     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[4]);
2906     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[5]);
2907     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[6]);
2908     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[7]);
2909     OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_buffer_max_size);
2910
2911     OUT_VP8_BUFFER(mfc_context->vp8_state.final_frame_bo, I965_CODEDBUFFER_HEADER_SIZE);
2912     OUT_BCS_BATCH(batch, 0);
2913
2914     OUT_VP8_BUFFER(mfc_context->vp8_state.stream_out_bo, 0);
2915     OUT_VP8_BUFFER(mfc_context->vp8_state.coeff_probs_stream_in_bo, 0);
2916     OUT_VP8_BUFFER(mfc_context->vp8_state.token_statistics_bo, 0);
2917     OUT_VP8_BUFFER(mfc_context->vp8_state.mpc_row_store_bo, 0);
2918
2919     ADVANCE_BCS_BATCH(batch);
2920 }
2921
2922 static void
2923 gen9_mfc_vp8_pipeline_picture_programing(VADriverContextP ctx,
2924                                            struct encode_state *encode_state,
2925                                            struct intel_encoder_context *encoder_context)
2926 {
2927     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2928
2929     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_VP8, encoder_context);
2930     mfc_context->set_surface_state(ctx, encoder_context);
2931     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2932     gen9_mfc_pipe_buf_addr_state(ctx, encoder_context);
2933     gen9_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2934     gen9_mfc_vp8_bsp_buf_base_addr_state(ctx, encode_state, encoder_context);
2935     gen9_mfc_vp8_pic_state(ctx, encode_state, encoder_context);
2936     gen9_mfc_vp8_encoder_cfg(ctx, encode_state, encoder_context);
2937 }
2938
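/* Emit one MFX_VP8_PAK_OBJECT for an intra MB at (x, y): 16x16 dc_pred for both luma and chroma. */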
2939 static void
2940 gen9_mfc_vp8_pak_object_intra(VADriverContextP ctx, 
2941                               struct intel_encoder_context *encoder_context,
2942                               unsigned int *msg,
2943                               int x, int y,
2944                               struct intel_batchbuffer *batch)
2945 {
2946     if (batch == NULL)
2947         batch = encoder_context->base.batch;
2948
2949     BEGIN_BCS_BATCH(batch, 7);
2950
2951     OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
2952     OUT_BCS_BATCH(batch, 0);
2953     OUT_BCS_BATCH(batch, 0);
2954     OUT_BCS_BATCH(batch,
2955                   (0 << 20) |           /* mv format: intra mb */
2956                   (0 << 18) |           /* Segment ID */
2957                   (0 << 17) |           /* disable coeff clamp */
2958                   (1 << 13) |           /* intra mb flag */
2959                   (0 << 11) |           /* refer picture select: last frame */
2960                   (0 << 8) |            /* mb type: 16x16 intra mb */
2961                   (0 << 4) |            /* mb uv mode: dc_pred */
2962                   (0 << 2) |            /* skip mb flag: disable */
2963                   0);
2964
2965     OUT_BCS_BATCH(batch, (y << 16) | x);
2966     OUT_BCS_BATCH(batch, 0);  /* y_mode: dc_pred */
2967     OUT_BCS_BATCH(batch, 0);
2968
2969     ADVANCE_BCS_BATCH(batch);
2970 }
2971
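/* Emit one MFX_VP8_PAK_OBJECT for an inter MB at (x, y): 16x16 inter MB referencing the last frame. */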
2972 static void
2973 gen9_mfc_vp8_pak_object_inter(VADriverContextP ctx, 
2974                               struct intel_encoder_context *encoder_context,
2975                               unsigned int *msg,
2976                               int x, int y,
2977                               struct intel_batchbuffer *batch)
2978 {
2979     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2980
2981     if (batch == NULL)
2982         batch = encoder_context->base.batch;
2983
2984     BEGIN_BCS_BATCH(batch, 7);
2985
2986     OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
2987     OUT_BCS_BATCH(batch,
2988                   (0 << 29) |           /* enable inline mv data: disable */
2989                   64);
2990     OUT_BCS_BATCH(batch,
2991                   0);
2992     OUT_BCS_BATCH(batch,
2993                   (4 << 20) |           /* mv format: inter */
2994                   (0 << 18) |           /* Segment ID */
2995                   (0 << 17) |           /* coeff clamp: disable */
2996                   (0 << 13) |           /* intra mb flag: inter mb */
2997                   (0 << 11) |           /* refer picture select: last frame */
2998                   (0 << 8) |            /* mb type: 16x16 */
2999                   (0 << 4) |            /* mb uv mode: dc_pred */
3000                   (0 << 2) |            /* skip mb flag: disable */
3001                   0);
3002
3003     OUT_BCS_BATCH(batch, (y << 16) | x);
3004
3005     /*zero mv*/
3006     OUT_BCS_BATCH(batch, 0x88888888);
3007     OUT_BCS_BATCH(batch, 0x88888888);
3008
3009     ADVANCE_BCS_BATCH(batch);
3010 }
3011
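/*
 * Walk all MBs of the frame in raster order and emit an intra or inter PAK
 * object for each one, depending on the frame type. The VME output buffer is
 * mapped and passed along, although the PAK objects currently use fixed modes.
 */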
3012 static void
3013 gen9_mfc_vp8_pak_pipeline(VADriverContextP ctx,
3014                           struct encode_state *encode_state,
3015                           struct intel_encoder_context *encoder_context,
3016                           struct intel_batchbuffer *slice_batch)
3017 {
3018     struct gen6_vme_context *vme_context = encoder_context->vme_context;
3019     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3020     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3021     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
3022     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
3023     unsigned int *msg = NULL;
3024     unsigned char *msg_ptr = NULL;
3025     unsigned int i, is_intra_frame;
3026
3027     is_intra_frame = !pic_param->pic_flags.bits.frame_type;
3028
3029     dri_bo_map(vme_context->vme_output.bo, 1);
3030     msg = (unsigned int *)(msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual);
3031
3032     for (i = 0; i < width_in_mbs * height_in_mbs; i++) {
3033         int h_pos = i % width_in_mbs;
3034         int v_pos = i / width_in_mbs;
3035
3036         if (is_intra_frame) {
3037             gen9_mfc_vp8_pak_object_intra(ctx,
3038                     encoder_context,
3039                     msg,
3040                     h_pos, v_pos,
3041                     slice_batch);
3042         } else {
3043             gen9_mfc_vp8_pak_object_inter(ctx,
3044                     encoder_context,
3045                     msg,
3046                     h_pos, v_pos,
3047                     slice_batch);
3048         }
3049     }
3050
3051     dri_bo_unmap(vme_context->vme_output.bo);
3052 }
3053
3054 /*
3055  * A batch buffer for vp8 pak object commands
3056  */
3057 static dri_bo *
3058 gen9_mfc_vp8_software_batchbuffer(VADriverContextP ctx,
3059                                           struct encode_state *encode_state,
3060                                           struct intel_encoder_context *encoder_context)
3061 {
3062     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3063     struct intel_batchbuffer *batch;
3064     dri_bo *batch_bo;
3065
3066     batch = mfc_context->aux_batchbuffer;
3067     batch_bo = batch->buffer;
3068
3069     gen9_mfc_vp8_pak_pipeline(ctx, encode_state, encoder_context, batch);
3070
3071     intel_batchbuffer_align(batch, 8);
3072
3073     BEGIN_BCS_BATCH(batch, 2);
3074     OUT_BCS_BATCH(batch, 0);
3075     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
3076     ADVANCE_BCS_BATCH(batch);
3077
3078     dri_bo_reference(batch_bo);
3079     intel_batchbuffer_free(batch);
3080     mfc_context->aux_batchbuffer = NULL;
3081
3082     return batch_bo;
3083 }
3084
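/*
 * Build the per-MB PAK command buffer first, then emit the picture-level
 * state into the BCS batch and chain to the PAK buffer with
 * MI_BATCH_BUFFER_START.
 */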
3085 static void
3086 gen9_mfc_vp8_pipeline_programing(VADriverContextP ctx,
3087                                    struct encode_state *encode_state,
3088                                    struct intel_encoder_context *encoder_context)
3089 {
3090     struct intel_batchbuffer *batch = encoder_context->base.batch;
3091     dri_bo *slice_batch_bo;
3092
3093     slice_batch_bo = gen9_mfc_vp8_software_batchbuffer(ctx, encode_state, encoder_context);
3094
3095     // begin programming
3096     intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
3097     intel_batchbuffer_emit_mi_flush(batch);
3098
3099     // picture-level programming
3100     gen9_mfc_vp8_pipeline_picture_programing(ctx, encode_state, encoder_context);
3101
3102     BEGIN_BCS_BATCH(batch, 4);
3103     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
3104     OUT_BCS_RELOC(batch,
3105                   slice_batch_bo,
3106                   I915_GEM_DOMAIN_COMMAND, 0,
3107                   0);
3108     OUT_BCS_BATCH(batch, 0);
3109     OUT_BCS_BATCH(batch, 0);
3110     ADVANCE_BCS_BATCH(batch);
3111
3112     // end programming
3113     intel_batchbuffer_end_atomic(batch);
3114
3115     dri_bo_unreference(slice_batch_bo);
3116 }
3117
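/*
 * Compute the coded frame size from the PAK token statistics (first partition
 * and token partition bit counts), add the uncompressed data chunk (3 bytes,
 * plus 7 more for a key frame) and the 3-byte size field of each additional
 * token partition, and store the total in the coded buffer segment.
 */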
3118 static void gen9_mfc_calc_vp8_coded_buffer_size(VADriverContextP ctx,
3119                           struct encode_state *encode_state,
3120                           struct intel_encoder_context *encoder_context)
3121 {
3122     struct i965_driver_data *i965 = i965_driver_data(ctx);
3123     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3124     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3125     unsigned char is_intra_frame = !pic_param->pic_flags.bits.frame_type;
3126     unsigned int *vp8_encoding_status, first_partition_bytes, token_partition_bytes, vp8_coded_bytes;
3127
3128     dri_bo_map(mfc_context->vp8_state.token_statistics_bo, 0);
3129
3130     vp8_encoding_status = (unsigned int *)mfc_context->vp8_state.token_statistics_bo->virtual;
3131     first_partition_bytes = (*vp8_encoding_status + 7) / 8;
3132     token_partition_bytes = (*(unsigned int *)(vp8_encoding_status + 9) + 7) / 8;
3133     
3134     /* coded_bytes includes the P0~P8 partition bytes + the uncompressed data chunk bytes + the partition_size bytes in the bitstream */
3135     vp8_coded_bytes = first_partition_bytes + token_partition_bytes + (3 + 7 * !!is_intra_frame) + (pic_param->pic_flags.bits.num_token_partitions - 1) * 3;
3136
3137     dri_bo_unmap(mfc_context->vp8_state.token_statistics_bo);
3138
3139     dri_bo_map(mfc_context->vp8_state.final_frame_bo, 0);
3140     struct i965_coded_buffer_segment *coded_buffer_segment = (struct i965_coded_buffer_segment *)(mfc_context->vp8_state.final_frame_bo->virtual);
3141     coded_buffer_segment->base.size = vp8_coded_bytes;
3142     dri_bo_unmap(mfc_context->vp8_state.final_frame_bo);
3143 }
3144
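/* Top-level VP8 PAK entry: init buffers, prepare surfaces, program and run the BCS pipeline, then fill in the coded size. */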
3145 static VAStatus
3146 gen9_mfc_vp8_encode_picture(VADriverContextP ctx,
3147                               struct encode_state *encode_state,
3148                               struct intel_encoder_context *encoder_context)
3149 {
3150     gen9_mfc_vp8_init(ctx, encode_state, encoder_context);
3151     intel_mfc_vp8_prepare(ctx, encode_state, encoder_context);
3152     /* Program the BCS pipeline */
3153     gen9_mfc_vp8_pipeline_programing(ctx, encode_state, encoder_context);
3154     gen9_mfc_run(ctx, encode_state, encoder_context);
3155     gen9_mfc_calc_vp8_coded_buffer_size(ctx, encode_state, encoder_context);
3156
3157     return VA_STATUS_SUCCESS;
3158 }
3159
3160 static void
3161 gen9_mfc_context_destroy(void *context)
3162 {
3163     struct gen6_mfc_context *mfc_context = context;
3164     int i;
3165
3166     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
3167     mfc_context->post_deblocking_output.bo = NULL;
3168
3169     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
3170     mfc_context->pre_deblocking_output.bo = NULL;
3171
3172     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
3173     mfc_context->uncompressed_picture_source.bo = NULL;
3174
3175     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
3176     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
3177
3178     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
3179         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
3180         mfc_context->direct_mv_buffers[i].bo = NULL;
3181     }
3182
3183     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
3184     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
3185
3186     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
3187     mfc_context->macroblock_status_buffer.bo = NULL;
3188
3189     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
3190     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3191
3192     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
3193     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3194
3195
3196     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
3197         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
3198         mfc_context->reference_surfaces[i].bo = NULL;
3199     }
3200
3201     i965_gpe_context_destroy(&mfc_context->gpe_context);
3202
3203     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
3204     mfc_context->mfc_batchbuffer_surface.bo = NULL;
3205
3206     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
3207     mfc_context->aux_batchbuffer_surface.bo = NULL;
3208
3209     if (mfc_context->aux_batchbuffer)
3210         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
3211
3212     mfc_context->aux_batchbuffer = NULL;
3213
3214     dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
3215     mfc_context->vp8_state.coeff_probs_stream_in_bo = NULL;
3216
3217     dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
3218     mfc_context->vp8_state.final_frame_bo = NULL;
3219
3220     dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
3221     mfc_context->vp8_state.frame_header_bo = NULL;
3222
3223     dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
3224     mfc_context->vp8_state.intermediate_bo = NULL;
3225
3226     dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
3227     mfc_context->vp8_state.mpc_row_store_bo = NULL;
3228
3229     dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
3230     mfc_context->vp8_state.stream_out_bo = NULL;
3231
3232     dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
3233     mfc_context->vp8_state.token_statistics_bo = NULL;
3234
3235     free(mfc_context);
3236 }
3237
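/* Dispatch the PAK stage by profile: AVC, MPEG-2 or VP8. */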
3238 static VAStatus gen9_mfc_pipeline(VADriverContextP ctx,
3239                                   VAProfile profile,
3240                                   struct encode_state *encode_state,
3241                                   struct intel_encoder_context *encoder_context)
3242 {
3243     VAStatus vaStatus;
3244
3245     switch (profile) {
3246     case VAProfileH264ConstrainedBaseline:
3247     case VAProfileH264Main:
3248     case VAProfileH264High:
3249     case VAProfileH264MultiviewHigh:
3250     case VAProfileH264StereoHigh:
3251         vaStatus = gen9_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
3252         break;
3253
3254         /* FIXME: add for other profile */
3255     case VAProfileMPEG2Simple:
3256     case VAProfileMPEG2Main:
3257         vaStatus = gen9_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
3258         break;
3259
3260     case VAProfileVP8Version0_3:
3261         vaStatus = gen9_mfc_vp8_encode_picture(ctx, encode_state, encoder_context);
3262         break;
3263
3264     default:
3265         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
3266         break;
3267     }
3268
3269     return vaStatus;
3270 }
3271
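/*
 * Create the gen9 MFC context: H.264/MPEG-2 reuse the gen8 path when
 * MFC_SOFTWARE_HASWELL is set; otherwise set up the GPE context, load the
 * MFC batchbuffer kernels and hook up the encoder vtable.
 */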
3272 Bool gen9_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3273 {
3274     struct gen6_mfc_context *mfc_context = NULL;
3275
3276 #if MFC_SOFTWARE_HASWELL
3277     if ((encoder_context->codec == CODEC_H264) ||
3278         (encoder_context->codec == CODEC_H264_MVC) ||
3279         (encoder_context->codec == CODEC_MPEG2)) {
3280
3281         return gen8_mfc_context_init(ctx, encoder_context);
3282     }
3283 #endif
3284
3285     mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
3286     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
3287
3288     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
3289     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
3290
3291     mfc_context->gpe_context.curbe.length = 32 * 4;
3292
3293     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
3294     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
3295     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
3296     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
3297     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
3298
3299     i965_gpe_load_kernels(ctx,
3300                           &mfc_context->gpe_context,
3301                           gen9_mfc_kernels,
3302                           NUM_MFC_KERNEL);
3303
3304     mfc_context->pipe_mode_select = gen9_mfc_pipe_mode_select;
3305     mfc_context->set_surface_state = gen9_mfc_surface_state;
3306     mfc_context->ind_obj_base_addr_state = gen9_mfc_ind_obj_base_addr_state;
3307     mfc_context->avc_img_state = gen9_mfc_avc_img_state;
3308     mfc_context->avc_qm_state = gen9_mfc_avc_qm_state;
3309     mfc_context->avc_fqm_state = gen9_mfc_avc_fqm_state;
3310     mfc_context->insert_object = gen9_mfc_avc_insert_object;
3311     mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
3312
3313     encoder_context->mfc_context = mfc_context;
3314     encoder_context->mfc_context_destroy = gen9_mfc_context_destroy;
3315     encoder_context->mfc_pipeline = gen9_mfc_pipeline;
3316     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
3317
3318     return True;
3319 }