OSDN Git Service

Use arrays to store BRC related parameters per temporal layer
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfc.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45 #include <va/va_enc_jpeg.h>
46 #include "vp8_probs.h"
47
/* Binding-table layout helpers: surface states are packed first, followed
 * by one binding-table entry (a DWORD) per media surface. */
#define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)

/* When non-zero, slice batches are built in software instead of by the
 * MFC batchbuffer media kernel. */
#define MFC_SOFTWARE_BATCH      0

/* Stepping B0+ is detected from the device revision id. */
#define B0_STEP_REV             2
#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
56
//Zigzag scan order of the Luma and Chroma components
//Note: Jpeg Spec ISO/IEC 10918-1, Figure A.6 shows the zigzag order differently.
//The Spec is trying to show the zigzag pattern with number positions. The below
//table will use the pattern shown by A.6 and map the position of the elements in the array
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
71
//Default Luminance quantization table (raster order, 8x8)
//Source: Jpeg Spec ISO/IEC 10918-1, Annex K, Table K.1
static const uint8_t jpeg_luma_quant[64] = {
    16, 11, 10, 16, 24,  40,  51,  61,
    12, 12, 14, 19, 26,  58,  60,  55,
    14, 13, 16, 24, 40,  57,  69,  56,
    14, 17, 22, 29, 51,  87,  80,  62,
    18, 22, 37, 56, 68,  109, 103, 77,
    24, 35, 55, 64, 81,  104, 113, 92,
    49, 64, 78, 87, 103, 121, 120, 101,
    72, 92, 95, 98, 112, 100, 103, 99
};
84
//Default Chroma quantization table (raster order, 8x8)
//Source: Jpeg Spec ISO/IEC 10918-1, Annex K, Table K.2
static const uint8_t jpeg_chroma_quant[64] = {
    17, 18, 24, 47, 99, 99, 99, 99,
    18, 21, 26, 66, 99, 99, 99, 99,
    24, 26, 56, 99, 99, 99, 99, 99,
    47, 66, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99
};
97
98
/* Map the VA-API Huffman table index (0 = luma, 1 = chroma) to the
 * hardware MFX Huffman table id. */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
103
/* Pre-compiled media kernel binaries that build the AVC slice batchbuffer
 * on the GPU (separate builds for Gen8 and Gen9). */
static const uint32_t gen8_mfc_batchbuffer_avc[][4] = {
#include "shaders/utils/mfc_batchbuffer_hsw.g8b"
};

static const uint32_t gen9_mfc_batchbuffer_avc[][4] = {
#include "shaders/utils/mfc_batchbuffer_hsw.g9b"
};
111
/* Kernel descriptor table for the Gen8 MFC batchbuffer media kernel. */
static struct i965_kernel gen8_mfc_kernels[] = {
    {
        "MFC AVC INTRA BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTRA,
        gen8_mfc_batchbuffer_avc,
        sizeof(gen8_mfc_batchbuffer_avc),
        NULL
    },
};
121
/* Kernel descriptor table for the Gen9 MFC batchbuffer media kernel. */
static struct i965_kernel gen9_mfc_kernels[] = {
    {
        "MFC AVC INTRA BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTRA,
        gen9_mfc_batchbuffer_avc,
        sizeof(gen9_mfc_batchbuffer_avc),
        NULL
    },
};
131
/* Flat AVC quantization matrix: 64 byte-entries of 16 (0x10), packed four
 * per DWORD, as expected by MFX_QM_STATE. */
static const uint32_t qm_flat[16] = {
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010
};
138
/* Flat AVC forward quantization matrix: 64 16-bit entries of 0x1000
 * (i.e. 65536 / 16), packed two per DWORD, as expected by MFX_FQM_STATE. */
static const uint32_t fqm_flat[32] = {
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000
};
149
/* Inter macroblock partitioning modes and sub-macroblock shape mask.
 * NOTE(review): bit layout appears to match the VME output macroblock
 * mode field — confirm against the VME kernel interface. */
#define         INTER_MODE_MASK         0x03
#define         INTER_8X8               0x03
#define         INTER_16X8              0x01
#define         INTER_8X16              0x02
#define         SUBMB_SHAPE_MASK        0x00FF00
#define         INTER_16X16             0x00

#define         INTER_MV8               (4 << 20)
#define         INTER_MV32              (6 << 20)
159
160
/* Emit MFX_PIPE_MODE_SELECT for encoding.
 *
 * Puts the MFX engine in long-format encode mode for the selected codec
 * and enables pre-/post-deblocking output according to which buffer
 * object is currently set in the MFC context.
 */
static void
gen8_mfc_pipe_mode_select(VADriverContextP ctx,
                          int standard_select,
                          struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    /* Only the codecs this backend supports may be selected. */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC   ||
           standard_select == MFX_FORMAT_JPEG  ||
           standard_select == MFX_FORMAT_VP8);

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* Stream-Out Enable */
                  ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
                  ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
                  (0 << 6)  | /* frame statistics stream-out enable*/
                  (0 << 5)  | /* not in stitch mode */
                  (1 << 4)  | /* encoding mode */
                  (standard_select << 0));  /* standard select: avc or mpeg2 or jpeg*/
    OUT_BCS_BATCH(batch,
                  (0 << 7)  | /* expand NOA bus flag */
                  (0 << 6)  | /* disable slice-level clock gating */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
201
/* Emit MFX_SURFACE_STATE describing the reconstructed/source picture:
 * a tiled, interleaved-UV planar 4:2:0 (NV12) surface whose dimensions
 * and pitches come from mfc_context->surface_state. */
static void
gen8_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((mfc_context->surface_state.height - 1) << 18) |
                  ((mfc_context->surface_state.width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
                  (0 << 22) | /* surface object control state, FIXME??? */
                  ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 for interleave U/V */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                                                           /* must be 0 for interleave U/V */
                  (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
230
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE.
 *
 * Points the hardware at the indirect MV object (the VME output, except
 * for JPEG which has no VME stage) and at the PAK-BSE object that
 * receives the coded bitstream. For VP8 the bitstream upper bound is
 * also programmed; for JPEG the BSE base starts at a non-zero offset
 * within the object.
 */
static void
gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
                                 struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int vme_size;
    unsigned int bse_offset;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* the DW1-3 is for the MFX indirect bistream offset */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW4-5 is the MFX upper bound */
    if (encoder_context->codec == CODEC_VP8) {
        OUT_BCS_RELOC(batch,
                mfc_context->mfc_indirect_pak_bse_object.bo,
                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                mfc_context->mfc_indirect_pak_bse_object.end_offset);
        OUT_BCS_BATCH(batch, 0);
    } else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    if(encoder_context->codec != CODEC_JPEG) {
        vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;
        /* the DW6-10 is for MFX Indirect MV Object Base Address */
        OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size);
        OUT_BCS_BATCH(batch, 0);
    } else {
        /* No VME for JPEG */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
    bse_offset = (encoder_context->codec == CODEC_JPEG) ? (mfc_context->mfc_indirect_pak_bse_object.offset) : 0;
    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  bse_offset);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  mfc_context->mfc_indirect_pak_bse_object.end_offset);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
309
/* Emit MFX_AVC_IMG_STATE: the per-picture AVC encode configuration
 * (frame dimensions in macroblocks, QP offsets, entropy coding mode,
 * transform-8x8 flag, and the inter/intra MB size conformance limits).
 * Picture-level fields come from the VA-API picture parameter buffer. */
static void
gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

    /* Round the surface dimensions up to whole macroblocks. */
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;

    BEGIN_BCS_BATCH(batch, 16);

    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    /*DW1. MB setting of frame */
    OUT_BCS_BATCH(batch,
                  ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    /* DW3 QP setting */
    OUT_BCS_BATCH(batch,
                  (0 << 24) |   /* Second Chroma QP Offset */
                  (0 << 16) |   /* Chroma QP Offset */
                  (0 << 14) |   /* Max-bit conformance Intra flag */
                  (0 << 13) |   /* Max Macroblock size conformance Inter flag */
                  (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
                  (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
                  (0 << 8)  |   /* FIXME: Image Structure */
                  (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |   /* Mininum Frame size */
                  (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
                  (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
                  (0 << 13) |   /* CABAC 0 word insertion test enable */
                  (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
                  (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
                  (0 << 8)  |   /* FIXME: MbMvFormatFlag */
                  (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
                  (0 << 6)  |   /* Only valid for VLD decoding mode */
                  (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
                  (0 << 4)  |   /* Direct 8x8 inference flag */
                  (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
                  (1 << 2)  |   /* Frame MB only flag */
                  (0 << 1)  |   /* MBAFF mode is in active */
                  (0 << 0));    /* Field picture flag */
    /* DW5 Trellis quantization */
    OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
    OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
                  (0xBB8 << 16) |       /* InterMbMaxSz */
                  (0xEE8) );            /* IntraMbMaxSz */
    OUT_BCS_BATCH(batch, 0);            /* Reserved */
    /* DW8. QP delta */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    /* DW10. Bit setting for MB */
    OUT_BCS_BATCH(batch, 0x8C000000);
    OUT_BCS_BATCH(batch, 0x00010000);
    /* DW12. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0x02010100);
    /* DW14. For short format */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
377
378 static void
379 gen8_mfc_qm_state(VADriverContextP ctx,
380                   int qm_type,
381                   const uint32_t *qm,
382                   int qm_length,
383                   struct intel_encoder_context *encoder_context)
384 {
385     struct intel_batchbuffer *batch = encoder_context->base.batch;
386     unsigned int qm_buffer[16];
387
388     assert(qm_length <= 16);
389     assert(sizeof(*qm) == 4);
390     memcpy(qm_buffer, qm, qm_length * 4);
391
392     BEGIN_BCS_BATCH(batch, 18);
393     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
394     OUT_BCS_BATCH(batch, qm_type << 0);
395     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
396     ADVANCE_BCS_BATCH(batch);
397 }
398
399 static void
400 gen8_mfc_avc_qm_state(VADriverContextP ctx,
401                       struct encode_state *encode_state,
402                       struct intel_encoder_context *encoder_context)
403 {
404     const unsigned int *qm_4x4_intra;
405     const unsigned int *qm_4x4_inter;
406     const unsigned int *qm_8x8_intra;
407     const unsigned int *qm_8x8_inter;
408     VAEncSequenceParameterBufferH264 *pSeqParameter =
409         (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
410     VAEncPictureParameterBufferH264 *pPicParameter =
411         (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
412
413     if (!pSeqParameter->seq_fields.bits.seq_scaling_matrix_present_flag
414         && !pPicParameter->pic_fields.bits.pic_scaling_matrix_present_flag) {
415         qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
416     } else {
417         VAIQMatrixBufferH264 *qm;
418         assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
419         qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
420         qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
421         qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
422         qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
423         qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
424     }
425
426     gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
427     gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
428     gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
429     gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
430 }
431
432 static void
433 gen8_mfc_fqm_state(VADriverContextP ctx,
434                    int fqm_type,
435                    const uint32_t *fqm,
436                    int fqm_length,
437                    struct intel_encoder_context *encoder_context)
438 {
439     struct intel_batchbuffer *batch = encoder_context->base.batch;
440     unsigned int fqm_buffer[32];
441
442     assert(fqm_length <= 32);
443     assert(sizeof(*fqm) == 4);
444     memcpy(fqm_buffer, fqm, fqm_length * 4);
445
446     BEGIN_BCS_BATCH(batch, 34);
447     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
448     OUT_BCS_BATCH(batch, fqm_type << 0);
449     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
450     ADVANCE_BCS_BATCH(batch);
451 }
452
/* Build a forward quantization matrix from a scaling list: each output
 * entry is the 16.16 fixed-point reciprocal of the corresponding entry
 * of the transposed len x len input matrix. */
static void
gen8_mfc_avc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
{
    int row, col;

    for (row = 0; row < len; row++) {
        for (col = 0; col < len; col++) {
            /* Note the transpose: output (row, col) reads input (col, row). */
            fqm[row * len + col] = (1 << 16) / qm[col * len + row];
        }
    }
}
461
462 static void
463 gen8_mfc_avc_fqm_state(VADriverContextP ctx,
464                        struct encode_state *encode_state,
465                        struct intel_encoder_context *encoder_context)
466 {
467     VAEncSequenceParameterBufferH264 *pSeqParameter =
468         (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
469     VAEncPictureParameterBufferH264 *pPicParameter =
470         (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
471
472     if (!pSeqParameter->seq_fields.bits.seq_scaling_matrix_present_flag
473         && !pPicParameter->pic_fields.bits.pic_scaling_matrix_present_flag) {
474         gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
475         gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
476         gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
477         gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
478     } else {
479         int i;
480         uint32_t fqm[32];
481         VAIQMatrixBufferH264 *qm;
482         assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
483         qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
484
485         for (i = 0; i < 3; i++)
486             gen8_mfc_avc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
487         gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);
488
489         for (i = 3; i < 6; i++)
490             gen8_mfc_avc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
491         gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);
492
493         gen8_mfc_avc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
494         gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);
495
496         gen8_mfc_avc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
497         gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
498     }
499 }
500
501 static void
502 gen8_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
503                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
504                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
505                            struct intel_batchbuffer *batch)
506 {
507     if (batch == NULL)
508         batch = encoder_context->base.batch;
509
510     if (data_bits_in_last_dw == 0)
511         data_bits_in_last_dw = 32;
512
513     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
514
515     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
516     OUT_BCS_BATCH(batch,
517                   (0 << 16) |   /* always start at offset 0 */
518                   (data_bits_in_last_dw << 8) |
519                   (skip_emul_byte_count << 4) |
520                   (!!emulation_flag << 3) |
521                   ((!!is_last_header) << 2) |
522                   ((!!is_end_of_slice) << 1) |
523                   (0 << 0));    /* FIXME: ??? */
524     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
525
526     ADVANCE_BCS_BATCH(batch);
527 }
528
529
530 static void gen8_mfc_init(VADriverContextP ctx,
531                           struct encode_state *encode_state,
532                           struct intel_encoder_context *encoder_context)
533 {
534     struct i965_driver_data *i965 = i965_driver_data(ctx);
535     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
536     dri_bo *bo;
537     int i;
538     int width_in_mbs = 0;
539     int height_in_mbs = 0;
540     int slice_batchbuffer_size;
541
542     if (encoder_context->codec == CODEC_H264 ||
543         encoder_context->codec == CODEC_H264_MVC) {
544         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
545         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
546         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
547     } else if (encoder_context->codec == CODEC_MPEG2) {
548         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
549
550         assert(encoder_context->codec == CODEC_MPEG2);
551
552         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
553         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
554     } else {
555         assert(encoder_context->codec == CODEC_JPEG);
556         VAEncPictureParameterBufferJPEG *pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
557
558         width_in_mbs = ALIGN(pic_param->picture_width, 16) / 16;
559         height_in_mbs = ALIGN(pic_param->picture_height, 16) / 16;
560     }
561
562     slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
563                 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
564
565     /*Encode common setup for MFC*/
566     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
567     mfc_context->post_deblocking_output.bo = NULL;
568
569     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
570     mfc_context->pre_deblocking_output.bo = NULL;
571
572     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
573     mfc_context->uncompressed_picture_source.bo = NULL;
574
575     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
576     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
577
578     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
579         if (mfc_context->direct_mv_buffers[i].bo != NULL)
580             dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
581         mfc_context->direct_mv_buffers[i].bo = NULL;
582     }
583
584     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
585         if (mfc_context->reference_surfaces[i].bo != NULL)
586             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
587         mfc_context->reference_surfaces[i].bo = NULL;  
588     }
589
590     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
591     bo = dri_bo_alloc(i965->intel.bufmgr,
592                       "Buffer",
593                       width_in_mbs * 64,
594                       64);
595     assert(bo);
596     mfc_context->intra_row_store_scratch_buffer.bo = bo;
597
598     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
599     bo = dri_bo_alloc(i965->intel.bufmgr,
600                       "Buffer",
601                       width_in_mbs * height_in_mbs * 16,
602                       64);
603     assert(bo);
604     mfc_context->macroblock_status_buffer.bo = bo;
605
606     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
607     bo = dri_bo_alloc(i965->intel.bufmgr,
608                       "Buffer",
609                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
610                       64);
611     assert(bo);
612     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
613
614     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
615     bo = dri_bo_alloc(i965->intel.bufmgr,
616                       "Buffer",
617                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
618                       0x1000);
619     assert(bo);
620     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
621
622     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
623     mfc_context->mfc_batchbuffer_surface.bo = NULL;
624
625     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
626     mfc_context->aux_batchbuffer_surface.bo = NULL;
627
628     if (mfc_context->aux_batchbuffer)
629         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
630
631     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
632     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
633     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
634     mfc_context->aux_batchbuffer_surface.pitch = 16;
635     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
636     mfc_context->aux_batchbuffer_surface.size_block = 16;
637
638     gen8_gpe_context_init(ctx, &mfc_context->gpe_context);
639 }
640
/* Emit MFX_PIPE_BUF_ADDR_STATE: the full set of pipeline buffer
 * addresses (pre-/post-deblocking output, source picture, MB status,
 * row-store scratch buffers, and the reference picture list). Unset
 * buffers are programmed as zero. */
static void
gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx,
                             struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int i;

    BEGIN_BCS_BATCH(batch, 61);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));

    /* the DW1-3 is for pre_deblocking */
    if (mfc_context->pre_deblocking_output.bo)
        OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* the DW4-6 is for the post_deblocking */

    if (mfc_context->post_deblocking_output.bo)
        OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);                                                                                       /* post output addr  */
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW7-9 is for the uncompressed_picture */
    OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0); /* uncompressed data */

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW10-12 is for the mb status */
    OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0); /* StreamOut data*/

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW16-18 is for the deblocking filter */
    OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 19-50 is for Reference pictures*/
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        if ( mfc_context->reference_surfaces[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }

        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);

    /* The DW 52-54 is for the MB status buffer */
    OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);                                                                                   /* Macroblock status buffer*/

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 55-57 is the ILDB buffer */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 58-60 is the second ILDB buffer */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
742
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 DWs): direct-mode MV buffers for the
 * reference frames, the MV write buffer for the current frame, and the
 * POC list.  Each buffer address is emitted as a 64-bit pair (reloc plus
 * an upper DW of 0).
 */
static void
gen8_mfc_avc_directmode_state(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    int i;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* the DW1-32 is for the direct MV for reference;
     * only the even-indexed buffers are programmed (i += 2) */
    for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
        if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
            OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
            OUT_BCS_BATCH(batch, 0);    /* upper address DW */
        } else {
            /* no buffer allocated for this slot: emit a NULL 64-bit address */
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }
    
    OUT_BCS_BATCH(batch, 0);    /* DW33 */

    /* the DW34-36 is the MV write buffer for the current frame
     * (last-but-one slot of direct_mv_buffers is reserved for it) */
    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC list — NOTE(review): original comment said "POL list"; each pair
     * of entries shares the value i/2, i.e. one value per frame */
    for(i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, i/2);
    }
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
789
790
791 static void
792 gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
793                                  struct intel_encoder_context *encoder_context)
794 {
795     struct intel_batchbuffer *batch = encoder_context->base.batch;
796     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
797
798     BEGIN_BCS_BATCH(batch, 10);
799
800     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
801     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
802                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
803                   0);
804     OUT_BCS_BATCH(batch, 0);
805     OUT_BCS_BATCH(batch, 0);
806         
807     /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
808     OUT_BCS_BATCH(batch, 0);
809     OUT_BCS_BATCH(batch, 0);
810     OUT_BCS_BATCH(batch, 0);
811
812     /* the DW7-9 is for Bitplane Read Buffer Base Address */
813     OUT_BCS_BATCH(batch, 0);
814     OUT_BCS_BATCH(batch, 0);
815     OUT_BCS_BATCH(batch, 0);
816
817     ADVANCE_BCS_BATCH(batch);
818 }
819
820
821 static void gen8_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
822                                                       struct encode_state *encode_state,
823                                                       struct intel_encoder_context *encoder_context)
824 {
825     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
826
827     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
828     mfc_context->set_surface_state(ctx, encoder_context);
829     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
830     gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
831     gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
832     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
833     mfc_context->avc_qm_state(ctx, encode_state, encoder_context);
834     mfc_context->avc_fqm_state(ctx, encode_state, encoder_context);
835     gen8_mfc_avc_directmode_state(ctx, encoder_context); 
836     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
837 }
838
839
840 static VAStatus gen8_mfc_run(VADriverContextP ctx, 
841                              struct encode_state *encode_state,
842                              struct intel_encoder_context *encoder_context)
843 {
844     struct intel_batchbuffer *batch = encoder_context->base.batch;
845
846     intel_batchbuffer_flush(batch);             //run the pipeline
847
848     return VA_STATUS_SUCCESS;
849 }
850
851
852 static VAStatus
853 gen8_mfc_stop(VADriverContextP ctx, 
854               struct encode_state *encode_state,
855               struct intel_encoder_context *encoder_context,
856               int *encoded_bits_size)
857 {
858     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
859     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
860     VACodedBufferSegment *coded_buffer_segment;
861     
862     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
863     assert(vaStatus == VA_STATUS_SUCCESS);
864     *encoded_bits_size = coded_buffer_segment->size * 8;
865     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
866
867     return VA_STATUS_SUCCESS;
868 }
869
870
871 static void
872 gen8_mfc_avc_slice_state(VADriverContextP ctx,
873                          VAEncPictureParameterBufferH264 *pic_param,
874                          VAEncSliceParameterBufferH264 *slice_param,
875                          struct encode_state *encode_state,
876                          struct intel_encoder_context *encoder_context,
877                          int rate_control_enable,
878                          int qp,
879                          struct intel_batchbuffer *batch)
880 {
881     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
882     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
883     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
884     int beginmb = slice_param->macroblock_address;
885     int endmb = beginmb + slice_param->num_macroblocks;
886     int beginx = beginmb % width_in_mbs;
887     int beginy = beginmb / width_in_mbs;
888     int nextx =  endmb % width_in_mbs;
889     int nexty = endmb / width_in_mbs;
890     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
891     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
892     int maxQpN, maxQpP;
893     unsigned char correct[6], grow, shrink;
894     int i;
895     int weighted_pred_idc = 0;
896     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
897     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
898     int num_ref_l0 = 0, num_ref_l1 = 0;
899
900     if (batch == NULL)
901         batch = encoder_context->base.batch;
902
903     if (slice_type == SLICE_TYPE_I) {
904         luma_log2_weight_denom = 0;
905         chroma_log2_weight_denom = 0;
906     } else if (slice_type == SLICE_TYPE_P) {
907         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
908         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
909
910         if (slice_param->num_ref_idx_active_override_flag)
911             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
912     } else if (slice_type == SLICE_TYPE_B) {
913         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
914         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
915         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
916
917         if (slice_param->num_ref_idx_active_override_flag) {
918             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
919             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
920         }
921
922         if (weighted_pred_idc == 2) {
923             /* 8.4.3 - Derivation process for prediction weights (8-279) */
924             luma_log2_weight_denom = 5;
925             chroma_log2_weight_denom = 5;
926         }
927     }
928
929     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
930     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
931
932     for (i = 0; i < 6; i++)
933         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
934
935     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + 
936         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
937     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + 
938         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
939
940     BEGIN_BCS_BATCH(batch, 11);;
941
942     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
943     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
944
945     OUT_BCS_BATCH(batch,
946                   (num_ref_l0 << 16) |
947                   (num_ref_l1 << 24) |
948                   (chroma_log2_weight_denom << 8) |
949                   (luma_log2_weight_denom << 0));
950
951     OUT_BCS_BATCH(batch, 
952                   (weighted_pred_idc << 30) |
953                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
954                   (slice_param->disable_deblocking_filter_idc << 27) |
955                   (slice_param->cabac_init_idc << 24) |
956                   (qp<<16) |                    /*Slice Quantization Parameter*/
957                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
958                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
959     OUT_BCS_BATCH(batch,
960                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
961                   (beginx << 16) |
962                   slice_param->macroblock_address );
963     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
964     OUT_BCS_BATCH(batch, 
965                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
966                   (1 << 30) |           /*ResetRateControlCounter*/
967                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
968                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
969                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
970                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
971                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
972                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
973                   (last_slice << 19) |     /*IsLastSlice*/
974                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
975                   (1 << 17) |       /*HeaderPresentFlag*/       
976                   (1 << 16) |       /*SliceData PresentFlag*/
977                   (1 << 15) |       /*TailPresentFlag*/
978                   (1 << 13) |       /*RBSP NAL TYPE*/   
979                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
980     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
981     OUT_BCS_BATCH(batch,
982                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
983                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
984                   (shrink << 8)  |
985                   (grow << 0));   
986     OUT_BCS_BATCH(batch,
987                   (correct[5] << 20) |
988                   (correct[4] << 16) |
989                   (correct[3] << 12) |
990                   (correct[2] << 8) |
991                   (correct[1] << 4) |
992                   (correct[0] << 0));
993     OUT_BCS_BATCH(batch, 0);
994
995     ADVANCE_BCS_BATCH(batch);
996 }
997
998 #define    AVC_INTRA_RDO_OFFSET    4
999 #define    AVC_INTER_RDO_OFFSET    10
1000 #define    AVC_INTER_MSG_OFFSET    8
1001 #define    AVC_INTER_MV_OFFSET     48
1002 #define    AVC_RDO_MASK            0xFFFF
1003
/*
 * Emit one MFC_AVC_PAK_OBJECT (12 DWs) for an intra macroblock at (x, y).
 *
 * msg points at the VME output record for this MB; msg[0] carries the MB
 * mode dword and msg[1..3] the intra prediction modes.  Returns the
 * command length in dwords so the caller can account for batch space.
 */
static int
gen8_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
                              int qp,unsigned int *msg,
                              struct intel_encoder_context *encoder_context,
                              unsigned char target_mb_size, unsigned char max_mb_size,
                              struct intel_batchbuffer *batch)
{
    int len_in_dwords = 12;
    unsigned int intra_msg;
#define         INTRA_MSG_FLAG          (1 << 13)
#define         INTRA_MBTYPE_MASK       (0x1F0000)
    /* NULL batch means "use the encoder's default batch" */
    if (batch == NULL)
        batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    /* Repack the VME mode dword for PAK: keep the low fields (mask
     * 0xC0FF), set the intra flag, and move the MbType bits down 8. */
    intra_msg = msg[0] & 0xC0FF;
    intra_msg |= INTRA_MSG_FLAG;
    intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 
                  (0 << 24) |           /* PackedMvNum, Debug*/
                  (0 << 20) |           /* No motion vector */
                  (1 << 19) |           /* CbpDcY */
                  (1 << 18) |           /* CbpDcU */
                  (1 << 17) |           /* CbpDcV */
                  intra_msg);

    OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
    OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
    OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */

    /*Stuff for Intra MB*/
    OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
    OUT_BCS_BATCH(batch, msg[2]);       
    OUT_BCS_BATCH(batch, msg[3]&0xFF);  
    
    /*MaxSizeInWord and TargetSzieInWord*/
    OUT_BCS_BATCH(batch, (max_mb_size << 24) |
                  (target_mb_size << 16) );

    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}
1053
/*
 * Emit one MFC_AVC_PAK_OBJECT (12 DWs) for an inter macroblock at (x, y).
 *
 * msg points at the VME output record; offset is the byte offset of this
 * MB's motion vectors in the VME output buffer (consumed indirectly by
 * the PAK).  The MVs in the record are first repacked in place from the
 * VME 16-sub-block layout into the layout AVC_PAK expects.  Returns the
 * command length in dwords.
 */
static int
gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
                              unsigned int *msg, unsigned int offset,
                              struct intel_encoder_context *encoder_context,
                              unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
                              struct intel_batchbuffer *batch)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int len_in_dwords = 12;
    unsigned int inter_msg = 0;
    if (batch == NULL)
        batch = encoder_context->base.batch;
    {
#define MSG_MV_OFFSET   4
        unsigned int *mv_ptr;
        mv_ptr = msg + MSG_MV_OFFSET;
        /* MV of VME output is based on 16 sub-blocks. So it is necessary
         * to convert them to be compatible with the format of AVC_PAK
         * command.
         */
        if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
            /* MV[0] and MV[2] are replicated */
            mv_ptr[4] = mv_ptr[0];
            mv_ptr[5] = mv_ptr[1];
            mv_ptr[2] = mv_ptr[8];
            mv_ptr[3] = mv_ptr[9];
            mv_ptr[6] = mv_ptr[8];
            mv_ptr[7] = mv_ptr[9];
        } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
            /* MV[0] and MV[1] are replicated */
            mv_ptr[2] = mv_ptr[0];
            mv_ptr[3] = mv_ptr[1];
            mv_ptr[4] = mv_ptr[16];
            mv_ptr[5] = mv_ptr[17];
            mv_ptr[6] = mv_ptr[24];
            mv_ptr[7] = mv_ptr[25];
        } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
                   !(msg[1] & SUBMB_SHAPE_MASK)) {
            /* 8x8 without sub-partitioning: one MV per 8x8 block.
             * Don't touch MV[0] or MV[1] */
            mv_ptr[2] = mv_ptr[8];
            mv_ptr[3] = mv_ptr[9];
            mv_ptr[4] = mv_ptr[16];
            mv_ptr[5] = mv_ptr[17];
            mv_ptr[6] = mv_ptr[24];
            mv_ptr[7] = mv_ptr[25];
        }
    }

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));

    /* MV quantity: 32 MVs (8 per list pair) normally, 128 when the 8x8
     * mode carries sub-partition shapes */
    inter_msg = 32;
    /* MV quantity */
    if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
        if (msg[1] & SUBMB_SHAPE_MASK)
            inter_msg = 128;
    }
    OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
    OUT_BCS_BATCH(batch, offset);            /* indirect MV data offset */
    /* Repack the VME mode dword for PAK and set the MV-count flags */
    inter_msg = msg[0] & (0x1F00FFFF);
    inter_msg |= INTER_MV8;
    inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
    if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
        (msg[1] & SUBMB_SHAPE_MASK)) {
        inter_msg |= INTER_MV32;
    }

    OUT_BCS_BATCH(batch, inter_msg);

    OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
    OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
#if 0 
    if ( slice_type == SLICE_TYPE_B) {
        OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
    } else {
        OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
    }
#else
    OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
#endif

    inter_msg = msg[1] >> 8;
    /*Stuff for Inter MB*/
    OUT_BCS_BATCH(batch, inter_msg);        
    OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
    OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);

    /*MaxSizeInWord and TargetSzieInWord*/
    OUT_BCS_BATCH(batch, (max_mb_size << 24) |
                  (target_mb_size << 16) );

    OUT_BCS_BATCH(batch, 0x0);    

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}
1152
1153 static void 
1154 gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1155                                        struct encode_state *encode_state,
1156                                        struct intel_encoder_context *encoder_context,
1157                                        int slice_index,
1158                                        struct intel_batchbuffer *slice_batch)
1159 {
1160     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1161     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1162     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1163     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1164     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1165     unsigned int *msg = NULL, offset = 0;
1166     unsigned char *msg_ptr = NULL;
1167     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1168     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1169     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1170     int i,x,y;
1171     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1172     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1173     unsigned int tail_data[] = { 0x0, 0x0 };
1174     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1175     int is_intra = slice_type == SLICE_TYPE_I;
1176     int qp_slice;
1177     int qp_mb;
1178
1179     qp_slice = qp;
1180     if (rate_control_mode == VA_RC_CBR) {
1181         qp = mfc_context->brc.qp_prime_y[0][slice_type];
1182         if (encode_state->slice_header_index[slice_index] == 0) {
1183             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1184             qp_slice = qp;
1185         }
1186     }
1187
1188     /* only support for 8-bit pixel bit-depth */
1189     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1190     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1191     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1192     assert(qp >= 0 && qp < 52);
1193
1194     gen8_mfc_avc_slice_state(ctx,
1195                              pPicParameter,
1196                              pSliceParameter,
1197                              encode_state, encoder_context,
1198                              (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
1199
1200     if ( slice_index == 0)
1201         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1202
1203     intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1204
1205     dri_bo_map(vme_context->vme_output.bo , 1);
1206     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1207
1208     if (is_intra) {
1209         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1210     } else {
1211         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1212     }
1213    
1214     for (i = pSliceParameter->macroblock_address; 
1215          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1216         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1217         x = i % width_in_mbs;
1218         y = i / width_in_mbs;
1219         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1220         if (vme_context->roi_enabled) {
1221             qp_mb = *(vme_context->qp_per_mb + i);
1222         } else
1223             qp_mb = qp;
1224
1225         if (is_intra) {
1226             assert(msg);
1227             gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch);
1228         } else {
1229             int inter_rdo, intra_rdo;
1230             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1231             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1232             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1233             if (intra_rdo < inter_rdo) { 
1234                 gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch);
1235             } else {
1236                 msg += AVC_INTER_MSG_OFFSET;
1237                 gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp_mb, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1238             }
1239         }
1240     }
1241    
1242     dri_bo_unmap(vme_context->vme_output.bo);
1243
1244     if ( last_slice ) {    
1245         mfc_context->insert_object(ctx, encoder_context,
1246                                    tail_data, 2, 8,
1247                                    2, 1, 1, 0, slice_batch);
1248     } else {
1249         mfc_context->insert_object(ctx, encoder_context,
1250                                    tail_data, 1, 8,
1251                                    1, 1, 1, 0, slice_batch);
1252     }
1253 }
1254
1255 static dri_bo *
1256 gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1257                                   struct encode_state *encode_state,
1258                                   struct intel_encoder_context *encoder_context)
1259 {
1260     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1261     struct intel_batchbuffer *batch;
1262     dri_bo *batch_bo;
1263     int i;
1264
1265     batch = mfc_context->aux_batchbuffer;
1266     batch_bo = batch->buffer;
1267     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1268         gen8_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1269     }
1270
1271     intel_batchbuffer_align(batch, 8);
1272     
1273     BEGIN_BCS_BATCH(batch, 2);
1274     OUT_BCS_BATCH(batch, 0);
1275     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1276     ADVANCE_BCS_BATCH(batch);
1277
1278     dri_bo_reference(batch_bo);
1279     intel_batchbuffer_free(batch);
1280     mfc_context->aux_batchbuffer = NULL;
1281
1282     return batch_bo;
1283 }
1284
1285
1286 static void
1287 gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1288                                     struct encode_state *encode_state,
1289                                     struct intel_encoder_context *encoder_context)
1290 {
1291     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1292     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1293
1294     assert(vme_context->vme_output.bo);
1295     mfc_context->buffer_suface_setup(ctx,
1296                                      &mfc_context->gpe_context,
1297                                      &vme_context->vme_output,
1298                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1299                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1300 }
1301
1302 static void
1303 gen8_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1304                                      struct encode_state *encode_state,
1305                                      struct intel_encoder_context *encoder_context)
1306 {
1307     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1308     assert(mfc_context->aux_batchbuffer_surface.bo);
1309     mfc_context->buffer_suface_setup(ctx,
1310                                      &mfc_context->gpe_context,
1311                                      &mfc_context->aux_batchbuffer_surface,
1312                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1313                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1314 }
1315
/* Bind both kernel surfaces: the VME output (input) and the aux
 * batchbuffer (output). */
static void
gen8_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context)
{
    gen8_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
    gen8_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
}
1324
1325 static void
1326 gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1327                                 struct encode_state *encode_state,
1328                                 struct intel_encoder_context *encoder_context)
1329 {
1330     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1331     struct gen8_interface_descriptor_data *desc;
1332     int i;
1333     dri_bo *bo;
1334     unsigned char *desc_ptr;
1335
1336     bo = mfc_context->gpe_context.dynamic_state.bo;
1337     dri_bo_map(bo, 1);
1338     assert(bo->virtual);
1339     desc_ptr = (unsigned char *)bo->virtual + mfc_context->gpe_context.idrt_offset;
1340
1341     desc = (struct gen8_interface_descriptor_data *)desc_ptr;
1342
1343     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1344         struct i965_kernel *kernel;
1345         kernel = &mfc_context->gpe_context.kernels[i];
1346         assert(sizeof(*desc) == 32);
1347         /*Setup the descritor table*/
1348         memset(desc, 0, sizeof(*desc));
1349         desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
1350         desc->desc3.sampler_count = 0;
1351         desc->desc3.sampler_state_pointer = 0;
1352         desc->desc4.binding_table_entry_count = 1;
1353         desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1354         desc->desc5.constant_urb_entry_read_offset = 0;
1355         desc->desc5.constant_urb_entry_read_length = 4;
1356
1357                 
1358         desc++;
1359     }
1360
1361     dri_bo_unmap(bo);
1362
1363     return;
1364 }
1365
1366 static void
1367 gen8_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1368                                     struct encode_state *encode_state,
1369                                     struct intel_encoder_context *encoder_context)
1370 {
1371     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1372     
1373     (void)mfc_context;
1374 }
1375
1376 #define AVC_PAK_LEN_IN_BYTE     48
1377 #define AVC_PAK_LEN_IN_OWORD    3
1378
1379 static void
1380 gen8_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1381                                           uint32_t intra_flag,
1382                                           int head_offset,
1383                                           int number_mb_cmds,
1384                                           int slice_end_x,
1385                                           int slice_end_y,
1386                                           int mb_x,
1387                                           int mb_y,
1388                                           int width_in_mbs,
1389                                           int qp,
1390                                           uint32_t fwd_ref,
1391                                           uint32_t bwd_ref)
1392 {
1393     uint32_t temp_value;
1394     BEGIN_BATCH(batch, 14);
1395     
1396     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2));
1397     OUT_BATCH(batch, 0);
1398     OUT_BATCH(batch, 0);
1399     OUT_BATCH(batch, 0);
1400     OUT_BATCH(batch, 0);
1401     OUT_BATCH(batch, 0);
1402    
1403     /*inline data */
1404     OUT_BATCH(batch, head_offset / 16);
1405     OUT_BATCH(batch, (intra_flag) | (qp << 16));
1406     temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16));
1407     OUT_BATCH(batch, temp_value);
1408
1409     OUT_BATCH(batch, number_mb_cmds);
1410
1411     OUT_BATCH(batch,
1412               ((slice_end_y << 8) | (slice_end_x)));
1413     OUT_BATCH(batch, fwd_ref);
1414     OUT_BATCH(batch, bwd_ref);
1415
1416     OUT_BATCH(batch, MI_NOOP);
1417
1418     ADVANCE_BATCH(batch);
1419 }
1420
/* Convert the VME output of one H.264 slice into MFX PAK objects by emitting
 * a series of MEDIA_OBJECT commands.  The slice is processed in runs of at
 * most "number_mb_cmds" macroblocks; when ROI is enabled a run additionally
 * ends as soon as the per-MB QP map changes value, so every MEDIA_OBJECT
 * carries a single QP.  head_offset is the byte offset (in the aux batch)
 * where the PAK objects for this slice begin. */
static void
gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
                                        struct intel_encoder_context *encoder_context,
                                        VAEncSliceParameterBufferH264 *slice_param,
                                        int head_offset,
                                        int qp,
                                        int last_slice)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int total_mbs = slice_param->num_macroblocks;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    int number_mb_cmds = 128;
    int starting_offset = 0;
    int mb_x, mb_y;
    int last_mb, slice_end_x, slice_end_y;
    int remaining_mb = total_mbs;
    uint32_t fwd_ref , bwd_ref, mb_flag;
    char tmp_qp;
    int number_roi_mbs, max_mb_cmds, i;

    /* MB coordinates of the last macroblock in the slice */
    last_mb = slice_param->macroblock_address + total_mbs - 1;
    slice_end_x = last_mb % width_in_mbs;
    slice_end_y = last_mb / width_in_mbs;

    if (slice_type == SLICE_TYPE_I) {
        /* Intra slices don't use reference pictures */
        fwd_ref = 0;
        bwd_ref = 0;
        mb_flag = 1;    /* intra flag passed down in the inline data */
    } else {
        fwd_ref = vme_context->ref_index_in_mb[0];
        bwd_ref = vme_context->ref_index_in_mb[1];
        mb_flag = 0;
    }

    /* Heuristic: scale the per-command MB count with the picture width so
     * wider pictures issue proportionally larger runs */
    if (width_in_mbs >= 100) {
        number_mb_cmds = width_in_mbs / 5;
    } else if (width_in_mbs >= 80) {
        number_mb_cmds = width_in_mbs / 4;
    } else if (width_in_mbs >= 60) {
        number_mb_cmds = width_in_mbs / 3;
    } else if (width_in_mbs >= 40) {
        number_mb_cmds = width_in_mbs / 2;
    } else {
        number_mb_cmds = width_in_mbs;
    }

    max_mb_cmds = number_mb_cmds;

    do {
        /* MB coordinates where the current run starts */
        mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs;
        mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs;

        number_mb_cmds = max_mb_cmds;
        if (vme_context->roi_enabled) {

            /* Count how many consecutive MBs share the same QP; the run
             * must stop at the first QP change so the command's single QP
             * field is valid for every MB it covers */
            number_roi_mbs = 1;
            tmp_qp = *(vme_context->qp_per_mb + starting_offset);
            for (i = 1; i < max_mb_cmds; i++) {
                if (tmp_qp != *(vme_context->qp_per_mb + starting_offset + i))
                    break;

                number_roi_mbs++;
            }

            number_mb_cmds = number_roi_mbs;
            qp = tmp_qp;
        }

        /* Don't run past the end of the slice */
        if (number_mb_cmds >= remaining_mb) {
            number_mb_cmds = remaining_mb;
        }

        gen8_mfc_batchbuffer_emit_object_command(batch,
                                                  mb_flag,
                                                  head_offset,
                                                  number_mb_cmds,
                                                  slice_end_x,
                                                  slice_end_y,
                                                  mb_x,
                                                  mb_y,
                                                  width_in_mbs,
                                                  qp,
                                                  fwd_ref,
                                                  bwd_ref);

        /* Advance the output offset by the PAK objects just covered */
        head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE);
        remaining_mb -= number_mb_cmds;
        starting_offset += number_mb_cmds;
    } while (remaining_mb > 0);
}
1514
/* Build the aux (slice) batchbuffer content for one H.264 slice:
 * slice state, packed headers (frame headers on the first slice only),
 * reserved space for the hardware-generated PAK objects, the MEDIA_OBJECT
 * commands that fill that space, and finally the tail/padding data. */
static void
gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                int slice_index)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
    int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
    int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    unsigned int tail_data[] = { 0x0, 0x0 };
    long head_offset;
    int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
    int qp_slice;

    qp_slice = qp;
    if (rate_control_mode == VA_RC_CBR) {
        /* Under CBR the BRC-computed QP overrides the application QP;
         * index [0] is the temporal layer (layer 0 here) */
        qp = mfc_context->brc.qp_prime_y[0][slice_type];
        if (encode_state->slice_header_index[slice_index] == 0) {
            /* No app-supplied packed slice header: the driver generates it,
             * so the slice_qp_delta must be rewritten to match the BRC QP */
            pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
            qp_slice = qp;
        }
    }

    /* only support for 8-bit pixel bit-depth */
    assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
    assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
    assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
    assert(qp >= 0 && qp < 52);

    gen8_mfc_avc_slice_state(ctx,
                              pPicParameter,
                              pSliceParameter,
                              encode_state,
                              encoder_context,
                              (rate_control_mode == VA_RC_CBR),
                              qp_slice,
                              slice_batch);

    /* SPS/PPS and other frame-level headers only precede the first slice */
    if (slice_index == 0)
        intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);

    intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);

    intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
    head_offset = intel_batchbuffer_used_size(slice_batch);

    /* Reserve room for the PAK objects the GPU kernel will write in place;
     * the MEDIA_OBJECT commands emitted below reference head_offset */
    slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE;

    gen8_mfc_avc_batchbuffer_slice_command(ctx,
                                            encoder_context,
                                            pSliceParameter,
                                            head_offset,
                                            qp,
                                            last_slice);


    /* Aligned for tail */
    intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
    if (last_slice) {
        /* Last slice: two dwords of tail data with end-of-stream flags set */
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   tail_data,
                                   2,
                                   8,
                                   2,
                                   1,
                                   1,
                                   0,
                                   slice_batch);
    } else {
        /* Intermediate slice: one dword of tail data */
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   tail_data,
                                   1,
                                   8,
                                   1,
                                   1,
                                   1,
                                   0,
                                   slice_batch);
    }

    return;
}
1606
/* Run the media (GPGPU) pipeline that builds the slice batchbuffer for all
 * slices of the frame, then terminate the aux batch with
 * MI_BATCH_BUFFER_END and flush the media pipeline. */
static void
gen8_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i;

    intel_batchbuffer_start_atomic(batch, 0x4000);

    /* Gen9 needs its own pipeline setup sequence */
    if (IS_GEN9(i965->intel.device_info))
        gen9_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
    else
        gen8_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);

    for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
        gen8_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i);
    }
    {
        struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;

        /* Close the aux batch so the BCS can execute it as a second-level
         * batchbuffer later */
        intel_batchbuffer_align(slice_batch, 8);
        BEGIN_BCS_BATCH(slice_batch, 2);
        OUT_BCS_BATCH(slice_batch, 0);
        OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
        ADVANCE_BCS_BATCH(slice_batch);

        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH);
        OUT_BATCH(batch, 0);
        ADVANCE_BATCH(batch);
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

    if (IS_GEN9(i965->intel.device_info))
        gen9_gpe_pipeline_end(ctx, &mfc_context->gpe_context, batch);
}
1648
/* Prepare surfaces, the interface descriptor table, and constants for the
 * batchbuffer-building GPU kernel, then run the pipeline that produces the
 * slice batchbuffer. */
static void
gen8_mfc_build_avc_batchbuffer(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context)
{
    gen8_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
    gen8_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
    gen8_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
    gen8_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
}
1659
/* Build the slice batchbuffer on the GPU and return its buffer object.
 * The returned bo carries an extra reference taken here; the caller
 * (gen8_mfc_avc_pipeline_programing) unreferences it when done. */
static dri_bo *
gen8_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
    gen8_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);

    return mfc_context->aux_batchbuffer_surface.bo;
}
1672
/* Program the BCS (video) ring for one H.264 frame: build the slice batch
 * (on the CPU or the GPU depending on soft_batch_force), emit the
 * picture-level MFX state, then chain to the slice batch as a second-level
 * batchbuffer. */
static void
gen8_mfc_avc_pipeline_programing(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    dri_bo *slice_batch_bo;

    /* Interlaced content is not supported by this encoder path */
    if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
        fprintf(stderr, "Current VA driver don't support interlace mode!\n");
        assert(0);
        return;
    }

    if (encoder_context->soft_batch_force)
        slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
    else
        slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);


    // begin programing
    intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
    intel_batchbuffer_emit_mi_flush(batch);

    // picture level programing
    gen8_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);

    /* Jump into the slice batch; bit 8 presumably selects second-level
     * batchbuffer mode -- NOTE(review): confirm against the PRM */
    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_BCS_RELOC(batch,
                  slice_batch_bo,
                  I915_GEM_DOMAIN_COMMAND, 0,
                  0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);

    // end programing
    intel_batchbuffer_end_atomic(batch);

    /* Drop the reference taken by the batchbuffer-build helpers */
    dri_bo_unreference(slice_batch_bo);
}
1714
1715
/* Encode one H.264 picture.  Under CBR the frame is re-encoded in a loop
 * until the BRC reports no HRD violation (or an unrepairable over/underflow
 * at the QP limits, which is logged once and then tolerated).  Other rate
 * control modes encode exactly once. */
static VAStatus
gen8_mfc_avc_encode_picture(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    int current_frame_bits_size;
    int sts;

    for (;;) {
        gen8_mfc_init(ctx, encode_state, encoder_context);
        intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
        /*Programing bcs pipeline*/
        gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);   //filling the pipeline
        gen8_mfc_run(ctx, encode_state, encoder_context);
        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
            /* Wait for the frame and read back its coded size for BRC */
            gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
            sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
            if (sts == BRC_NO_HRD_VIOLATION) {
                intel_mfc_hrd_context_update(encode_state, mfc_context);
                break;
            }
            else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
                /* QP already at its limit: re-encoding can't help, so warn
                 * once per context and accept the frame */
                if (!mfc_context->hrd.violation_noted) {
                    fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
                    mfc_context->hrd.violation_noted = 1;
                }
                return VA_STATUS_SUCCESS;
            }
        } else {
            break;
        }
    }

    return VA_STATUS_SUCCESS;
}
1753
1754 /*
1755  * MPEG-2
1756  */
1757
/* Map VAEncPictureType (0=I, 1=P, 2=B) to the picture-coding-type field
 * value used by MFX_MPEG2_PIC_STATE. */
static const int
va_to_gen8_mpeg2_picture_type[3] = {
    1,  /* I */
    2,  /* P */
    3   /* B */
};
1764
/* Emit MFX_MPEG2_PIC_STATE: f_codes, picture coding extension flags,
 * picture type, and the frame dimensions in macroblocks. */
static void
gen8_mfc_mpeg2_pic_state(VADriverContextP ctx,
                         struct intel_encoder_context *encoder_context,
                         struct encode_state *encode_state)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferMPEG2 *pic_param;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
    VAEncSliceParameterBufferMPEG2 *slice_param = NULL;

    assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
    pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
    /* Only the first slice's quantiser is inspected below */
    slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
                  (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
                  (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
                  (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
                  va_to_gen8_mpeg2_picture_type[pic_param->picture_type] << 9 |
                  0);
    OUT_BCS_BATCH(batch,
                  1 << 31 |     /* slice concealment */
                  (height_in_mbs - 1) << 16 |
                  (width_in_mbs - 1));

    /* NOTE(review): the meaning of this magic control dword for large
     * quantiser scale codes is not evident from this file -- check the PRM */
    if (slice_param && slice_param->quantiser_scale_code >= 14)
        OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  0xFFF << 16 | /* InterMBMaxSize */
                  0xFFF << 0 |  /* IntraMBMaxSize */
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
1823
/* Load the MPEG-2 quantiser matrices: the spec's default intra matrix and
 * a flat (all 16) non-intra matrix.  Each 64-byte matrix is uploaded as
 * 16 dwords. */
static void
gen8_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    /* Default intra quantiser matrix from the MPEG-2 spec */
    unsigned char intra_qm[64] = {
        8, 16, 19, 22, 26, 27, 29, 34,
        16, 16, 22, 24, 27, 29, 34, 37,
        19, 22, 26, 27, 29, 34, 34, 38,
        22, 22, 26, 27, 29, 34, 37, 40,
        22, 26, 27, 29, 32, 35, 40, 48,
        26, 27, 29, 32, 35, 40, 48, 58,
        26, 27, 29, 34, 38, 46, 56, 69,
        27, 29, 35, 38, 46, 56, 69, 83
    };

    /* Default (flat) non-intra quantiser matrix */
    unsigned char non_intra_qm[64] = {
        16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16
    };

    gen8_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
    gen8_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
}
1852
/* Load the forward (reciprocal) quantiser matrices used by PAK.  Entries
 * are 65536/q in 16-bit fixed point; the non-intra matrix is flat
 * (65536/16 = 0x1000).  Each 128-byte matrix is uploaded as 32 dwords. */
static void
gen8_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    /* NOTE(review): a few entries below (the 65536/0x13 values in rows 4-5
     * and the row ordering overall) don't obviously correspond to the
     * reciprocal of the default intra matrix in gen8_mfc_mpeg2_qm_state --
     * possibly transcription typos or a different scan order; verify
     * against the hardware expectation before changing */
    unsigned short intra_fqm[64] = {
        65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
        65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
        65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
        65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
        65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
        65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
        65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
        65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
    };

    unsigned short non_intra_fqm[64] = {
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    };

    gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
    gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
}
1881
/* Emit MFC_MPEG2_SLICEGROUP_STATE for one slice group: control flags,
 * the group's start/end MB coordinates, its QP, and the bitstream output
 * pointer.  When batch is NULL the context's default batch is used. */
static void
gen8_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
                                struct intel_encoder_context *encoder_context,
                                int x, int y,
                                int next_x, int next_y,
                                int is_fisrt_slice_group,
                                int is_last_slice_group,
                                int intra_slice,
                                int qp,
                                struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    if (batch == NULL)
        batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 8);

    OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
    OUT_BCS_BATCH(batch,
                  0 << 31 |                             /* MbRateCtrlFlag */
                  !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
                  1 << 17 |                             /* Insert Header before the first slice group data */
                  1 << 16 |                             /* SliceData PresentFlag: always 1 */
                  1 << 15 |                             /* TailPresentFlag: always 1 */
                  0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
                  !!intra_slice << 13 |                 /* IntraSlice */
                  !!intra_slice << 12 |                 /* IntraSliceFlag */
                  0);
    /* Start MB of this group and start MB of the next group */
    OUT_BCS_BATCH(batch,
                  next_y << 24 |
                  next_x << 16 |
                  y << 8 |
                  x << 0 |
                  0);
    OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
    /* bitstream pointer is only loaded once for the first slice of a frame when
     * LoadSlicePointerFlag is 0
     */
    OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
    OUT_BCS_BATCH(batch, 0);    /* FIXME: */
    OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
    OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */

    ADVANCE_BCS_BATCH(batch);
}
1928
/* Emit one MFC_MPEG2_PAK_OBJECT for an intra-coded macroblock at MB
 * coordinates (x, y).  The first/last flags mark slice and slice-group
 * boundaries for the hardware.  Returns the command length in dwords so
 * the caller can track batch usage. */
static int
gen8_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
                                struct intel_encoder_context *encoder_context,
                                int x, int y,
                                int first_mb_in_slice,
                                int last_mb_in_slice,
                                int first_mb_in_slice_group,
                                int last_mb_in_slice_group,
                                int mb_type,
                                int qp_scale_code,
                                int coded_block_pattern,
                                unsigned char target_size_in_word,
                                unsigned char max_size_in_word,
                                struct intel_batchbuffer *batch)
{
    int len_in_dwords = 9;

    if (batch == NULL)
        batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
    OUT_BCS_BATCH(batch,
                  0 << 24 |     /* PackedMvNum */
                  0 << 20 |     /* MvFormat */
                  7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
                  0 << 15 |     /* TransformFlag: frame DCT */
                  0 << 14 |     /* FieldMbFlag */
                  1 << 13 |     /* IntraMbFlag */
                  mb_type << 8 |   /* MbType: Intra */
                  0 << 2 |      /* SkipMbFlag */
                  0 << 0 |      /* InterMbMode */
                  0);
    OUT_BCS_BATCH(batch, y << 16 | x);
    OUT_BCS_BATCH(batch,
                  max_size_in_word << 24 |
                  target_size_in_word << 16 |
                  coded_block_pattern << 6 |      /* CBP */
                  0);
    OUT_BCS_BATCH(batch,
                  last_mb_in_slice << 31 |
                  first_mb_in_slice << 30 |
                  0 << 27 |     /* EnableCoeffClamp */
                  last_mb_in_slice_group << 26 |
                  0 << 25 |     /* MbSkipConvDisable */
                  first_mb_in_slice_group << 24 |
                  0 << 16 |     /* MvFieldSelect */
                  qp_scale_code << 0 |
                  0);
    /* Intra MBs carry no motion vectors */
    OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
    OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
    OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
    OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}
1988
1989 /* Byte offset */
1990 #define MPEG2_INTER_MV_OFFSET   48 
1991
/* Per-f_code motion vector range limits (MPEG-2 table: range is
 * [-2^(f_code+3), 2^(f_code+3) - 1] in 1/2-pel units; index 0 is unused).
 * Read-only lookup table, so declared const. */
static const struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {0, 0},
    {-16, 15},
    {-32, 31},
    {-64, 63},
    {-128, 127},
    {-256, 255},
    {-512, 511},
    {-1024, 1023},
    {-2048, 2047},
    {-4096, 4095}
};
2008
2009 static int
2010 mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
2011 {
2012     if (mv + pos * 16 * 2 < 0 ||
2013         mv + (pos + 1) * 16 * 2 > display_max * 2)
2014         mv = 0;
2015
2016     if (f_code > 0 && f_code < 10) {
2017         if (mv < mv_ranges[f_code].low)
2018             mv = mv_ranges[f_code].low;
2019
2020         if (mv > mv_ranges[f_code].high)
2021             mv = mv_ranges[f_code].high;
2022     }
2023
2024     return mv;
2025 }
2026
2027 static int
2028 gen8_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2029                                 struct encode_state *encode_state,
2030                                 struct intel_encoder_context *encoder_context,
2031                                 unsigned int *msg,
2032                                 int width_in_mbs, int height_in_mbs,
2033                                 int x, int y,
2034                                 int first_mb_in_slice,
2035                                 int last_mb_in_slice,
2036                                 int first_mb_in_slice_group,
2037                                 int last_mb_in_slice_group,
2038                                 int qp_scale_code,
2039                                 unsigned char target_size_in_word,
2040                                 unsigned char max_size_in_word,
2041                                 struct intel_batchbuffer *batch)
2042 {
2043     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2044     int len_in_dwords = 9;
2045     short *mvptr, mvx0, mvy0, mvx1, mvy1;
2046     
2047     if (batch == NULL)
2048         batch = encoder_context->base.batch;
2049
2050     mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);;
2051     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2052     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2053     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2054     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2055
2056     BEGIN_BCS_BATCH(batch, len_in_dwords);
2057
2058     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2059     OUT_BCS_BATCH(batch,
2060                   2 << 24 |     /* PackedMvNum */
2061                   7 << 20 |     /* MvFormat */
2062                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
2063                   0 << 15 |     /* TransformFlag: frame DCT */
2064                   0 << 14 |     /* FieldMbFlag */
2065                   0 << 13 |     /* IntraMbFlag */
2066                   1 << 8 |      /* MbType: Frame-based */
2067                   0 << 2 |      /* SkipMbFlag */
2068                   0 << 0 |      /* InterMbMode */
2069                   0);
2070     OUT_BCS_BATCH(batch, y << 16 | x);
2071     OUT_BCS_BATCH(batch,
2072                   max_size_in_word << 24 |
2073                   target_size_in_word << 16 |
2074                   0x3f << 6 |   /* CBP */
2075                   0);
2076     OUT_BCS_BATCH(batch,
2077                   last_mb_in_slice << 31 |
2078                   first_mb_in_slice << 30 |
2079                   0 << 27 |     /* EnableCoeffClamp */
2080                   last_mb_in_slice_group << 26 |
2081                   0 << 25 |     /* MbSkipConvDisable */
2082                   first_mb_in_slice_group << 24 |
2083                   0 << 16 |     /* MvFieldSelect */
2084                   qp_scale_code << 0 |
2085                   0);
2086
2087     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
2088     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
2089     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2090     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2091
2092     ADVANCE_BCS_BATCH(batch);
2093
2094     return len_in_dwords;
2095 }
2096
2097 static void
2098 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2099                                            struct encode_state *encode_state,
2100                                            struct intel_encoder_context *encoder_context,
2101                                            struct intel_batchbuffer *slice_batch)
2102 {
2103     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2104     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2105
2106     if (encode_state->packed_header_data[idx]) {
2107         VAEncPackedHeaderParameterBuffer *param = NULL;
2108         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2109         unsigned int length_in_bits;
2110
2111         assert(encode_state->packed_header_param[idx]);
2112         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2113         length_in_bits = param->bit_length;
2114
2115         mfc_context->insert_object(ctx,
2116                                    encoder_context,
2117                                    header_data,
2118                                    ALIGN(length_in_bits, 32) >> 5,
2119                                    length_in_bits & 0x1f,
2120                                    5,   /* FIXME: check it */
2121                                    0,
2122                                    0,
2123                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2124                                    slice_batch);
2125     }
2126
2127     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2128
2129     if (encode_state->packed_header_data[idx]) {
2130         VAEncPackedHeaderParameterBuffer *param = NULL;
2131         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2132         unsigned int length_in_bits;
2133
2134         assert(encode_state->packed_header_param[idx]);
2135         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2136         length_in_bits = param->bit_length;
2137
2138         mfc_context->insert_object(ctx,
2139                                    encoder_context,
2140                                    header_data,
2141                                    ALIGN(length_in_bits, 32) >> 5,
2142                                    length_in_bits & 0x1f,
2143                                    5,   /* FIXME: check it */
2144                                    0,
2145                                    0,
2146                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2147                                    slice_batch);
2148     }
2149 }
2150
/*
 * Emit all PAK commands for one MPEG-2 slice group into slice_batch:
 * slice-group state, packed headers (first group only), a start-code
 * delimiter, one PAK object per macroblock, and a trailing delimiter.
 * next_slice_group_param is the following group's parameters, or NULL for
 * the last group of the picture.
 */
static void
gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context,
                                    int slice_index,
                                    VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
                                    struct intel_batchbuffer *slice_batch)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
    VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
    unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
    unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
    int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
    int i, j;
    int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
    unsigned int *msg = NULL;
    unsigned char *msg_ptr = NULL;

    /* Convert the linear macroblock address of the first slice into MB
     * coordinates; a slice must not wrap past the right picture edge. */
    slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
    h_start_pos = slice_param->macroblock_address % width_in_mbs;
    v_start_pos = slice_param->macroblock_address / width_in_mbs;
    assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);

    /* Map the VME output so per-MB motion/RDO messages can be read below. */
    dri_bo_map(vme_context->vme_output.bo , 0);
    msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;

    /* The hardware needs the start of the NEXT group to know where this
     * one ends; (0, height_in_mbs) marks end-of-picture. */
    if (next_slice_group_param) {
        h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
        v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
    } else {
        h_next_start_pos = 0;
        v_next_start_pos = height_in_mbs;
    }

    gen8_mfc_mpeg2_slicegroup_state(ctx,
                                    encoder_context,
                                    h_start_pos,
                                    v_start_pos,
                                    h_next_start_pos,
                                    v_next_start_pos,
                                    slice_index == 0,
                                    next_slice_group_param == NULL,
                                    slice_param->is_intra_slice,
                                    slice_param->quantiser_scale_code,
                                    slice_batch);

    /* Packed sequence/picture headers go ahead of the first slice group only. */
    if (slice_index == 0)
        intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);

    /* Insert '00' to make sure the header is valid */
    mfc_context->insert_object(ctx,
                               encoder_context,
                               (unsigned int*)section_delimiter,
                               1,
                               8,   /* 8bits in the last DWORD */
                               1,   /* 1 byte */
                               1,
                               0,
                               0,
                               slice_batch);

    /* One buffer element per slice in this group; slice_param advances
     * through them (see slice_param++ at the bottom of the loop). */
    for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
        /* PAK for each macroblocks */
        for (j = 0; j < slice_param->num_macroblocks; j++) {
            int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
            int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
            int first_mb_in_slice = (j == 0);
            int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
            int first_mb_in_slice_group = (i == 0 && j == 0);
            int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
                                          j == slice_param->num_macroblocks - 1);

            /* VME output message for this macroblock (indexed by MB address). */
            msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);

            if (slice_param->is_intra_slice) {
                gen8_mfc_mpeg2_pak_object_intra(ctx,
                                                encoder_context,
                                                h_pos, v_pos,
                                                first_mb_in_slice,
                                                last_mb_in_slice,
                                                first_mb_in_slice_group,
                                                last_mb_in_slice_group,
                                                0x1a,
                                                slice_param->quantiser_scale_code,
                                                0x3f,
                                                0,
                                                0xff,
                                                slice_batch);
            } else {
                /* Per-MB intra/inter decision by comparing the RDO costs
                 * reported by the VME kernel.  NOTE(review): the offsets
                 * reuse the AVC_* constants — presumably the MPEG-2 VME
                 * message shares that layout; confirm against the kernel. */
                int inter_rdo, intra_rdo;
                inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
                intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;

                if (intra_rdo < inter_rdo)
                    gen8_mfc_mpeg2_pak_object_intra(ctx,
                                                     encoder_context,
                                                     h_pos, v_pos,
                                                     first_mb_in_slice,
                                                     last_mb_in_slice,
                                                     first_mb_in_slice_group,
                                                     last_mb_in_slice_group,
                                                     0x1a,
                                                     slice_param->quantiser_scale_code,
                                                     0x3f,
                                                     0,
                                                     0xff,
                                                     slice_batch);
                else
                    gen8_mfc_mpeg2_pak_object_inter(ctx,
                                                encode_state,
                                                encoder_context,
                                                msg,
                                                width_in_mbs, height_in_mbs,
                                                h_pos, v_pos,
                                                first_mb_in_slice,
                                                last_mb_in_slice,
                                                first_mb_in_slice_group,
                                                last_mb_in_slice_group,
                                                slice_param->quantiser_scale_code,
                                                0,
                                                0xff,
                                                slice_batch);
            }
        }

        slice_param++;
    }

    dri_bo_unmap(vme_context->vme_output.bo);

    /* tail data */
    if (next_slice_group_param == NULL) { /* end of a picture */
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   (unsigned int *)tail_delimiter,
                                   2,
                                   8,   /* 8bits in the last DWORD */
                                   5,   /* 5 bytes */
                                   1,
                                   1,
                                   0,
                                   slice_batch);
    } else {        /* end of a slice group */
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   (unsigned int *)section_delimiter,
                                   1,
                                   8,   /* 8bits in the last DWORD */
                                   1,   /* 1 byte */
                                   1,
                                   1,
                                   0,
                                   slice_batch);
    }
}
2309
2310 /* 
2311  * A batch buffer for all slices, including slice state, 
2312  * slice insert object and slice pak object commands
2313  *
2314  */
2315 static dri_bo *
2316 gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2317                                           struct encode_state *encode_state,
2318                                           struct intel_encoder_context *encoder_context)
2319 {
2320     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2321     struct intel_batchbuffer *batch;
2322     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2323     dri_bo *batch_bo;
2324     int i;
2325
2326     batch = mfc_context->aux_batchbuffer;
2327     batch_bo = batch->buffer;
2328
2329     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2330         if (i == encode_state->num_slice_params_ext - 1)
2331             next_slice_group_param = NULL;
2332         else
2333             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2334
2335         gen8_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2336     }
2337
2338     intel_batchbuffer_align(batch, 8);
2339     
2340     BEGIN_BCS_BATCH(batch, 2);
2341     OUT_BCS_BATCH(batch, 0);
2342     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2343     ADVANCE_BCS_BATCH(batch);
2344
2345     dri_bo_reference(batch_bo);
2346     intel_batchbuffer_free(batch);
2347     mfc_context->aux_batchbuffer = NULL;
2348
2349     return batch_bo;
2350 }
2351
/*
 * Emit all picture-level MFX state for an MPEG-2 encode, in the order the
 * hardware requires: pipe mode, surface state, indirect object base,
 * pipe/BSP buffer addresses, picture state, then the quantizer matrices.
 */
static void
gen8_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
                                           struct encode_state *encode_state,
                                           struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
    mfc_context->set_surface_state(ctx, encoder_context);
    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
    gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    gen8_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
    gen8_mfc_mpeg2_qm_state(ctx, encoder_context);
    gen8_mfc_mpeg2_fqm_state(ctx, encoder_context);
}
2368
/*
 * Top-level MPEG-2 PAK programming: build the slice-level commands into a
 * separate (second-level) batch buffer, then emit the picture-level state
 * into the main BCS batch and chain to the slice batch.
 */
static void
gen8_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    dri_bo *slice_batch_bo;

    /* Slice batch is built first so it can be referenced below. */
    slice_batch_bo = gen8_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);

    // begin programing
    intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
    intel_batchbuffer_emit_mi_flush(batch);
    
    // picture level programing
    gen8_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);

    /* Chain to the slice batch as a second-level batch buffer. */
    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_BCS_RELOC(batch,
                  slice_batch_bo,
                  I915_GEM_DOMAIN_COMMAND, 0, 
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);

    // end programing
    intel_batchbuffer_end_atomic(batch);

    /* The batch holds its own reference via the relocation; drop ours. */
    dri_bo_unreference(slice_batch_bo);
}
2401
2402 static VAStatus
2403 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2404                         struct encode_state *encode_state,
2405                         struct intel_encoder_context *encoder_context)
2406 {
2407     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2408     struct object_surface *obj_surface; 
2409     struct object_buffer *obj_buffer;
2410     struct i965_coded_buffer_segment *coded_buffer_segment;
2411     VAStatus vaStatus = VA_STATUS_SUCCESS;
2412     dri_bo *bo;
2413     int i;
2414
2415     /* reconstructed surface */
2416     obj_surface = encode_state->reconstructed_object;
2417     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2418     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2419     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2420     mfc_context->surface_state.width = obj_surface->orig_width;
2421     mfc_context->surface_state.height = obj_surface->orig_height;
2422     mfc_context->surface_state.w_pitch = obj_surface->width;
2423     mfc_context->surface_state.h_pitch = obj_surface->height;
2424
2425     /* forward reference */
2426     obj_surface = encode_state->reference_objects[0];
2427
2428     if (obj_surface && obj_surface->bo) {
2429         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2430         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2431     } else
2432         mfc_context->reference_surfaces[0].bo = NULL;
2433
2434     /* backward reference */
2435     obj_surface = encode_state->reference_objects[1];
2436
2437     if (obj_surface && obj_surface->bo) {
2438         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2439         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2440     } else {
2441         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2442
2443         if (mfc_context->reference_surfaces[1].bo)
2444             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2445     }
2446
2447     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2448         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2449
2450         if (mfc_context->reference_surfaces[i].bo)
2451             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2452     }
2453     
2454     /* input YUV surface */
2455     obj_surface = encode_state->input_yuv_object;
2456     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2457     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2458
2459     /* coded buffer */
2460     obj_buffer = encode_state->coded_buf_object;
2461     bo = obj_buffer->buffer_store->bo;
2462     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2463     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2464     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2465     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2466
2467     /* set the internal flag to 0 to indicate the coded size is unknown */
2468     dri_bo_map(bo, 1);
2469     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2470     coded_buffer_segment->mapped = 0;
2471     coded_buffer_segment->codec = encoder_context->codec;
2472     dri_bo_unmap(bo);
2473
2474     return vaStatus;
2475 }
2476
/*
 * Encode one MPEG-2 picture: (re)initialize the MFC context, bind the
 * per-frame surfaces/buffers, program the BCS pipeline, then submit it.
 */
static VAStatus
gen8_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    gen8_mfc_init(ctx, encode_state, encoder_context);
    intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
    /*Programing bcs pipeline*/
    gen8_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
    gen8_mfc_run(ctx, encode_state, encoder_context);

    return VA_STATUS_SUCCESS;
}
2490
2491 /* JPEG encode methods */
2492
2493 static VAStatus
2494 intel_mfc_jpeg_prepare(VADriverContextP ctx, 
2495                         struct encode_state *encode_state,
2496                         struct intel_encoder_context *encoder_context)
2497 {
2498     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2499     struct object_surface *obj_surface; 
2500     struct object_buffer *obj_buffer;
2501     struct i965_coded_buffer_segment *coded_buffer_segment;
2502     VAStatus vaStatus = VA_STATUS_SUCCESS;
2503     dri_bo *bo;
2504    
2505     /* input YUV surface */
2506     obj_surface = encode_state->input_yuv_object;
2507     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2508     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2509
2510     /* coded buffer */
2511     obj_buffer = encode_state->coded_buf_object;
2512     bo = obj_buffer->buffer_store->bo;
2513     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2514     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2515     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2516     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2517
2518     /* set the internal flag to 0 to indicate the coded size is unknown */
2519     dri_bo_map(bo, 1);
2520     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2521     coded_buffer_segment->mapped = 0;
2522     coded_buffer_segment->codec = encoder_context->codec;
2523     dri_bo_unmap(bo);
2524
2525     return vaStatus;
2526 }
2527
2528
2529 static void 
2530 gen8_mfc_jpeg_set_surface_state(VADriverContextP ctx,
2531                         struct intel_encoder_context *encoder_context,
2532                         struct encode_state *encode_state)
2533 {
2534     struct intel_batchbuffer *batch = encoder_context->base.batch;
2535     struct object_surface *obj_surface = encode_state->input_yuv_object;
2536     unsigned int input_fourcc;
2537     unsigned int y_cb_offset;
2538     unsigned int y_cr_offset;
2539     unsigned int surface_format;
2540
2541     assert(obj_surface);
2542
2543     y_cb_offset = obj_surface->y_cb_offset;
2544     y_cr_offset = obj_surface->y_cr_offset;
2545     input_fourcc = obj_surface->fourcc;
2546
2547     surface_format = (obj_surface->fourcc == VA_FOURCC_Y800) ?
2548         MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
2549         
2550         
2551      switch (input_fourcc) {
2552         case VA_FOURCC_Y800: {
2553             surface_format = MFX_SURFACE_MONOCHROME;
2554             break;
2555         }
2556         case VA_FOURCC_NV12: { 
2557             surface_format = MFX_SURFACE_PLANAR_420_8;
2558             break;
2559         }      
2560         case VA_FOURCC_UYVY: { 
2561             surface_format = MFX_SURFACE_YCRCB_SWAPY;
2562             break;
2563         }
2564         case VA_FOURCC_YUY2: { 
2565             surface_format = MFX_SURFACE_YCRCB_NORMAL;
2566             break;
2567         }
2568         case VA_FOURCC_RGBA:
2569         case VA_FOURCC_444P: {
2570             surface_format = MFX_SURFACE_R8G8B8A8_UNORM;
2571             break;
2572         }
2573     }
2574
2575     BEGIN_BCS_BATCH(batch, 6);
2576
2577     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2578     OUT_BCS_BATCH(batch, 0);
2579     OUT_BCS_BATCH(batch,
2580                   ((obj_surface->orig_height - 1) << 18) |
2581                   ((obj_surface->orig_width - 1) << 4));
2582     OUT_BCS_BATCH(batch,
2583                   (surface_format << 28) | /* Surface Format */
2584                   (0 << 27) | /* must be 1 for interleave U/V, hardware requirement for AVC/VC1/MPEG and 0 for JPEG */
2585                   (0 << 22) | /* surface object control state, FIXME??? */
2586                   ((obj_surface->width - 1) << 3) | /* pitch */
2587                   (0 << 2)  | /* must be 0 for interleave U/V */
2588                   (1 << 1)  | /* must be tiled */
2589                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
2590     OUT_BCS_BATCH(batch,
2591                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2592                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
2593     OUT_BCS_BATCH(batch,
2594                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2595                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoeo for JPEG */
2596                  
2597
2598     ADVANCE_BCS_BATCH(batch);
2599 }
2600
2601 static void
2602 gen8_mfc_jpeg_pic_state(VADriverContextP ctx,
2603                         struct intel_encoder_context *encoder_context,
2604                         struct encode_state *encode_state)
2605 {
2606     struct intel_batchbuffer *batch = encoder_context->base.batch;
2607     struct object_surface *obj_surface = encode_state->input_yuv_object;
2608     VAEncPictureParameterBufferJPEG *pic_param;
2609     unsigned int  surface_format;
2610     unsigned int  frame_width_in_blks;
2611     unsigned int  frame_height_in_blks;
2612     unsigned int  pixels_in_horizontal_lastMCU;
2613     unsigned int  pixels_in_vertical_lastMCU;
2614     unsigned int  input_surface_format;
2615     unsigned int  output_mcu_format;
2616     unsigned int  picture_width;
2617     unsigned int  picture_height;  
2618
2619     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
2620     assert(obj_surface);
2621     pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
2622     surface_format = obj_surface->fourcc;
2623     picture_width = pic_param->picture_width;
2624     picture_height = pic_param->picture_height;
2625     
2626     switch (surface_format) {
2627         case VA_FOURCC_Y800: {
2628             input_surface_format = JPEG_ENC_SURFACE_Y8; 
2629             output_mcu_format = JPEG_ENC_MCU_YUV400;
2630             break;
2631         }
2632         case VA_FOURCC_NV12: { 
2633             input_surface_format = JPEG_ENC_SURFACE_NV12; 
2634             output_mcu_format = JPEG_ENC_MCU_YUV420; 
2635             break;
2636         }      
2637         case VA_FOURCC_UYVY: { 
2638             input_surface_format = JPEG_ENC_SURFACE_UYVY; 
2639             output_mcu_format = JPEG_ENC_MCU_YUV422H_2Y; 
2640             break;
2641         }
2642         case VA_FOURCC_YUY2: { 
2643             input_surface_format = JPEG_ENC_SURFACE_YUY2; 
2644             output_mcu_format = JPEG_ENC_MCU_YUV422H_2Y; 
2645             break;
2646         }
2647
2648         case VA_FOURCC_RGBA:
2649         case VA_FOURCC_444P: { 
2650             input_surface_format = JPEG_ENC_SURFACE_RGB; 
2651             output_mcu_format = JPEG_ENC_MCU_RGB; 
2652             break;
2653         }
2654         default : {
2655             input_surface_format = JPEG_ENC_SURFACE_NV12; 
2656             output_mcu_format = JPEG_ENC_MCU_YUV420;
2657             break;
2658         }
2659     }
2660
2661     
2662     switch (output_mcu_format) {
2663         
2664         case JPEG_ENC_MCU_YUV400:
2665         case JPEG_ENC_MCU_RGB: {
2666             pixels_in_horizontal_lastMCU = (picture_width % 8);
2667             pixels_in_vertical_lastMCU = (picture_height % 8); 
2668
2669             //H1=1,V1=1 for YUV400 and YUV444. So, compute these values accordingly
2670             frame_width_in_blks = ((picture_width + 7) / 8); 
2671             frame_height_in_blks = ((picture_height + 7) / 8);
2672             break;
2673         }
2674         
2675         case JPEG_ENC_MCU_YUV420: {        
2676             if((picture_width % 2) == 0) 
2677                 pixels_in_horizontal_lastMCU = picture_width % 16; 
2678             else 
2679                 pixels_in_horizontal_lastMCU   = ((picture_width % 16) + 1) % 16; 
2680             
2681             if((picture_height % 2) == 0) 
2682                 pixels_in_vertical_lastMCU     = picture_height % 16; 
2683             else 
2684                 pixels_in_vertical_lastMCU   = ((picture_height % 16) + 1) % 16; 
2685
2686             //H1=2,V1=2 for YUV420. So, compute these values accordingly
2687             frame_width_in_blks = ((picture_width + 15) / 16) * 2;
2688             frame_height_in_blks = ((picture_height + 15) / 16) * 2;
2689             break;
2690         }
2691         
2692         case JPEG_ENC_MCU_YUV422H_2Y: {
2693             if(picture_width % 2 == 0) 
2694                 pixels_in_horizontal_lastMCU = picture_width % 16; 
2695             else 
2696                 pixels_in_horizontal_lastMCU = ((picture_width % 16) + 1) % 16; 
2697             
2698             pixels_in_vertical_lastMCU = picture_height % 8;
2699             
2700             //H1=2,V1=1 for YUV422H_2Y. So, compute these values accordingly
2701             frame_width_in_blks = ((picture_width + 15) / 16) * 2;
2702             frame_height_in_blks = ((picture_height + 7) / 8);
2703             break;            
2704         }       
2705     } //end of switch
2706    
2707     BEGIN_BCS_BATCH(batch, 3);
2708     /* DWORD 0 */
2709     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2)); 
2710     /* DWORD 1 */
2711     OUT_BCS_BATCH(batch,
2712                   ( pixels_in_horizontal_lastMCU << 26) |    /* Pixels In Horizontal Last MCU */
2713                   ( pixels_in_vertical_lastMCU << 21)   |    /* Pixels In Vertical Last MCU */
2714                   ( input_surface_format << 8)          |    /* Input Surface format */
2715                   ( output_mcu_format << 0));                /* Output MCU Structure */
2716     /* DWORD 2 */
2717     OUT_BCS_BATCH(batch,
2718                   ((frame_height_in_blks - 1) << 16)    |   /* Frame Height In Blks Minus 1 */
2719                   (JPEG_ENC_ROUND_QUANT_DEFAULT  << 13) |   /* Rounding Quant set to default value 0 */
2720                   ((frame_width_in_blks - 1) << 0));        /* Frame Width In Blks Minus 1 */
2721     ADVANCE_BCS_BATCH(batch);
2722 }
2723
/*
 * Convert a 64-entry quantization matrix into the 32-dword form the MFX
 * hardware expects: each entry becomes the 16-bit reciprocal 65535/Q, and
 * two consecutive reciprocals are packed into one dword (even index in the
 * low half, odd index in the high half).
 *
 * Bug fix: the reciprocals must be UNSIGNED 16-bit.  With the previous
 * `short` array, Q == 1 produced 65535 which overflowed signed short; the
 * negative value was then sign-extended by the `|` packing and corrupted
 * the entire dword (e.g. high half wiped to 0xFFFF).
 */
static void 
get_reciprocal_dword_qm(unsigned char *raster_qm, uint32_t *dword_qm)
{
    int i;
    unsigned short reciprocal_qm[64];

    for (i = 0; i < 64; i++) {
        /* Callers clamp Q to [1, 255]; guard against 0 anyway to avoid
         * a divide-by-zero on malformed input. */
        unsigned int q = raster_qm[i] ? raster_qm[i] : 1;

        reciprocal_qm[i] = (unsigned short)(65535 / q);
    }

    for (i = 0; i < 32; i++)
        dword_qm[i] = ((uint32_t)reciprocal_qm[2 * i + 1] << 16) |
                      reciprocal_qm[2 * i];
}
2741
2742
2743 static void 
2744 gen8_mfc_jpeg_fqm_state(VADriverContextP ctx,
2745                         struct intel_encoder_context *encoder_context,
2746                         struct encode_state *encode_state)
2747 {
2748     unsigned int quality = 0;
2749     uint32_t temp, i = 0, j = 0, dword_qm[32];
2750     VAEncPictureParameterBufferJPEG *pic_param;
2751     VAQMatrixBufferJPEG *qmatrix;
2752     unsigned char raster_qm[64], column_raster_qm[64];
2753     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2754     
2755     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
2756     pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
2757     quality = pic_param->quality;
2758     
2759     //If the app sends the qmatrix, use it, buffer it for using it with the next frames 
2760     //The app can send qmatrix for the first frame and not send for the subsequent frames
2761     if(encode_state->q_matrix && encode_state->q_matrix->buffer) {
2762         qmatrix = (VAQMatrixBufferJPEG *)encode_state->q_matrix->buffer;
2763
2764         mfc_context->buffered_qmatrix.load_lum_quantiser_matrix = 1;
2765         memcpy(mfc_context->buffered_qmatrix.lum_quantiser_matrix, qmatrix->lum_quantiser_matrix, 64 * (sizeof(unsigned char)));
2766
2767         if(pic_param->num_components > 1) {
2768             mfc_context->buffered_qmatrix.load_chroma_quantiser_matrix = 1;
2769             memcpy(mfc_context->buffered_qmatrix.chroma_quantiser_matrix, qmatrix->chroma_quantiser_matrix, 64 * (sizeof(unsigned char)));
2770         } else {
2771             mfc_context->buffered_qmatrix.load_chroma_quantiser_matrix = 0;
2772         }
2773
2774     } else {
2775         //If the app doesnt send the qmatrix, use the buffered/default qmatrix
2776         qmatrix = &mfc_context->buffered_qmatrix;
2777         qmatrix->load_lum_quantiser_matrix = 1;
2778         qmatrix->load_chroma_quantiser_matrix = (pic_param->num_components > 1) ? 1 : 0;
2779     }   
2780
2781
2782     //As per the design, normalization of the quality factor and scaling of the Quantization tables
2783     //based on the quality factor needs to be done in the driver before sending the values to the HW.
2784     //But note, the driver expects the scaled quantization tables (as per below logic) to be sent as
2785     //packed header information. The packed header is written as the header of the jpeg file. This
2786     //header information is used to decode the jpeg file. So, it is the app's responsibility to send
2787     //the correct header information (See build_packed_jpeg_header_buffer() in jpegenc.c in LibVa on
2788     //how to do this). QTables can be different for different applications. If no tables are provided,
2789     //the default tables in the driver are used.
2790
2791     //Normalization of the quality factor
2792     if (quality > 100) quality=100;
2793     if (quality == 0)  quality=1;
2794     quality = (quality < 50) ? (5000/quality) : (200 - (quality*2)); 
2795     
2796     //Step 1. Apply Quality factor and clip to range [1, 255] for luma and chroma Quantization matrices
2797     //Step 2. HW expects the 1/Q[i] values in the qm sent, so get reciprocals
2798     //Step 3. HW also expects 32 dwords, hence combine 2 (1/Q) values into 1 dword
2799     //Step 4. Send the Quantization matrix to the HW, use gen8_mfc_fqm_state
2800     
2801     //For luma (Y or R)
2802     if(qmatrix->load_lum_quantiser_matrix) {
2803         //apply quality to lum_quantiser_matrix
2804         for(i=0; i < 64; i++) {
2805             temp = (qmatrix->lum_quantiser_matrix[i] * quality)/100;
2806             //clamp to range [1,255]
2807             temp = (temp > 255) ? 255 : temp;
2808             temp = (temp < 1) ? 1 : temp;
2809             qmatrix->lum_quantiser_matrix[i] = (unsigned char)temp;
2810         }       
2811         
2812         //For VAAPI, the VAQMatrixBuffer needs to be in zigzag order. 
2813         //The App should send it in zigzag. Now, the driver has to extract the raster from it. 
2814         for (j = 0; j < 64; j++)
2815             raster_qm[zigzag_direct[j]] = qmatrix->lum_quantiser_matrix[j];
2816
2817         //Convert the raster order(row-ordered) to the column-raster (column by column).
2818         //To be consistent with the other encoders, send it in column order.
2819         //Need to double check if our HW expects col or row raster.
2820         for (j = 0; j < 64; j++) {
2821             int row = j / 8, col = j % 8;
2822             column_raster_qm[col * 8 + row] = raster_qm[j];
2823         }
2824         
2825         //Convert to raster QM to reciprocal. HW expects values in reciprocal.
2826         get_reciprocal_dword_qm(column_raster_qm, dword_qm);
2827         
2828         //send the luma qm to the command buffer
2829         gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2830     } 
2831     
2832     //For Chroma, if chroma exists (Cb, Cr or G, B)
2833     if(qmatrix->load_chroma_quantiser_matrix) {
2834         //apply quality to chroma_quantiser_matrix
2835         for(i=0; i < 64; i++) {
2836             temp = (qmatrix->chroma_quantiser_matrix[i] * quality)/100;
2837             //clamp to range [1,255]
2838             temp = (temp > 255) ? 255 : temp;
2839             temp = (temp < 1) ? 1 : temp;
2840             qmatrix->chroma_quantiser_matrix[i] = (unsigned char)temp;
2841         }
2842         
2843         //For VAAPI, the VAQMatrixBuffer needs to be in zigzag order. 
2844         //The App should send it in zigzag. Now, the driver has to extract the raster from it. 
2845         for (j = 0; j < 64; j++)
2846             raster_qm[zigzag_direct[j]] = qmatrix->chroma_quantiser_matrix[j];
2847         
2848         //Convert the raster order(row-ordered) to the column-raster (column by column).
2849         //To be consistent with the other encoders, send it in column order.
2850         //Need to double check if our HW expects col or row raster.
2851         for (j = 0; j < 64; j++) {
2852             int row = j / 8, col = j % 8;
2853             column_raster_qm[col * 8 + row] = raster_qm[j];
2854         }
2855
2856
2857         //Convert to raster QM to reciprocal. HW expects values in reciprocal.
2858         get_reciprocal_dword_qm(column_raster_qm, dword_qm);
2859
2860         //send the same chroma qm to the command buffer (for both U,V or G,B)
2861         gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2862         gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);        
2863     }
2864 }
2865
2866
//Translation of Table K.5 into code: convert a RRRRSSSS huffval from the
//Huffman table buffer into the index used by the code/size tables.
//Index layout: run*10 + size, with one extra slot after run 15/size 0 (ZRL)
//so the AC table covers all 162 entries.
uint8_t map_huffval_to_index(uint8_t huff_val)
{
    uint8_t run = (huff_val >> 4) & 0x0F;   //RRRR: zero-run length
    uint8_t size = huff_val & 0x0F;         //SSSS: magnitude category
    uint8_t index = (run * 0xA) + size;

    //Values 0xF0..0xFF (run of 16) sit one slot later in the table
    if (huff_val >= 0xF0)
        index++;

    return index;
}
2881
2882
//Implementation of Flow chart Annex C - Figure C.1
//Expand BITS (bits[i-1] = number of codes of length i, i = 1..16) into a
//flat list of code lengths; terminate with 0 and report the entry count.
static void
generate_huffman_codesizes_table(uint8_t *bits, uint8_t *huff_size_table, uint8_t *lastK)
{
    uint8_t i, j, k = 0;

    for (i = 1; i <= 16; i++) {
        //emit one size entry per code of this length
        for (j = 1; j <= bits[i - 1]; j++)
            huff_size_table[k++] = i;
    }

    huff_size_table[k] = 0;  //0 terminates the size table
    *lastK = k;              //total number of codes
}
2902
//Implementation of Flow chart Annex C - Figure C.2
//Assign canonical Huffman code words: consecutive values for codes of equal
//length; when the length grows, the counter is shifted left (zero-appended).
static void
generate_huffman_codes_table(uint8_t *huff_size_table, uint16_t *huff_code_table)
{
    uint16_t code = 0;
    uint8_t k = 0;
    uint8_t si = huff_size_table[0];

    while (huff_size_table[k] != 0) {
        if (huff_size_table[k] == si) {
            // A Huffman code can never be 0xFFFF. Replace it with 0 if 0xFFFF
            if (code == 0xFFFF)
                code = 0x0000;

            huff_code_table[k] = code;
            code++;
            k++;
        } else {
            //next code length: append a zero bit
            code <<= 1;
            si++;
        }
    }
}
2930
//Implementation of Flow chat Annex C - Figure C.3
//Reorder the code/size tables from BITS order into symbol-value order
//(indexed via map_huffval_to_index), in place.
//type: 0 = DC table (12 entries), otherwise AC table (162 entries).
//Fix: the original do/while executed once even when lastK == 0, writing a
//bogus entry for an empty Huffman table; a guarded for-loop avoids that.
static void
generate_ordered_codes_table(uint8_t *huff_vals, uint8_t *huff_size_table, uint16_t *huff_code_table, uint8_t type, uint8_t lastK)
{
    uint8_t huff_val_size = 0, i = 0, k = 0;

    huff_val_size = (type == 0) ? 12 : 162;
    uint8_t huff_si_table[huff_val_size];
    uint16_t huff_co_table[huff_val_size];

    memset(huff_si_table, 0, sizeof(huff_si_table));
    memset(huff_co_table, 0, sizeof(huff_co_table));

    //scatter each code/size pair to its symbol-ordered slot
    for (k = 0; k < lastK; k++) {
        i = map_huffval_to_index(huff_vals[k]);
        huff_co_table[i] = huff_code_table[k];
        huff_si_table[i] = huff_size_table[k];
    }

    memcpy(huff_size_table, huff_si_table, sizeof(uint8_t)*huff_val_size);
    memcpy(huff_code_table, huff_co_table, sizeof(uint16_t)*huff_val_size);
}
2954
2955
2956 //This method converts the huffman table to code words which is needed by the HW
2957 //Flowcharts from Jpeg Spec Annex C - Figure C.1, Figure C.2, Figure C.3 are used here
2958 static void
2959 convert_hufftable_to_codes(VAHuffmanTableBufferJPEGBaseline *huff_buffer, uint32_t *table, uint8_t type, uint8_t index)
2960 {
2961     uint8_t lastK = 0, i=0; 
2962     uint8_t huff_val_size = 0;
2963     uint8_t *huff_bits, *huff_vals;
2964
2965     huff_val_size = (type == 0) ? 12 : 162; 
2966     uint8_t huff_size_table[huff_val_size+1]; //The +1 for adding 0 at the end of huff_val_size
2967     uint16_t huff_code_table[huff_val_size];
2968
2969     memset(huff_size_table, 0, sizeof(huff_size_table));
2970     memset(huff_code_table, 0, sizeof(huff_code_table));
2971
2972     huff_bits = (type == 0) ? (huff_buffer->huffman_table[index].num_dc_codes) : (huff_buffer->huffman_table[index].num_ac_codes);
2973     huff_vals = (type == 0) ? (huff_buffer->huffman_table[index].dc_values) : (huff_buffer->huffman_table[index].ac_values);
2974     
2975
2976     //Generation of table of Huffman code sizes
2977     generate_huffman_codesizes_table(huff_bits, huff_size_table, &lastK);
2978        
2979     //Generation of table of Huffman codes
2980     generate_huffman_codes_table(huff_size_table, huff_code_table);
2981        
2982     //Ordering procedure for encoding procedure code tables
2983     generate_ordered_codes_table(huff_vals, huff_size_table, huff_code_table, type, lastK);
2984
2985     //HW expects Byte0: Code length; Byte1,Byte2: Code Word, Byte3: Dummy
2986     //Since IA is littlended, &, | and << accordingly to store the values in the DWord.
2987     for(i=0; i<huff_val_size; i++) {
2988         table[i] = 0;
2989         table[i] = ((huff_size_table[i] & 0xFF) | ((huff_code_table[i] & 0xFFFF) << 8));
2990     }
2991
2992 }
2993
2994 //send the huffman table using MFC_JPEG_HUFF_TABLE_STATE
2995 static void
2996 gen8_mfc_jpeg_huff_table_state(VADriverContextP ctx,
2997                                            struct encode_state *encode_state,
2998                                            struct intel_encoder_context *encoder_context,
2999                                            int num_tables)
3000 {
3001     VAHuffmanTableBufferJPEGBaseline *huff_buffer;
3002     struct intel_batchbuffer *batch = encoder_context->base.batch;
3003     uint8_t index;
3004     uint32_t dc_table[12], ac_table[162]; 
3005     
3006     assert(encode_state->huffman_table && encode_state->huffman_table->buffer);
3007     huff_buffer = (VAHuffmanTableBufferJPEGBaseline *)encode_state->huffman_table->buffer;
3008
3009     memset(dc_table, 0, 12);
3010     memset(ac_table, 0, 162);
3011
3012     for (index = 0; index < num_tables; index++) {
3013         int id = va_to_gen7_jpeg_hufftable[index];
3014  
3015         if (!huff_buffer->load_huffman_table[index])
3016             continue;
3017      
3018         //load DC table with 12 DWords
3019         convert_hufftable_to_codes(huff_buffer, dc_table, 0, index);  //0 for Dc
3020
3021         //load AC table with 162 DWords 
3022         convert_hufftable_to_codes(huff_buffer, ac_table, 1, index);  //1 for AC 
3023
3024         BEGIN_BCS_BATCH(batch, 176);
3025         OUT_BCS_BATCH(batch, MFC_JPEG_HUFF_TABLE_STATE | (176 - 2));
3026         OUT_BCS_BATCH(batch, id); //Huff table id
3027
3028         //DWord 2 - 13 has DC_TABLE
3029         intel_batchbuffer_data(batch, dc_table, 12*4);
3030
3031         //Dword 14 -175 has AC_TABLE
3032         intel_batchbuffer_data(batch, ac_table, 162*4);
3033         ADVANCE_BCS_BATCH(batch);
3034     }    
3035 }
3036
3037
3038 //This method is used to compute the MCU count used for setting MFC_JPEG_SCAN_OBJECT
3039 static void get_Y_sampling_factors(uint32_t surface_format, uint8_t *h_factor, uint8_t *v_factor)
3040
3041     switch (surface_format) {
3042         case VA_FOURCC_Y800: {
3043             (* h_factor) = 1; 
3044             (* v_factor) = 1;
3045             break;
3046         }
3047         case VA_FOURCC_NV12: { 
3048             (* h_factor) = 2;             
3049             (* v_factor) = 2;
3050             break;
3051         }      
3052         case VA_FOURCC_UYVY: { 
3053             (* h_factor) = 2; 
3054             (* v_factor) = 1;
3055             break;
3056         }
3057         case VA_FOURCC_YUY2: { 
3058             (* h_factor) = 2; 
3059             (* v_factor) = 1;
3060             break;
3061         }
3062         case VA_FOURCC_RGBA:
3063         case VA_FOURCC_444P: { 
3064             (* h_factor) = 1; 
3065             (* v_factor) = 1;
3066             break;
3067         }
3068         default : { //May be  have to insert error handling here. For now just use as below
3069             (* h_factor) = 1; 
3070             (* v_factor) = 1;
3071             break;
3072         }
3073     }
3074 }
3075
//set MFC_JPEG_SCAN_OBJECT
/*
 * Program the MFC_JPEG_SCAN_OBJECT command: the MCU count derived from the
 * picture size and luma sampling factors, the restart interval, and the
 * per-component Huffman table selectors.  JPEG encode uses a single scan
 * per frame, so this is emitted once with is_last_scan set.
 */
static void
gen8_mfc_jpeg_scan_object(VADriverContextP ctx,
                                           struct encode_state *encode_state,
                                           struct intel_encoder_context *encoder_context)
{
    uint32_t mcu_count, surface_format, Mx, My;
    uint8_t i, horizontal_sampling_factor, vertical_sampling_factor, huff_ac_table=0, huff_dc_table=0;
    uint8_t is_last_scan = 1;    //Jpeg has only 1 scan per frame. When last scan, HW inserts EOI code.
    uint8_t head_present_flag=1; //Header has tables and app data
    uint16_t num_components, restart_interval;   //Specifies number of MCUs in an ECS.
    VAEncSliceParameterBufferJPEG *slice_param;
    VAEncPictureParameterBufferJPEG *pic_param;

    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct object_surface *obj_surface = encode_state->input_yuv_object;

    assert(encode_state->slice_params_ext[0] && encode_state->slice_params_ext[0]->buffer);
    assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
    assert(obj_surface);
    pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
    slice_param = (VAEncSliceParameterBufferJPEG *)encode_state->slice_params_ext[0]->buffer;
    surface_format = obj_surface->fourcc;

    //Luma sampling factors depend on the input surface fourcc
    get_Y_sampling_factors(surface_format, &horizontal_sampling_factor, &vertical_sampling_factor);

    // Mx = #MCUs in a row, My = #MCUs in a column (rounded up to whole MCUs)
    Mx = (pic_param->picture_width + (horizontal_sampling_factor*8 -1))/(horizontal_sampling_factor*8);
    My = (pic_param->picture_height + (vertical_sampling_factor*8 -1))/(vertical_sampling_factor*8);
    mcu_count = (Mx * My);

    num_components = pic_param->num_components;
    restart_interval = slice_param->restart_interval;

    //Depending on number of components and values set for table selectors,
    //only those bits are set in 24:22 for AC table, 20:18 for DC table
    for(i=0; i<num_components; i++) {
        huff_ac_table |= ((slice_param->components[i].ac_table_selector)<<i);
        huff_dc_table |= ((slice_param->components[i].dc_table_selector)<<i);
    }


    BEGIN_BCS_BATCH(batch, 3);
    /* DWORD 0 */
    OUT_BCS_BATCH(batch, MFC_JPEG_SCAN_OBJECT | (3 - 2));
    /* DWORD 1 */
    OUT_BCS_BATCH(batch, mcu_count << 0);       //MCU Count
    /* DWORD 2 */
    OUT_BCS_BATCH(batch,
                  (huff_ac_table << 22)     |   //Huffman AC Table
                  (huff_dc_table << 18)     |   //Huffman DC Table
                  (head_present_flag << 17) |   //Head present flag
                  (is_last_scan << 16)      |   //Is last scan
                  (restart_interval << 0));     //Restart Interval
    ADVANCE_BCS_BATCH(batch);
}
3132
/*
 * Emit one MFX_INSERT_OBJECT command carrying packed-header payload data.
 * insert_data: DWord array of header bytes; length_in_dws: payload length;
 * data_bits_in_last_dw: valid bits in the final DWord (0 means all 32).
 * is_last_header / is_end_of_slice set the corresponding command flags.
 */
static void
gen8_mfc_jpeg_pak_insert_object(struct intel_encoder_context *encoder_context, unsigned int *insert_data,
                                int length_in_dws, int data_bits_in_last_dw, int is_last_header,
                                int is_end_of_slice)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    assert(batch);

    //0 bits in the last DWord means the whole DWord is valid
    if (data_bits_in_last_dw == 0)
        data_bits_in_last_dw = 32;

    BEGIN_BCS_BATCH(batch, length_in_dws + 2);

    OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (length_in_dws + 2 - 2));
    //DWord 1
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                    //DataByteOffset 0 for JPEG Encoder
                  (0 << 15) |                    //HeaderLengthExcludeFrmSize 0 for JPEG Encoder
                  (data_bits_in_last_dw << 8) |  //DataBitsInLastDW
                  (0 << 4) |                     //SkipEmulByteCount 0 for JPEG Encoder
                  (0 << 3) |                     //EmulationFlag 0 for JPEG Encoder
                  ((!!is_last_header) << 2) |    //LastHeaderFlag
                  ((!!is_end_of_slice) << 1) |   //EndOfSliceFlag
                  (1 << 0));                     //BitstreamStartReset 1 for JPEG Encoder
    //Data Payload
    intel_batchbuffer_data(batch, insert_data, length_in_dws*4);

    ADVANCE_BCS_BATCH(batch);
}
3162
3163
3164 //send the jpeg headers to HW using MFX_PAK_INSERT_OBJECT
3165 static void
3166 gen8_mfc_jpeg_add_headers(VADriverContextP ctx,
3167                                            struct encode_state *encode_state,
3168                                            struct intel_encoder_context *encoder_context)
3169 {
3170     if (encode_state->packed_header_data_ext) {
3171         VAEncPackedHeaderParameterBuffer *param = NULL;
3172         unsigned int *header_data = (unsigned int *)(*encode_state->packed_header_data_ext)->buffer;
3173         unsigned int length_in_bits;
3174
3175         param = (VAEncPackedHeaderParameterBuffer *)(*encode_state->packed_header_params_ext)->buffer;
3176         length_in_bits = param->bit_length;
3177
3178         gen8_mfc_jpeg_pak_insert_object(encoder_context, 
3179                                         header_data, 
3180                                         ALIGN(length_in_bits, 32) >> 5,
3181                                         length_in_bits & 0x1f,
3182                                         1,
3183                                         1);
3184     }
3185 }
3186
3187 //Initialize the buffered_qmatrix with the default qmatrix in the driver.
3188 //If the app sends the qmatrix, this will be replaced with the one app sends.
3189 static void 
3190 jpeg_init_default_qmatrix(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3191 {
3192     int i=0;
3193     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3194  
3195     //Load the the QM in zigzag order. If app sends QM, it is always in zigzag order.
3196     for(i=0; i<64; i++)
3197        mfc_context->buffered_qmatrix.lum_quantiser_matrix[i] = jpeg_luma_quant[zigzag_direct[i]];
3198
3199     for(i=0; i<64; i++)
3200         mfc_context->buffered_qmatrix.chroma_quantiser_matrix[i] = jpeg_chroma_quant[zigzag_direct[i]];
3201 }    
3202  
/* This is at the picture level */
/*
 * Program the complete MFX command sequence for one JPEG picture:
 * pipe mode select, surface/buffer address state, picture state,
 * quantization matrices, Huffman tables, the scan object, and finally
 * the packed headers.  The command order is mandated by the HW.
 */
static void
gen8_mfc_jpeg_pipeline_picture_programing(VADriverContextP ctx,
                                           struct encode_state *encode_state,
                                           struct intel_encoder_context *encoder_context)
{
    int i, j, component, max_selector = 0;
    VAEncSliceParameterBufferJPEG *slice_param;

    gen8_mfc_pipe_mode_select(ctx, MFX_FORMAT_JPEG, encoder_context);
    gen8_mfc_jpeg_set_surface_state(ctx, encoder_context, encode_state);
    gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen8_mfc_ind_obj_base_addr_state(ctx, encoder_context);
    gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    gen8_mfc_jpeg_pic_state(ctx, encoder_context, encode_state);

    //do the slice level encoding here
    gen8_mfc_jpeg_fqm_state(ctx, encoder_context, encode_state);

    //Scan all slice parameters for the largest DC/AC table selector in use;
    //that determines how many Huffman tables must be programmed below.
    //(JPEG has a single slice, so this loop usually runs once; kept for
    //consistency with the other encoders.)
    for(i = 0; i < encode_state->num_slice_params_ext; i++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[i]->buffer);
        slice_param = (VAEncSliceParameterBufferJPEG *)encode_state->slice_params_ext[i]->buffer;

        for(j = 0; j < encode_state->slice_params_ext[i]->num_elements; j++) {

            for(component = 0; component < slice_param->num_components; component++) {
                if(max_selector < slice_param->components[component].dc_table_selector)
                    max_selector = slice_param->components[component].dc_table_selector;

                if (max_selector < slice_param->components[component].ac_table_selector)
                    max_selector = slice_param->components[component].ac_table_selector;
            }

            slice_param++;
        }
    }

    //Baseline JPEG allows at most 2 table selectors (0 or 1)
    assert(max_selector < 2);
    //send the huffman table using MFC_JPEG_HUFF_TABLE
    gen8_mfc_jpeg_huff_table_state(ctx, encode_state, encoder_context, max_selector+1);
    //set MFC_JPEG_SCAN_OBJECT
    gen8_mfc_jpeg_scan_object(ctx, encode_state, encoder_context);
    //add headers using MFX_PAK_INSERT_OBJECT (it is refered as MFX_INSERT_OBJECT in this driver code)
    gen8_mfc_jpeg_add_headers(ctx, encode_state, encoder_context);

}
3250
3251 static void
3252 gen8_mfc_jpeg_pipeline_programing(VADriverContextP ctx,
3253                                    struct encode_state *encode_state,
3254                                    struct intel_encoder_context *encoder_context)
3255 {
3256     struct intel_batchbuffer *batch = encoder_context->base.batch;
3257     
3258     // begin programing
3259     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
3260     intel_batchbuffer_emit_mi_flush(batch);
3261     
3262     // picture level programing
3263     gen8_mfc_jpeg_pipeline_picture_programing(ctx, encode_state, encoder_context);
3264
3265     // end programing
3266     intel_batchbuffer_end_atomic(batch);
3267
3268 }
3269
3270
/*
 * Top-level gen8 JPEG encode entry point: initialize the MFC context,
 * prepare the JPEG input/output buffers, program the BCS pipeline and
 * kick off execution.  Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen8_mfc_jpeg_encode_picture(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    gen8_mfc_init(ctx, encode_state, encoder_context);
    intel_mfc_jpeg_prepare(ctx, encode_state, encoder_context);
    /*Programing bcs pipeline*/
    gen8_mfc_jpeg_pipeline_programing(ctx, encode_state, encoder_context);
    gen8_mfc_run(ctx, encode_state, encoder_context);

    return VA_STATUS_SUCCESS;
}
3284
3285 static int gen8_mfc_vp8_qindex_estimate(struct encode_state *encode_state,
3286                                         struct gen6_mfc_context *mfc_context,
3287                                         int target_frame_size,
3288                                         int is_key_frame)
3289 {
3290     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3291     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3292     unsigned int max_qindex = pic_param->clamp_qindex_high;
3293     unsigned int min_qindex = pic_param->clamp_qindex_low;
3294     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
3295     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
3296     int target_mb_size;
3297     int last_size_gap  = -1;
3298     int per_mb_size_at_qindex;
3299     int target_qindex = min_qindex, i;
3300
3301     /* make sure would not overflow*/
3302     if (target_frame_size >= (0x7fffffff >> 9))
3303         target_mb_size = (target_frame_size / width_in_mbs / height_in_mbs) << 9;
3304     else
3305         target_mb_size = (target_frame_size << 9) / width_in_mbs / height_in_mbs;
3306
3307     for (i = min_qindex; i <= max_qindex; i++) {
3308         per_mb_size_at_qindex = vp8_bits_per_mb[!is_key_frame][i];
3309         target_qindex = i;
3310         if (per_mb_size_at_qindex <= target_mb_size) {
3311             if (target_mb_size - per_mb_size_at_qindex < last_size_gap)
3312                 target_qindex--;
3313             break;
3314         }
3315         else
3316             last_size_gap = per_mb_size_at_qindex - target_mb_size;
3317     }
3318
3319     return target_qindex;
3320 }
3321
3322 static void gen8_mfc_vp8_brc_init(struct encode_state *encode_state,
3323                                struct intel_encoder_context* encoder_context)
3324 {
3325     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3326     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3327     VAEncMiscParameterBuffer* misc_param_hrd = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD][0]->buffer;
3328     VAEncMiscParameterHRD* param_hrd = (VAEncMiscParameterHRD*)misc_param_hrd->data;
3329     VAEncMiscParameterBuffer* misc_param_frame_rate_buffer = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFrameRate][0]->buffer;
3330     VAEncMiscParameterFrameRate* param_frame_rate = (VAEncMiscParameterFrameRate*)misc_param_frame_rate_buffer->data;
3331     double bitrate = seq_param->bits_per_second;
3332     unsigned int frame_rate = param_frame_rate->framerate;
3333     int inum = 1, pnum = 0;
3334     int intra_period = seq_param->intra_period;
3335     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
3336     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
3337     int max_frame_size =  (vp8_bits_per_mb[0][0] >> 9) * width_in_mbs * height_in_mbs;/* vp8_bits_per_mb table mutilpled 512 */
3338
3339     pnum = intra_period  - 1;
3340
3341     mfc_context->brc.mode = encoder_context->rate_control_mode;
3342
3343     mfc_context->brc.target_frame_size[0][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/frame_rate) /
3344                                                              (double)(inum + BRC_PWEIGHT * pnum ));
3345     mfc_context->brc.target_frame_size[0][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[0][SLICE_TYPE_I];
3346
3347     mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
3348     mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
3349
3350     mfc_context->brc.bits_per_frame[0] = bitrate/frame_rate;
3351
3352     mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I] = gen8_mfc_vp8_qindex_estimate(encode_state,
3353                                                                                 mfc_context,
3354                                                                                 mfc_context->brc.target_frame_size[0][SLICE_TYPE_I],
3355                                                                                 1);
3356     mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P] = gen8_mfc_vp8_qindex_estimate(encode_state,
3357                                                                                 mfc_context,
3358                                                                                 mfc_context->brc.target_frame_size[0][SLICE_TYPE_P],
3359                                                                                 0);
3360
3361     mfc_context->hrd.buffer_size = (double)param_hrd->buffer_size;
3362     mfc_context->hrd.current_buffer_fullness =
3363         (double)(param_hrd->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
3364         param_hrd->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
3365     mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
3366     mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/max_frame_size;
3367     mfc_context->hrd.violation_noted = 0;
3368 }
3369
/*
 * Post-encode VP8 BRC update: given the actual size of the just-encoded
 * frame (frame_bits), predict the quantizer for the next frame of the
 * same type, correct it by the HRD buffer fullness, and clamp it to the
 * picture's qindex range.  Returns the HRD status (gen6_brc_status);
 * callers re-encode or bit-stuff on under/overflow statuses.
 */
static int gen8_mfc_vp8_brc_postpack(struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context,
                           int frame_bits)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    int is_key_frame = !pic_param->pic_flags.bits.frame_type;
    int slicetype = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
    int qpi = mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I];
    int qpp = mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P];
    int qp; // quantizer of previously encoded slice of current type
    int qpn; // predicted quantizer for next frame of current type in integer format
    double qpf; // predicted quantizer for next frame of current type in float format
    double delta_qp; // QP correction
    int target_frame_size, frame_size_next;
    /* Notes:
     *  x - how far we are from HRD buffer borders
     *  y - how far we are from target HRD buffer fullness
     */
    double x, y;
    double frame_size_alpha;
    unsigned int max_qindex = pic_param->clamp_qindex_high;
    unsigned int min_qindex = pic_param->clamp_qindex_low;

    qp = mfc_context->brc.qp_prime_y[0][slicetype];

    target_frame_size = mfc_context->brc.target_frame_size[0][slicetype];
    //smoothing factor: larger GOPs damp the per-frame size correction
    if (mfc_context->hrd.buffer_capacity < 5)
        frame_size_alpha = 0;
    else
        frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
    if (frame_size_alpha > 30) frame_size_alpha = 30;
    frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
        (double)(frame_size_alpha + 1.);

    /* frame_size_next: avoiding negative number and too small value */
    if ((double)frame_size_next < (double)(target_frame_size * 0.25))
        frame_size_next = (int)((double)target_frame_size * 0.25);

    //scale QP by the ratio of target to predicted size, then round
    qpf = (double)qp * target_frame_size / frame_size_next;
    qpn = (int)(qpf + 0.5);

    if (qpn == qp) {
        /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
        mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
        if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
            qpn++;
            mfc_context->brc.qpf_rounding_accumulator = 0.;
        } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
            qpn--;
            mfc_context->brc.qpf_rounding_accumulator = 0.;
        }
    }

    /* making sure that QP is not changing too fast */
    if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
    else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, min_qindex, max_qindex);

    /* checking whether HRD compliance is still met */
    sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);

    /* calculating QP delta as some function*/
    x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
    if (x > 0) {
        x /= mfc_context->hrd.target_buffer_fullness;
        y = mfc_context->hrd.current_buffer_fullness;
    }
    else {
        x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
        y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
    }
    if (y < 0.01) y = 0.01;
    if (x > 1) x = 1;
    else if (x < -1) x = -1;

    //smooth correction: strongest when close to a buffer border (small y)
    delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
    qpn = (int)(qpn + delta_qp + 0.5);

    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, min_qindex, max_qindex);

    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* correcting QPs of slices of other types */
        if (!is_key_frame) {
            if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 4)
                mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 2;
        } else {
            if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 4)
                mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
        }
        BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], min_qindex, max_qindex);
        BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], min_qindex, max_qindex);
    } else if (sts == BRC_UNDERFLOW) { // underflow
        if (qpn <= qp) qpn = qp + 2;
        if (qpn > max_qindex) {
            qpn = max_qindex;
            sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
        }
    } else if (sts == BRC_OVERFLOW) {
        if (qpn >= qp) qpn = qp - 2;
        if (qpn < min_qindex) { // < 0 (?) overflow with minQP
            qpn = min_qindex;
            sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
        }
    }

    mfc_context->brc.qp_prime_y[0][slicetype] = qpn;

    return sts;
}
3483
/*
 * Initialize the VUI HRD parameters used for VP8 CBR rate control.
 * Other rate-control modes leave the HRD state untouched.
 */
static void gen8_mfc_vp8_hrd_context_init(struct encode_state *encode_state,
                                       struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    int target_bit_rate = seq_param->bits_per_second;

    // current we only support CBR mode.
    if (rate_control_mode == VA_RC_CBR) {
        mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
        // NOTE(review): (target_bit_rate * 8) can overflow a signed int for
        // very high bitrates, and target_bit_rate == 0 would divide by zero
        // — presumably callers validate bits_per_second; confirm.
        mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
        mfc_context->vui_hrd.i_cpb_removal_delay = 2;
        mfc_context->vui_hrd.i_frame_number = 0;

        mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
        mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
        mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
    }

}
3505
3506 static void gen8_mfc_vp8_hrd_context_update(struct encode_state *encode_state,
3507                              struct gen6_mfc_context *mfc_context)
3508 {
3509     mfc_context->vui_hrd.i_frame_number++;
3510 }
3511
3512 /*
3513  * Check whether the parameters related with CBR are updated and decide whether
3514  * it needs to reinitialize the configuration related with CBR.
3515  * Currently it will check the following parameters:
3516  *      bits_per_second
3517  *      frame_rate
3518  *      gop_configuration(intra_period, ip_period, intra_idr_period)
3519  */
3520 static bool gen8_mfc_vp8_brc_updated_check(struct encode_state *encode_state,
3521                            struct intel_encoder_context *encoder_context)
3522 {
3523     unsigned int rate_control_mode = encoder_context->rate_control_mode;
3524     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3525     double cur_fps, cur_bitrate;
3526     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3527     VAEncMiscParameterBuffer *misc_param_frame_rate_buf = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFrameRate][0]->buffer;
3528     VAEncMiscParameterFrameRate *param_frame_rate = (VAEncMiscParameterFrameRate*)misc_param_frame_rate_buf->data;
3529     unsigned int frame_rate = param_frame_rate->framerate;
3530
3531     if (rate_control_mode != VA_RC_CBR) {
3532         return false;
3533     }
3534
3535     cur_bitrate = seq_param->bits_per_second;
3536     cur_fps = frame_rate;
3537
3538     if ((cur_bitrate == mfc_context->brc.saved_bps) &&
3539         (cur_fps == mfc_context->brc.saved_fps) &&
3540         (seq_param->intra_period == mfc_context->brc.saved_intra_period)) {
3541         /* the parameters related with CBR are not updaetd */
3542         return false;
3543     }
3544
3545     mfc_context->brc.saved_intra_period = seq_param->intra_period;
3546     mfc_context->brc.saved_fps = cur_fps;
3547     mfc_context->brc.saved_bps = cur_bitrate;
3548     return true;
3549 }
3550
3551 static void gen8_mfc_vp8_brc_prepare(struct encode_state *encode_state,
3552                            struct intel_encoder_context *encoder_context)
3553 {
3554     unsigned int rate_control_mode = encoder_context->rate_control_mode;
3555
3556     if (rate_control_mode == VA_RC_CBR) {
3557         bool brc_updated;
3558         assert(encoder_context->codec != CODEC_MPEG2);
3559
3560         brc_updated = gen8_mfc_vp8_brc_updated_check(encode_state, encoder_context);
3561
3562         /*Programing bit rate control */
3563         if (brc_updated) {
3564             gen8_mfc_vp8_brc_init(encode_state, encoder_context);
3565         }
3566
3567         /*Programing HRD control */
3568         if (brc_updated)
3569             gen8_mfc_vp8_hrd_context_init(encode_state, encoder_context);
3570     }
3571 }
3572
3573 static void vp8_enc_state_init(struct gen6_mfc_context *mfc_context,
3574                                VAEncPictureParameterBufferVP8 *pic_param,
3575                                VAQMatrixBufferVP8 *q_matrix)
3576 {
3577
3578     int is_key_frame = !pic_param->pic_flags.bits.frame_type;
3579     unsigned char *coeff_probs_stream_in_buffer;
3580     
3581     mfc_context->vp8_state.frame_header_lf_update_pos = 0;
3582     mfc_context->vp8_state.frame_header_qindex_update_pos = 0;
3583     mfc_context->vp8_state.frame_header_token_update_pos = 0;
3584     mfc_context->vp8_state.frame_header_bin_mv_upate_pos = 0;
3585
3586     mfc_context->vp8_state.prob_skip_false = 255;
3587     memset(mfc_context->vp8_state.mb_segment_tree_probs, 0, sizeof(mfc_context->vp8_state.mb_segment_tree_probs));
3588     memcpy(mfc_context->vp8_state.mv_probs, vp8_default_mv_context, sizeof(mfc_context->vp8_state.mv_probs));
3589     
3590     if (is_key_frame) {
3591         memcpy(mfc_context->vp8_state.y_mode_probs, vp8_kf_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
3592         memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_kf_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
3593
3594         mfc_context->vp8_state.prob_intra = 255;
3595         mfc_context->vp8_state.prob_last = 128;
3596         mfc_context->vp8_state.prob_gf = 128;
3597     } else {
3598         memcpy(mfc_context->vp8_state.y_mode_probs, vp8_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
3599         memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
3600
3601         mfc_context->vp8_state.prob_intra = 63;
3602         mfc_context->vp8_state.prob_last = 128;
3603         mfc_context->vp8_state.prob_gf = 128;
3604     }
3605     
3606     mfc_context->vp8_state.prob_skip_false = vp8_base_skip_false_prob[q_matrix->quantization_index[0]];
3607   
3608     dri_bo_map(mfc_context->vp8_state.coeff_probs_stream_in_bo, 1);
3609     coeff_probs_stream_in_buffer = (unsigned char *)mfc_context->vp8_state.coeff_probs_stream_in_bo->virtual;
3610     assert(coeff_probs_stream_in_buffer);
3611     memcpy(coeff_probs_stream_in_buffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
3612     dri_bo_unmap(mfc_context->vp8_state.coeff_probs_stream_in_bo);
3613 }
3614
3615 static void vp8_enc_state_update(struct gen6_mfc_context *mfc_context,
3616                                  VAQMatrixBufferVP8 *q_matrix)
3617 {
3618
3619     /*some other probabilities need to be updated*/
3620 }
3621
3622 extern void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param,
3623                            VAEncPictureParameterBufferVP8 *pic_param,
3624                            VAQMatrixBufferVP8 *q_matrix,
3625                            struct gen6_mfc_context *mfc_context,
3626                            struct intel_encoder_context *encoder_context);
3627
3628 static void vp8_enc_frame_header_binarize(struct encode_state *encode_state,
3629                                           struct intel_encoder_context *encoder_context,
3630                                           struct gen6_mfc_context *mfc_context)
3631 {
3632     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3633     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3634     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
3635     unsigned char *frame_header_buffer;
3636
3637     binarize_vp8_frame_header(seq_param, pic_param, q_matrix, mfc_context, encoder_context);
3638  
3639     dri_bo_map(mfc_context->vp8_state.frame_header_bo, 1);
3640     frame_header_buffer = (unsigned char *)mfc_context->vp8_state.frame_header_bo->virtual;
3641     assert(frame_header_buffer);
3642     memcpy(frame_header_buffer, mfc_context->vp8_state.vp8_frame_header, (mfc_context->vp8_state.frame_header_bit_count + 7) / 8);
3643     free(mfc_context->vp8_state.vp8_frame_header);
3644     dri_bo_unmap(mfc_context->vp8_state.frame_header_bo);
3645 }
3646
3647 #define MAX_VP8_FRAME_HEADER_SIZE              0x2000
3648 #define VP8_TOKEN_STATISTICS_BUFFER_SIZE       0x2000
3649
3650 static void gen8_mfc_vp8_init(VADriverContextP ctx,
3651                           struct encode_state *encode_state,
3652                           struct intel_encoder_context *encoder_context)
3653 {
3654     struct i965_driver_data *i965 = i965_driver_data(ctx);
3655     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3656     dri_bo *bo;
3657     int i;
3658     int width_in_mbs = 0;
3659     int height_in_mbs = 0;
3660     int slice_batchbuffer_size;
3661     int is_key_frame, slice_type, rate_control_mode;
3662
3663     VAEncSequenceParameterBufferVP8 *pSequenceParameter = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3664     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3665     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
3666
3667     width_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
3668     height_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
3669
3670     is_key_frame = !pic_param->pic_flags.bits.frame_type;
3671     slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
3672     rate_control_mode = encoder_context->rate_control_mode;
3673
3674     if (rate_control_mode == VA_RC_CBR) {
3675         q_matrix->quantization_index[0] = mfc_context->brc.qp_prime_y[0][slice_type];
3676         for (i = 1; i < 4; i++)
3677             q_matrix->quantization_index[i] = q_matrix->quantization_index[0];
3678         for (i = 0; i < 5; i++)
3679             q_matrix->quantization_index_delta[i] = 0;
3680     }
3681
3682     slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
3683         (SLICE_HEADER + SLICE_TAIL);
3684
3685     /*Encode common setup for MFC*/
3686     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
3687     mfc_context->post_deblocking_output.bo = NULL;
3688
3689     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
3690     mfc_context->pre_deblocking_output.bo = NULL;
3691
3692     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
3693     mfc_context->uncompressed_picture_source.bo = NULL;
3694
3695     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
3696     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
3697
3698     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
3699         if ( mfc_context->direct_mv_buffers[i].bo != NULL)
3700             dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
3701         mfc_context->direct_mv_buffers[i].bo = NULL;
3702     }
3703
3704     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
3705         if (mfc_context->reference_surfaces[i].bo != NULL)
3706             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
3707         mfc_context->reference_surfaces[i].bo = NULL;
3708     }
3709
3710     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
3711     bo = dri_bo_alloc(i965->intel.bufmgr,
3712                       "Buffer",
3713                       width_in_mbs * 64 * 16,
3714                       64);
3715     assert(bo);
3716     mfc_context->intra_row_store_scratch_buffer.bo = bo;
3717
3718     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
3719     bo = dri_bo_alloc(i965->intel.bufmgr,
3720                       "Buffer",
3721                       width_in_mbs * height_in_mbs * 16,
3722                       64);
3723     assert(bo);
3724     mfc_context->macroblock_status_buffer.bo = bo;
3725
3726     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
3727     bo = dri_bo_alloc(i965->intel.bufmgr,
3728                       "Buffer",
3729                       16 * width_in_mbs * 64,  /* 16 * width_in_mbs * 64 */
3730                       64);
3731     assert(bo);
3732     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
3733
3734     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
3735     bo = dri_bo_alloc(i965->intel.bufmgr,
3736                       "Buffer",
3737                       16 * width_in_mbs * 64, /* 16 * width_in_mbs * 64 */
3738                       0x1000);
3739     assert(bo);
3740     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
3741
3742     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
3743     mfc_context->mfc_batchbuffer_surface.bo = NULL;
3744
3745     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
3746     mfc_context->aux_batchbuffer_surface.bo = NULL;
3747
3748     if (mfc_context->aux_batchbuffer) {
3749         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
3750         mfc_context->aux_batchbuffer = NULL;
3751     }
3752
3753     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
3754     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
3755     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
3756     mfc_context->aux_batchbuffer_surface.pitch = 16;
3757     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
3758     mfc_context->aux_batchbuffer_surface.size_block = 16;
3759
3760     gen8_gpe_context_init(ctx, &mfc_context->gpe_context);
3761
3762     /* alloc vp8 encoding buffers*/
3763     dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
3764     bo = dri_bo_alloc(i965->intel.bufmgr,
3765                       "Buffer",
3766                       MAX_VP8_FRAME_HEADER_SIZE,
3767                       0x1000);
3768     assert(bo);
3769     mfc_context->vp8_state.frame_header_bo = bo;
3770
3771     mfc_context->vp8_state.intermediate_buffer_max_size = width_in_mbs * height_in_mbs * 384 * 9;
3772     for(i = 0; i < 8; i++) {
3773         mfc_context->vp8_state.intermediate_partition_offset[i] = width_in_mbs * height_in_mbs * 384 * (i + 1);
3774     }
3775     dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
3776     bo = dri_bo_alloc(i965->intel.bufmgr,
3777                       "Buffer",
3778                       mfc_context->vp8_state.intermediate_buffer_max_size,
3779                       0x1000);
3780     assert(bo);
3781     mfc_context->vp8_state.intermediate_bo = bo;
3782
3783     dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
3784     bo = dri_bo_alloc(i965->intel.bufmgr,
3785                       "Buffer",
3786                       width_in_mbs * height_in_mbs * 16,
3787                       0x1000);
3788     assert(bo);
3789     mfc_context->vp8_state.stream_out_bo = bo;
3790
3791     dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
3792     bo = dri_bo_alloc(i965->intel.bufmgr,
3793                       "Buffer",
3794                       sizeof(vp8_default_coef_probs),
3795                       0x1000);
3796     assert(bo);
3797     mfc_context->vp8_state.coeff_probs_stream_in_bo = bo;
3798
3799     dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
3800     bo = dri_bo_alloc(i965->intel.bufmgr,
3801                       "Buffer",
3802                       VP8_TOKEN_STATISTICS_BUFFER_SIZE,
3803                       0x1000);
3804     assert(bo);
3805     mfc_context->vp8_state.token_statistics_bo = bo;
3806
3807     dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
3808     bo = dri_bo_alloc(i965->intel.bufmgr,
3809                       "Buffer",
3810                       width_in_mbs * 16 * 64,
3811                       0x1000);
3812     assert(bo);
3813     mfc_context->vp8_state.mpc_row_store_bo = bo;
3814
3815     vp8_enc_state_init(mfc_context, pic_param, q_matrix);
3816     vp8_enc_frame_header_binarize(encode_state, encoder_context, mfc_context);
3817 }
3818
/*
 * Wire the per-frame surfaces/buffers into the MFC context for VP8 PAK:
 * reconstructed surface, reference frames, input YUV, and the coded
 * buffer (which also serves as the final-frame output). Every bo stored
 * in mfc_context picks up an extra reference; gen8_mfc_vp8_init()
 * releases the previous frame's references.
 */
static VAStatus
intel_mfc_vp8_prepare(VADriverContextP ctx,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    struct i965_coded_buffer_segment *coded_buffer_segment;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    dri_bo *bo;
    int i;

    /* reconstructed surface: ensure NV12 backing storage exists. When the
     * loop filter is disabled (level 0), the pre-deblocking output is the
     * final reconstruction; otherwise the post-deblocking output is used. */
    obj_surface = encode_state->reconstructed_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
    if (pic_param->loop_filter_level[0] == 0) {
        mfc_context->pre_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->pre_deblocking_output.bo);
    } else {
        mfc_context->post_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->post_deblocking_output.bo);
    }

    mfc_context->surface_state.width = obj_surface->orig_width;
    mfc_context->surface_state.height = obj_surface->orig_height;
    mfc_context->surface_state.w_pitch = obj_surface->width;
    mfc_context->surface_state.h_pitch = obj_surface->height;

    /* set vp8 reference frames; missing slots are left NULL */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        obj_surface = encode_state->reference_objects[i];

        if (obj_surface && obj_surface->bo) {
            mfc_context->reference_surfaces[i].bo = obj_surface->bo;
            dri_bo_reference(mfc_context->reference_surfaces[i].bo);
        } else {
            mfc_context->reference_surfaces[i].bo = NULL;
        }
    }

    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);

    /* coded buffer: the bitstream starts after the driver's header segment;
     * the end offset leaves a 0x1000-byte guard at the buffer's tail. */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
    mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
    mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);

    /* The final VP8 frame is written directly into the coded buffer. */
    dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
    mfc_context->vp8_state.final_frame_bo = mfc_context->mfc_indirect_pak_bse_object.bo;
    mfc_context->vp8_state.final_frame_byte_offset = I965_CODEDBUFFER_HEADER_SIZE;
    dri_bo_reference(mfc_context->vp8_state.final_frame_bo);

    /* set the internal flag to 0 to indicate the coded size is unknown */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    dri_bo_unmap(bo);

    return vaStatus;
}
3888
/*
 * Emit the MFX_VP8_ENCODER_CFG command (30 dwords): rate-control pass
 * flags, per-MB bit-count limits, frame display/version flags, scaled
 * frame dimensions, and the frame-header bit count plus the patch
 * offsets recorded during frame-header binarization.
 */
static void
gen8_mfc_vp8_encoder_cfg(VADriverContextP ctx, 
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;

    BEGIN_BCS_BATCH(batch, 30);
    OUT_BCS_BATCH(batch, MFX_VP8_ENCODER_CFG | (30 - 2)); /* SKL should be 31-2 ? */

    OUT_BCS_BATCH(batch,
                  0 << 9 | /* compressed bitstream output disable */
                  1 << 7 | /* disable per-segment delta qindex and loop filter in RC */
                  1 << 6 | /* RC initial pass */
                  0 << 4 | /* upate segment feature date flag */
                  1 << 3 | /* bitstream statistics output enable */
                  1 << 2 | /* token statistics output enable */
                  0 << 1 | /* final bitstream output disable */
                  0 << 0); /*DW1*/
    
    OUT_BCS_BATCH(batch, 0); /*DW2*/

    OUT_BCS_BATCH(batch, 
                  0xfff << 16 | /* max intra mb bit count limit */
                  0xfff << 0  /* max inter mb bit count limit */
                  ); /*DW3*/

    /* DW4-DW21: rate-control thresholds/deltas, all zero here since the
     * RC initial pass is used without per-pass adjustment. */
    OUT_BCS_BATCH(batch, 0); /*DW4*/
    OUT_BCS_BATCH(batch, 0); /*DW5*/
    OUT_BCS_BATCH(batch, 0); /*DW6*/
    OUT_BCS_BATCH(batch, 0); /*DW7*/
    OUT_BCS_BATCH(batch, 0); /*DW8*/
    OUT_BCS_BATCH(batch, 0); /*DW9*/
    OUT_BCS_BATCH(batch, 0); /*DW10*/
    OUT_BCS_BATCH(batch, 0); /*DW11*/
    OUT_BCS_BATCH(batch, 0); /*DW12*/
    OUT_BCS_BATCH(batch, 0); /*DW13*/
    OUT_BCS_BATCH(batch, 0); /*DW14*/
    OUT_BCS_BATCH(batch, 0); /*DW15*/
    OUT_BCS_BATCH(batch, 0); /*DW16*/
    OUT_BCS_BATCH(batch, 0); /*DW17*/
    OUT_BCS_BATCH(batch, 0); /*DW18*/
    OUT_BCS_BATCH(batch, 0); /*DW19*/
    OUT_BCS_BATCH(batch, 0); /*DW20*/
    OUT_BCS_BATCH(batch, 0); /*DW21*/

    OUT_BCS_BATCH(batch, 
                 pic_param->pic_flags.bits.show_frame << 23 |
                 pic_param->pic_flags.bits.version << 20
                 ); /*DW22*/

    /* DW23: scaled height in the high half, scaled width in the low half */
    OUT_BCS_BATCH(batch,
                 (seq_param->frame_height_scale << 14 | seq_param->frame_height) << 16 |
                 (seq_param->frame_width_scale << 14 | seq_param->frame_width) << 0
                 );

    /*DW24*/
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bit_count); /* frame header bit count */

    /*DW25*/
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_qindex_update_pos); /* frame header bin buffer qindex update pointer */

    /*DW26*/
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_lf_update_pos); /* frame header bin buffer loop filter update pointer*/

    /*DW27*/
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_token_update_pos); /* frame header bin buffer token update pointer */

    /*DW28*/
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bin_mv_upate_pos); /*frame header bin buffer mv update pointer */

    /*DW29*/
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
3968
/*
 * Emit the MFX_VP8_PIC_STATE command (38 dwords): frame dimensions in
 * macroblocks, picture flags, per-segment loop filter levels and
 * quantizer indices, qindex deltas, probability tables (segment tree,
 * skip/intra/last/golden, Y/UV modes, MV contexts) and the loop filter
 * reference/mode deltas.
 */
static void
gen8_mfc_vp8_pic_state(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
    int i, j, log2num;

    log2num = pic_param->pic_flags.bits.num_token_partitions;

    /*update mode and token probs*/
    vp8_enc_state_update(mfc_context, q_matrix);

    BEGIN_BCS_BATCH(batch, 38);
    OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
    /* DW1: frame size in macroblocks, minus one in each dimension */
    OUT_BCS_BATCH(batch,
                  (ALIGN(seq_param->frame_height, 16) / 16 - 1) << 16 |
                  (ALIGN(seq_param->frame_width, 16) / 16 - 1) << 0);
 
    /* DW2: picture flags */
    OUT_BCS_BATCH(batch,
                  log2num << 24 |
                  pic_param->sharpness_level << 16 |
                  pic_param->pic_flags.bits.sign_bias_alternate << 13 |
                  pic_param->pic_flags.bits.sign_bias_golden << 12 |
                  pic_param->pic_flags.bits.loop_filter_adj_enable << 11 |
                  pic_param->pic_flags.bits.mb_no_coeff_skip << 10 |
                  pic_param->pic_flags.bits.update_mb_segmentation_map << 9 |
                  pic_param->pic_flags.bits.segmentation_enabled << 8 |
                  !pic_param->pic_flags.bits.frame_type << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
                  (pic_param->pic_flags.bits.version / 2) << 4 |
                  (pic_param->pic_flags.bits.version == 3) << 1 | /* full pixel mode for version 3 */
                  !!pic_param->pic_flags.bits.version << 0); /* version 0: 6 tap */
 
    /* DW3: per-segment loop filter levels */
    OUT_BCS_BATCH(batch,
                  pic_param->loop_filter_level[3] << 24 |
                  pic_param->loop_filter_level[2] << 16 |
                  pic_param->loop_filter_level[1] <<  8 |
                  pic_param->loop_filter_level[0] <<  0);

    /* DW4: per-segment quantizer indices */
    OUT_BCS_BATCH(batch,
                  q_matrix->quantization_index[3] << 24 |
                  q_matrix->quantization_index[2] << 16 |
                  q_matrix->quantization_index[1] <<  8 |
                  q_matrix->quantization_index[0] << 0);

    /* DW5-DW6: qindex deltas as sign/magnitude pairs; the sign bit is
     * extracted as bit 15 after the cast to unsigned short. */
    OUT_BCS_BATCH(batch,
                 ((unsigned short)(q_matrix->quantization_index_delta[4]) >> 15) << 28 | 
                 abs(q_matrix->quantization_index_delta[4]) << 24 |
                 ((unsigned short)(q_matrix->quantization_index_delta[3]) >> 15) << 20 | 
                 abs(q_matrix->quantization_index_delta[3]) << 16 |
                 ((unsigned short)(q_matrix->quantization_index_delta[2]) >> 15) << 12 | 
                 abs(q_matrix->quantization_index_delta[2]) << 8 |
                 ((unsigned short)(q_matrix->quantization_index_delta[1]) >> 15) << 4 | 
                 abs(q_matrix->quantization_index_delta[1]) << 0);

    OUT_BCS_BATCH(batch,
                 ((unsigned short)(q_matrix->quantization_index_delta[0]) >> 15) << 4 |
                 abs(q_matrix->quantization_index_delta[0]) << 0);
    
    /* DW7: clamping range for the quantizer index */
    OUT_BCS_BATCH(batch,
                 pic_param->clamp_qindex_high << 8 |
                 pic_param->clamp_qindex_low << 0);

    /* DW8-DW18: all-ones filler dwords */
    for (i = 8; i < 19; i++) {
         OUT_BCS_BATCH(batch, 0xffffffff);
    }

    /* DW19: MB segment tree probabilities */
    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.mb_segment_tree_probs[2] << 16 |
                  mfc_context->vp8_state.mb_segment_tree_probs[1] <<  8 |
                  mfc_context->vp8_state.mb_segment_tree_probs[0] <<  0);

    /* DW20: skip/intra/last/golden probabilities */
    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.prob_skip_false << 24 |
                  mfc_context->vp8_state.prob_intra      << 16 |
                  mfc_context->vp8_state.prob_last       <<  8 |
                  mfc_context->vp8_state.prob_gf         <<  0);

    /* DW21-DW22: Y and UV mode probabilities */
    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.y_mode_probs[3] << 24 |
                  mfc_context->vp8_state.y_mode_probs[2] << 16 |
                  mfc_context->vp8_state.y_mode_probs[1] <<  8 |
                  mfc_context->vp8_state.y_mode_probs[0] <<  0);

    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.uv_mode_probs[2] << 16 |
                  mfc_context->vp8_state.uv_mode_probs[1] <<  8 |
                  mfc_context->vp8_state.uv_mode_probs[0] <<  0);
    
    /* MV update value, DW23-DW32 */
    for (i = 0; i < 2; i++) {
        for (j = 0; j < 20; j += 4) {
            OUT_BCS_BATCH(batch,
                          (j + 3 == 19 ? 0 : mfc_context->vp8_state.mv_probs[i][j + 3]) << 24 |
                          mfc_context->vp8_state.mv_probs[i][j + 2] << 16 |
                          mfc_context->vp8_state.mv_probs[i][j + 1] <<  8 |
                          mfc_context->vp8_state.mv_probs[i][j + 0] <<  0);
        }
    }

    /* DW33-DW34: loop filter reference/mode deltas, 7-bit sign-magnitude */
    OUT_BCS_BATCH(batch,
                  (pic_param->ref_lf_delta[3] & 0x7f) << 24 |
                  (pic_param->ref_lf_delta[2] & 0x7f) << 16 |
                  (pic_param->ref_lf_delta[1] & 0x7f) <<  8 |
                  (pic_param->ref_lf_delta[0] & 0x7f) <<  0);

    OUT_BCS_BATCH(batch,
                  (pic_param->mode_lf_delta[3] & 0x7f) << 24 |
                  (pic_param->mode_lf_delta[2] & 0x7f) << 16 |
                  (pic_param->mode_lf_delta[1] & 0x7f) <<  8 |
                  (pic_param->mode_lf_delta[0] & 0x7f) <<  0);

    /* DW35-DW37: reserved */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
4091
/* Emit a 3-dword buffer-address entry: a relocation for the bo (or a
 * zero dword when the bo is absent) followed by two zero dwords.
 * Wrapped in do { } while (0) so the macro behaves as a single
 * statement inside if/else at call sites. */
#define OUT_VP8_BUFFER(bo, offset)                                      \
    do {                                                                \
        if (bo)                                                         \
            OUT_BCS_RELOC(batch,                                        \
                          bo,                                           \
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, \
                          offset);                                      \
        else                                                            \
            OUT_BCS_BATCH(batch, 0);                                    \
        OUT_BCS_BATCH(batch, 0);                                        \
        OUT_BCS_BATCH(batch, 0);                                        \
    } while (0)
4102
/*
 * Emit MFX_VP8_BSP_BUF_BASE_ADDR_STATE (32 dwords): base addresses of
 * the frame-header, intermediate (token partition), final-frame,
 * stream-out, coefficient-probability stream-in, token-statistics and
 * MPC row-store buffers.
 */
static void 
gen8_mfc_vp8_bsp_buf_base_addr_state(VADriverContextP ctx, 
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 32);
    OUT_BCS_BATCH(batch, MFX_VP8_BSP_BUF_BASE_ADDR_STATE | (32 - 2));

    OUT_VP8_BUFFER(mfc_context->vp8_state.frame_header_bo, 0);

    /* Intermediate buffer plus the byte offsets of its 8 partitions and
     * its total size (set up in gen8_mfc_vp8_init). */
    OUT_VP8_BUFFER(mfc_context->vp8_state.intermediate_bo, 0);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[0]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[1]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[2]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[3]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[4]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[5]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[6]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[7]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_buffer_max_size);

    /* Final frame lands in the coded buffer past the driver header. */
    OUT_VP8_BUFFER(mfc_context->vp8_state.final_frame_bo, I965_CODEDBUFFER_HEADER_SIZE);
    OUT_BCS_BATCH(batch, 0);

    OUT_VP8_BUFFER(mfc_context->vp8_state.stream_out_bo, 0);
    OUT_VP8_BUFFER(mfc_context->vp8_state.coeff_probs_stream_in_bo, 0);
    OUT_VP8_BUFFER(mfc_context->vp8_state.token_statistics_bo, 0);
    OUT_VP8_BUFFER(mfc_context->vp8_state.mpc_row_store_bo, 0);

    ADVANCE_BCS_BATCH(batch);
}
4137
/*
 * Program the full VP8 PAK picture-level pipeline in the required
 * order: pipe mode select, surface/indirect-object state, buffer base
 * addresses, then the VP8-specific picture state and encoder config.
 */
static void
gen8_mfc_vp8_pipeline_picture_programing(VADriverContextP ctx,
                                           struct encode_state *encode_state,
                                           struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    mfc_context->pipe_mode_select(ctx, MFX_FORMAT_VP8, encoder_context);
    mfc_context->set_surface_state(ctx, encoder_context);
    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
    gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    gen8_mfc_vp8_bsp_buf_base_addr_state(ctx, encode_state, encoder_context);
    gen8_mfc_vp8_pic_state(ctx, encode_state,encoder_context);
    gen8_mfc_vp8_encoder_cfg(ctx, encode_state, encoder_context);
}
4154
/* Map a VME 16x16 intra luma prediction mode (2-bit index) to the
 * corresponding VP8 PAK prediction mode encoding. */
static const unsigned char
vp8_intra_mb_mode_map[VME_MB_INTRA_MODE_COUNT] = {
    PAK_V_PRED,
    PAK_H_PRED,
    PAK_DC_PRED,
    PAK_TM_PRED
};
4162
/* Map a VME 4x4 intra sub-block prediction mode (4-bit index) to the
 * corresponding VP8 PAK B-mode encoding. */
static const unsigned char
vp8_intra_block_mode_map[VME_B_INTRA_MODE_COUNT] = {
    PAK_B_VE_PRED,
    PAK_B_HE_PRED,
    PAK_B_DC_PRED,
    PAK_B_LD_PRED,
    PAK_B_RD_PRED,
    PAK_B_VR_PRED,
    PAK_B_HD_PRED,
    PAK_B_VL_PRED,
    PAK_B_HU_PRED
};
4175
4176 static int inline gen8_mfc_vp8_intra_mb_mode_map(unsigned int vme_pred_mode, int is_luma_4x4)
4177 {
4178     unsigned int i, pak_pred_mode = 0;
4179     unsigned int vme_sub_blocks_pred_mode[8], pak_sub_blocks_pred_mode[8]; /* 8 blocks's intra mode */
4180
4181     if (!is_luma_4x4) {
4182         pak_pred_mode = vp8_intra_mb_mode_map[vme_pred_mode & 0x3];
4183     } else {
4184         for (i = 0; i < 8; i++) { 
4185             vme_sub_blocks_pred_mode[i] = ((vme_pred_mode >> (4 * i)) & 0xf);
4186             assert(vme_sub_blocks_pred_mode[i] < VME_B_INTRA_MODE_COUNT);
4187             pak_sub_blocks_pred_mode[i] = vp8_intra_block_mode_map[vme_sub_blocks_pred_mode[i]];
4188             pak_pred_mode |= (pak_sub_blocks_pred_mode[i] << (4 * i));
4189         }
4190     }
4191
4192     return pak_pred_mode;
4193 }
/*
 * Emit one MFX_VP8_PAK_OBJECT command for an intra macroblock.
 *
 * msg points at the VME output record for this MB:
 *   msg[0] bits 5:4 - VME intra MB mode (0 = intra_16x16, 2 = intra_4x4;
 *                     VP8 has no intra_8x8, hence the assert)
 *   msg[1]/msg[2]   - packed 4-bit luma sub-block modes (4x4 case)
 *   msg[3] bits 1:0 - chroma prediction mode
 * x/y are macroblock coordinates; batch falls back to the context batch
 * when NULL (used when commands go into a separate slice batch).
 */
static void
gen8_mfc_vp8_pak_object_intra(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context,
                              unsigned int *msg,
                              int x, int y,
                              struct intel_batchbuffer *batch)
{
    unsigned int vme_intra_mb_mode, vme_chroma_pred_mode;
    unsigned int pak_intra_mb_mode, pak_chroma_pred_mode;
    unsigned int vme_luma_pred_mode[2], pak_luma_pred_mode[2];

    if (batch == NULL)
        batch = encoder_context->base.batch;

    vme_intra_mb_mode = ((msg[0] & 0x30) >> 4);
    assert((vme_intra_mb_mode == 0) || (vme_intra_mb_mode == 2)); //vp8 only support intra_16x16 and intra_4x4
    pak_intra_mb_mode = (vme_intra_mb_mode >> 1);

    vme_luma_pred_mode[0] = msg[1];
    vme_luma_pred_mode[1] = msg[2];
    vme_chroma_pred_mode = msg[3] & 0x3;

    pak_luma_pred_mode[0] = gen8_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[0], pak_intra_mb_mode);
    pak_luma_pred_mode[1] = gen8_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[1], pak_intra_mb_mode);
    pak_chroma_pred_mode = gen8_mfc_vp8_intra_mb_mode_map(vme_chroma_pred_mode, 0);

    BEGIN_BCS_BATCH(batch, 7);

    OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 20) |                    /* mv format: intra mb */
                  (0 << 18) |                    /* Segment ID */
                  (0 << 17) |                    /* disable coeff clamp */
                  (1 << 13) |                    /* intra mb flag */
                  (0 << 11) |                    /* refer picture select: last frame */
                  (pak_intra_mb_mode << 8) |     /* mb type */
                  (pak_chroma_pred_mode << 4) |  /* mb uv mode */
                  (0 << 2) |                     /* skip mb flag: disable */
                  0);

    OUT_BCS_BATCH(batch, (y << 16) | x);
    OUT_BCS_BATCH(batch, pak_luma_pred_mode[0]);
    OUT_BCS_BATCH(batch, pak_luma_pred_mode[1]);

    ADVANCE_BCS_BATCH(batch);
}
4242
/*
 * Emit one MFX_VP8_PAK_OBJECT command for an inter macroblock.
 *
 * Rewrites the mapped VME output record in place: the single 16x16 MV is
 * doubled (per VP8 luma MV convention), moved to the start of the record so
 * the indirect MV fetch offset is 64-byte aligned, and replicated into all
 * 16 MV slots. `offset` is the byte offset of this MB's record inside the
 * VME output buffer and is programmed as the indirect MV data offset.
 */
static void
gen8_mfc_vp8_pak_object_inter(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context,
                              unsigned int *msg,
                              int offset,
                              int x, int y,
                              struct intel_batchbuffer *batch)
{
    int i;

    if (batch == NULL)
        batch = encoder_context->base.batch;

    /* only support inter_16x16 now */
    assert((msg[AVC_INTER_MSG_OFFSET] & INTER_MODE_MASK) == INTER_16X16);
    /* for inter_16x16, all 16 MVs should be same,
     * and move mv to the vme mb start address to make sure offset is 64 bytes aligned
     * as vp8 spec, all vp8 luma motion vectors are doubled when stored
     */
    msg[0] = (((msg[AVC_INTER_MV_OFFSET/4] & 0xffff0000) << 1) | ((msg[AVC_INTER_MV_OFFSET/4] << 1) & 0xffff));

    for (i = 1; i < 16; i++) {
        msg[i] = msg[0];
    }

    BEGIN_BCS_BATCH(batch, 7);

    OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |           /* enable inline mv data: disable */
                  64);
    OUT_BCS_BATCH(batch,
                  offset);
    OUT_BCS_BATCH(batch,
                  (4 << 20) |           /* mv format: inter */
                  (0 << 18) |           /* Segment ID */
                  (0 << 17) |           /* coeff clamp: disable */
                  (0 << 13) |           /* intra mb flag: inter mb */
                  (0 << 11) |           /* refer picture select: last frame */
                  (0 << 8) |            /* mb type: 16x16 */
                  (0 << 4) |            /* mb uv mode: dc_pred */
                  (0 << 2) |            /* skip mb flag: disable */
                  0);

    OUT_BCS_BATCH(batch, (y << 16) | x);

    /*new mv*/
    OUT_BCS_BATCH(batch, 0x8);
    OUT_BCS_BATCH(batch, 0x8);

    ADVANCE_BCS_BATCH(batch);
}
4295
4296 static void
4297 gen8_mfc_vp8_pak_pipeline(VADriverContextP ctx,
4298                           struct encode_state *encode_state,
4299                           struct intel_encoder_context *encoder_context,
4300                           struct intel_batchbuffer *slice_batch)
4301 {
4302     struct gen6_vme_context *vme_context = encoder_context->vme_context;
4303     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
4304     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
4305     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
4306     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
4307     unsigned int *msg = NULL;
4308     unsigned char *msg_ptr = NULL;
4309     unsigned int i, offset, is_intra_frame;
4310
4311     is_intra_frame = !pic_param->pic_flags.bits.frame_type;
4312
4313     dri_bo_map(vme_context->vme_output.bo , 1);
4314     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
4315
4316     for( i = 0; i < width_in_mbs * height_in_mbs; i++) {
4317         int h_pos = i % width_in_mbs;
4318         int v_pos = i / width_in_mbs;
4319         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
4320         
4321         if (is_intra_frame) {
4322             gen8_mfc_vp8_pak_object_intra(ctx,
4323                     encoder_context,
4324                     msg,
4325                     h_pos, v_pos,
4326                     slice_batch);
4327         } else {
4328             int inter_rdo, intra_rdo;
4329             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
4330             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
4331
4332             if (intra_rdo < inter_rdo) {
4333                 gen8_mfc_vp8_pak_object_intra(ctx,
4334                         encoder_context,
4335                         msg,
4336                         h_pos, v_pos,
4337                         slice_batch);
4338             } else {
4339                 offset = i * vme_context->vme_output.size_block;
4340                 gen8_mfc_vp8_pak_object_inter(ctx,
4341                         encoder_context,
4342                         msg,
4343                         offset,
4344                         h_pos, v_pos,
4345                         slice_batch);
4346             }
4347         }
4348     }
4349
4350     dri_bo_unmap(vme_context->vme_output.bo);
4351 }
4352
/*
 * Build the batch buffer that holds all vp8 pak object commands.
 *
 * Consumes the context's aux batchbuffer: the PAK objects are appended to
 * it, the buffer is terminated with MI_BATCH_BUFFER_END, and its BO is
 * returned with an extra reference while the batchbuffer wrapper itself is
 * freed (aux_batchbuffer is cleared). The caller owns the returned BO and
 * must unreference it.
 */
static dri_bo *
gen8_mfc_vp8_software_batchbuffer(VADriverContextP ctx,
                                          struct encode_state *encode_state,
                                          struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch;
    dri_bo *batch_bo;

    batch = mfc_context->aux_batchbuffer;
    batch_bo = batch->buffer;

    gen8_mfc_vp8_pak_pipeline(ctx, encode_state, encoder_context, batch);

    /* pad to QWORD alignment before terminating the batch */
    intel_batchbuffer_align(batch, 8);

    BEGIN_BCS_BATCH(batch, 2);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
    ADVANCE_BCS_BATCH(batch);

    /* keep the BO alive past the batchbuffer teardown; ownership moves to caller */
    dri_bo_reference(batch_bo);
    intel_batchbuffer_free(batch);
    mfc_context->aux_batchbuffer = NULL;

    return batch_bo;
}
4383
/*
 * Top-level BCS programming for one VP8 frame: build the second-level batch
 * of PAK objects, then emit the picture-level state followed by an
 * MI_BATCH_BUFFER_START chaining into that second-level batch.
 */
static void
gen8_mfc_vp8_pipeline_programing(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    dri_bo *slice_batch_bo;

    /* second-level batch holding the per-MB PAK objects (owned here) */
    slice_batch_bo = gen8_mfc_vp8_software_batchbuffer(ctx, encode_state, encoder_context);

    // begin programing
    intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
    intel_batchbuffer_emit_mi_flush(batch);

    // picture level programing
    gen8_mfc_vp8_pipeline_picture_programing(ctx, encode_state, encoder_context);

    /* chain to the second-level batch (64-bit address: two extra dwords) */
    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_BCS_RELOC(batch,
                  slice_batch_bo,
                  I915_GEM_DOMAIN_COMMAND, 0,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);

    // end programing
    intel_batchbuffer_end_atomic(batch);

    dri_bo_unreference(slice_batch_bo);
}
4416
/*
 * Compute the coded frame size in bytes from the hardware token statistics
 * buffer and store it in the coded buffer segment header.
 *
 * token_statistics_bo layout (as read here): entry 0 is the first-partition
 * bit count, entries 1..partition_num are the token-partition bit counts.
 * Returns the total coded byte count.
 */
static int gen8_mfc_calc_vp8_coded_buffer_size(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    unsigned char is_intra_frame = !pic_param->pic_flags.bits.frame_type;
    unsigned int *vp8_encoding_status, i, first_partition_bytes, token_partition_bytes, vp8_coded_bytes;

    int partition_num = 1 << pic_param->pic_flags.bits.num_token_partitions;

    first_partition_bytes = token_partition_bytes = vp8_coded_bytes = 0;

    dri_bo_map(mfc_context->vp8_state.token_statistics_bo, 0);

    vp8_encoding_status = (unsigned int *)mfc_context->vp8_state.token_statistics_bo->virtual;
    /* round bit counts up to whole bytes */
    first_partition_bytes = (vp8_encoding_status[0] + 7) / 8;

    for (i = 1; i <= partition_num; i++)
        token_partition_bytes += (vp8_encoding_status[i] + 7) / 8;

    /* coded_bytes = P0..P8 partition bytes + uncompressed data chunk (3 bytes,
     * plus 7 more on key frames) + 3-byte partition-size fields in the
     * bitstream + 3 extra bytes.
     * NOTE(review): the final +3 is an empirical fudge - the last partition
     * size reported in the status buffer appears smaller than reality. */
    vp8_coded_bytes = first_partition_bytes + token_partition_bytes + (3 + 7 * !!is_intra_frame) + (partition_num - 1) * 3 + 3;

    dri_bo_unmap(mfc_context->vp8_state.token_statistics_bo);

    /* publish the size in the coded-buffer segment header for the app */
    dri_bo_map(mfc_context->vp8_state.final_frame_bo, 0);
    struct i965_coded_buffer_segment *coded_buffer_segment = (struct i965_coded_buffer_segment *)(mfc_context->vp8_state.final_frame_bo->virtual);
    coded_buffer_segment->base.size = vp8_coded_bytes;
    dri_bo_unmap(mfc_context->vp8_state.final_frame_bo);

    return vp8_coded_bytes;
}
4451
4452 static VAStatus
4453 gen8_mfc_vp8_encode_picture(VADriverContextP ctx,
4454                               struct encode_state *encode_state,
4455                               struct intel_encoder_context *encoder_context)
4456 {
4457     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4458     unsigned int rate_control_mode = encoder_context->rate_control_mode;
4459     int current_frame_bits_size;
4460     int sts;
4461
4462     gen8_mfc_vp8_init(ctx, encode_state, encoder_context);
4463     intel_mfc_vp8_prepare(ctx, encode_state, encoder_context);
4464     /*Programing bcs pipeline*/
4465     gen8_mfc_vp8_pipeline_programing(ctx, encode_state, encoder_context);
4466     gen8_mfc_run(ctx, encode_state, encoder_context);
4467     current_frame_bits_size = 8 * gen8_mfc_calc_vp8_coded_buffer_size(ctx, encode_state, encoder_context);
4468
4469     if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
4470         sts = gen8_mfc_vp8_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
4471         if (sts == BRC_NO_HRD_VIOLATION) {
4472             gen8_mfc_vp8_hrd_context_update(encode_state, mfc_context);
4473         }
4474         else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
4475             if (!mfc_context->hrd.violation_noted) {
4476                 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
4477                 mfc_context->hrd.violation_noted = 1;
4478             }
4479             return VA_STATUS_SUCCESS;
4480         }
4481     }
4482
4483     return VA_STATUS_SUCCESS;
4484 }
4485
4486 static void
4487 gen8_mfc_context_destroy(void *context)
4488 {
4489     struct gen6_mfc_context *mfc_context = context;
4490     int i;
4491
4492     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
4493     mfc_context->post_deblocking_output.bo = NULL;
4494
4495     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
4496     mfc_context->pre_deblocking_output.bo = NULL;
4497
4498     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
4499     mfc_context->uncompressed_picture_source.bo = NULL;
4500
4501     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
4502     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
4503
4504     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
4505         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
4506         mfc_context->direct_mv_buffers[i].bo = NULL;
4507     }
4508
4509     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
4510     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
4511
4512     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
4513     mfc_context->macroblock_status_buffer.bo = NULL;
4514
4515     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
4516     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
4517
4518     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
4519     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
4520
4521
4522     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
4523         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
4524         mfc_context->reference_surfaces[i].bo = NULL;  
4525     }
4526
4527     gen8_gpe_context_destroy(&mfc_context->gpe_context);
4528
4529     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
4530     mfc_context->mfc_batchbuffer_surface.bo = NULL;
4531
4532     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
4533     mfc_context->aux_batchbuffer_surface.bo = NULL;
4534
4535     if (mfc_context->aux_batchbuffer)
4536         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
4537
4538     mfc_context->aux_batchbuffer = NULL;
4539
4540     dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
4541     mfc_context->vp8_state.coeff_probs_stream_in_bo = NULL;
4542
4543     dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
4544     mfc_context->vp8_state.final_frame_bo = NULL;
4545
4546     dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
4547     mfc_context->vp8_state.frame_header_bo = NULL;
4548
4549     dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
4550     mfc_context->vp8_state.intermediate_bo = NULL;
4551
4552     dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
4553     mfc_context->vp8_state.mpc_row_store_bo = NULL;
4554
4555     dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
4556     mfc_context->vp8_state.stream_out_bo = NULL;
4557
4558     dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
4559     mfc_context->vp8_state.token_statistics_bo = NULL;
4560
4561     free(mfc_context);
4562 }
4563
4564 static VAStatus gen8_mfc_pipeline(VADriverContextP ctx,
4565                                   VAProfile profile,
4566                                   struct encode_state *encode_state,
4567                                   struct intel_encoder_context *encoder_context)
4568 {
4569     VAStatus vaStatus;
4570
4571     switch (profile) {
4572     case VAProfileH264ConstrainedBaseline:
4573     case VAProfileH264Main:
4574     case VAProfileH264High:
4575     case VAProfileH264MultiviewHigh:
4576     case VAProfileH264StereoHigh:
4577         vaStatus = gen8_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
4578         break;
4579
4580         /* FIXME: add for other profile */
4581     case VAProfileMPEG2Simple:
4582     case VAProfileMPEG2Main:
4583         vaStatus = gen8_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
4584         break;
4585
4586     case VAProfileJPEGBaseline:
4587         jpeg_init_default_qmatrix(ctx, encoder_context);
4588         vaStatus = gen8_mfc_jpeg_encode_picture(ctx, encode_state, encoder_context);
4589         break;
4590  
4591     case VAProfileVP8Version0_3:
4592         vaStatus = gen8_mfc_vp8_encode_picture(ctx, encode_state, encoder_context);
4593         break;
4594  
4595     default:
4596         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
4597         break;
4598     }
4599
4600     return vaStatus;
4601 }
4602
4603 Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
4604 {
4605     struct i965_driver_data *i965 = i965_driver_data(ctx);
4606     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
4607
4608     assert(mfc_context);
4609     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
4610
4611     mfc_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
4612     mfc_context->gpe_context.curbe_size = 32 * 4;
4613     mfc_context->gpe_context.sampler_size = 0;
4614
4615     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
4616     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
4617     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
4618     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
4619     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
4620
4621     if (IS_GEN9(i965->intel.device_info)) {
4622         gen8_gpe_load_kernels(ctx,
4623                           &mfc_context->gpe_context,
4624                           gen9_mfc_kernels,
4625                           1);
4626     } else {
4627         gen8_gpe_load_kernels(ctx,
4628                           &mfc_context->gpe_context,
4629                           gen8_mfc_kernels,
4630                           1);
4631     }
4632
4633     mfc_context->pipe_mode_select = gen8_mfc_pipe_mode_select;
4634     mfc_context->set_surface_state = gen8_mfc_surface_state;
4635     mfc_context->ind_obj_base_addr_state = gen8_mfc_ind_obj_base_addr_state;
4636     mfc_context->avc_img_state = gen8_mfc_avc_img_state;
4637     mfc_context->avc_qm_state = gen8_mfc_avc_qm_state;
4638     mfc_context->avc_fqm_state = gen8_mfc_avc_fqm_state;
4639     mfc_context->insert_object = gen8_mfc_avc_insert_object;
4640     mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
4641
4642     encoder_context->mfc_context = mfc_context;
4643     encoder_context->mfc_context_destroy = gen8_mfc_context_destroy;
4644     encoder_context->mfc_pipeline = gen8_mfc_pipeline;
4645
4646     if (encoder_context->codec == CODEC_VP8)
4647         encoder_context->mfc_brc_prepare = gen8_mfc_vp8_brc_prepare;
4648     else
4649         encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
4650
4651     return True;
4652 }