OSDN Git Service

Fix AUD insert issue in AVC encoder with GEN6/7.5/8.
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfc.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45 #include <va/va_enc_jpeg.h>
46 #include "vp8_probs.h"
47
48 #define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
49 #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
50 #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
51
52 #define MFC_SOFTWARE_BATCH      0
53
54 #define B0_STEP_REV     2
55 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
56
//Zigzag scan order of the the Luma and Chroma components
//Note: Jpeg Spec ISO/IEC 10918-1, Figure A.6 shows the zigzag order differently.
//The Spec is trying to show the zigzag pattern with number positions. The below
//table will use the pattern shown by A.6 and map the position of the elements in the array
//(i.e. entry k holds the raster index of the k-th coefficient in zigzag order).
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
71
//Default Luminance quantization table (raster order, 8x8)
//Source: Jpeg Spec ISO/IEC 10918-1, Annex K, Table K.1
static const uint8_t jpeg_luma_quant[64] = {
    16, 11, 10, 16, 24,  40,  51,  61,
    12, 12, 14, 19, 26,  58,  60,  55,
    14, 13, 16, 24, 40,  57,  69,  56,
    14, 17, 22, 29, 51,  87,  80,  62,
    18, 22, 37, 56, 68,  109, 103, 77,
    24, 35, 55, 64, 81,  104, 113, 92,
    49, 64, 78, 87, 103, 121, 120, 101,
    72, 92, 95, 98, 112, 100, 103, 99
};
84
//Default Chroma quantization table (raster order, 8x8)
//Source: Jpeg Spec ISO/IEC 10918-1, Annex K, Table K.2
static const uint8_t jpeg_chroma_quant[64] = {
    17, 18, 24, 47, 99, 99, 99, 99,
    18, 21, 26, 66, 99, 99, 99, 99,
    24, 26, 56, 99, 99, 99, 99, 99,
    47, 66, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99
};
97
98
/* Map a VA-API JPEG Huffman table index (0 = luma, 1 = chroma) to the
 * hardware MFX Huffman table id. */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
103
/* Precompiled media kernel (HSW shader layout, Gen8 binary) used to build
 * the AVC MFC batch buffer on the GPU. */
static const uint32_t gen8_mfc_batchbuffer_avc[][4] = {
#include "shaders/utils/mfc_batchbuffer_hsw.g8b"
};
107
/* Precompiled media kernel (HSW shader layout, Gen9 binary) used to build
 * the AVC MFC batch buffer on the GPU. */
static const uint32_t gen9_mfc_batchbuffer_avc[][4] = {
#include "shaders/utils/mfc_batchbuffer_hsw.g9b"
};
111
/* Kernel descriptor table for Gen8: one entry, the AVC intra batch-buffer
 * generation kernel. */
static struct i965_kernel gen8_mfc_kernels[] = {
    {
        "MFC AVC INTRA BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTRA,
        gen8_mfc_batchbuffer_avc,
        sizeof(gen8_mfc_batchbuffer_avc),
        NULL
    },
};
121
/* Kernel descriptor table for Gen9: one entry, the AVC intra batch-buffer
 * generation kernel. */
static struct i965_kernel gen9_mfc_kernels[] = {
    {
        "MFC AVC INTRA BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTRA,
        gen9_mfc_batchbuffer_avc,
        sizeof(gen9_mfc_batchbuffer_avc),
        NULL
    },
};
131
/* Flat AVC scaling matrix: 64 bytes of 0x10 (16), packed 4 per DW.
 * Used when neither SPS nor PPS provides scaling lists. */
static const uint32_t qm_flat[16] = {
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010,
    0x10101010, 0x10101010, 0x10101010, 0x10101010
};
138
/* Flat forward-quantizer matrix: 64 uint16 entries of 0x1000 (== 0x10000/16,
 * the fixed-point reciprocal of qm_flat), packed 2 per DW. */
static const uint32_t fqm_flat[32] = {
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000,
    0x10001000, 0x10001000, 0x10001000, 0x10001000
};
149
150 #define     INTER_MODE_MASK     0x03
151 #define     INTER_8X8       0x03
152 #define     INTER_16X8      0x01
153 #define     INTER_8X16      0x02
154 #define     SUBMB_SHAPE_MASK    0x00FF00
155 #define     INTER_16X16     0x00
156
157 #define     INTER_MV8       (4 << 20)
158 #define     INTER_MV32      (6 << 20)
159
160
/* Emit MFX_PIPE_MODE_SELECT (5 DWs): puts the MFX engine into encode mode
 * for the given codec. Post/pre deblocking output enables mirror whether the
 * corresponding BOs were allocated in gen8_mfc_init. */
static void
gen8_mfc_pipe_mode_select(VADriverContextP ctx,
                          int standard_select,
                          struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    /* Only these four codec formats are supported by this backend. */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC   ||
           standard_select == MFX_FORMAT_JPEG  ||
           standard_select == MFX_FORMAT_VP8);

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* Stream-Out Enable */
                  ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
                  ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
                  (0 << 6)  | /* frame statistics stream-out enable*/
                  (0 << 5)  | /* not in stitch mode */
                  (1 << 4)  | /* encoding mode */
                  (standard_select << 0));  /* standard select: avc or mpeg2 or jpeg*/
    OUT_BCS_BATCH(batch,
                  (0 << 7)  | /* expand NOA bus flag */
                  (0 << 6)  | /* disable slice-level clock gating */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    /* DW3-4: reserved/pic status-error report not used by the encoder. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
201
/* Emit MFX_SURFACE_STATE (6 DWs) describing the reconstructed/source
 * picture: NV12 (planar 4:2:0, interleaved U/V), Y-major tiled, with
 * dimensions and pitch taken from mfc_context->surface_state. */
static void
gen8_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((mfc_context->surface_state.height - 1) << 18) |
                  ((mfc_context->surface_state.width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
                  (0 << 22) | /* surface object control state, FIXME??? */
                  ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 for interleave U/V */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                               /* must be 0 for interleave U/V */
                  (mfc_context->surface_state.h_pitch));        /* y offset for U(cb) */
    /* DW5: y offset for V (cr) — unused with interleaved U/V. */
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
230
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 DWs): programs the indirect object
 * base addresses — VME motion-vector input (except for JPEG, which has no
 * VME stage) and the PAK-BSE output buffer that receives the coded
 * bitstream. VP8 additionally programs the MFX upper bound (DW4-5). */
static void
gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
                                 struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int vme_size;
    unsigned int bse_offset;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* the DW1-3 is for the MFX indirect bistream offset */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW4-5 is the MFX upper bound */
    if (encoder_context->codec == CODEC_VP8) {
        OUT_BCS_RELOC64(batch,
                        mfc_context->mfc_indirect_pak_bse_object.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        mfc_context->mfc_indirect_pak_bse_object.end_offset);
    } else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    if (encoder_context->codec != CODEC_JPEG) {
        /* Upper bound = base + total VME output size. */
        vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;
        /* the DW6-10 is for MFX Indirect MV Object Base Address */
        OUT_BCS_RELOC64(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
        OUT_BCS_BATCH(batch, i965->intel.mocs_state);
        OUT_BCS_RELOC64(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size);
    } else {
        /* No VME for JPEG */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
    /* JPEG writes the bitstream at a non-zero offset inside the BSE object. */
    bse_offset = (encoder_context->codec == CODEC_JPEG) ? (mfc_context->mfc_indirect_pak_bse_object.offset) : 0;
    OUT_BCS_RELOC64(batch,
                    mfc_context->mfc_indirect_pak_bse_object.bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    bse_offset);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    OUT_BCS_RELOC64(batch,
                    mfc_context->mfc_indirect_pak_bse_object.bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    mfc_context->mfc_indirect_pak_bse_object.end_offset);

    ADVANCE_BCS_BATCH(batch);
}
305
/* Emit MFX_AVC_IMG_STATE (16 DWs): per-frame AVC picture parameters —
 * frame size in MBs, QP offsets, entropy coding mode, transform-8x8 flag,
 * and conformance limits. Dimensions come from mfc_context->surface_state,
 * the coding flags from the VA-API picture parameter buffer. */
static void
gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;

    BEGIN_BCS_BATCH(batch, 16);

    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    /*DW1. MB setting of frame */
    OUT_BCS_BATCH(batch,
                  ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    /* DW3 QP setting */
    OUT_BCS_BATCH(batch,
                  (0 << 24) |   /* Second Chroma QP Offset */
                  (0 << 16) |   /* Chroma QP Offset */
                  (0 << 14) |   /* Max-bit conformance Intra flag */
                  (0 << 13) |   /* Max Macroblock size conformance Inter flag */
                  (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
                  (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
                  (0 << 8)  |   /* FIXME: Image Structure */
                  (0 << 0));    /* Current Decoed Image Frame Store ID, reserved in Encode mode */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |   /* Mininum Frame size */
                  (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
                  (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
                  (0 << 13) |   /* CABAC 0 word insertion test enable */
                  (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
                  (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
                  (0 << 8)  |   /* FIXME: MbMvFormatFlag */
                  (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
                  (0 << 6)  |   /* Only valid for VLD decoding mode */
                  (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
                  (0 << 4)  |   /* Direct 8x8 inference flag */
                  (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
                  (1 << 2)  |   /* Frame MB only flag */
                  (0 << 1)  |   /* MBAFF mode is in active */
                  (0 << 0));    /* Field picture flag */
    /* DW5 Trellis quantization */
    OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
    OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
                  (0xBB8 << 16) |       /* InterMbMaxSz */
                  (0xEE8));             /* IntraMbMaxSz */
    OUT_BCS_BATCH(batch, 0);            /* Reserved */
    /* DW8. QP delta */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    /* DW10. Bit setting for MB */
    OUT_BCS_BATCH(batch, 0x8C000000);
    OUT_BCS_BATCH(batch, 0x00010000);
    /* DW12. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0x02010100);
    /* DW14. For short format */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
373
374 static void
375 gen8_mfc_qm_state(VADriverContextP ctx,
376                   int qm_type,
377                   const uint32_t *qm,
378                   int qm_length,
379                   struct intel_encoder_context *encoder_context)
380 {
381     struct intel_batchbuffer *batch = encoder_context->base.batch;
382     unsigned int qm_buffer[16];
383
384     assert(qm_length <= 16);
385     assert(sizeof(*qm) == 4);
386     memcpy(qm_buffer, qm, qm_length * 4);
387
388     BEGIN_BCS_BATCH(batch, 18);
389     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
390     OUT_BCS_BATCH(batch, qm_type << 0);
391     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
392     ADVANCE_BCS_BATCH(batch);
393 }
394
395 static void
396 gen8_mfc_avc_qm_state(VADriverContextP ctx,
397                       struct encode_state *encode_state,
398                       struct intel_encoder_context *encoder_context)
399 {
400     const unsigned int *qm_4x4_intra;
401     const unsigned int *qm_4x4_inter;
402     const unsigned int *qm_8x8_intra;
403     const unsigned int *qm_8x8_inter;
404     VAEncSequenceParameterBufferH264 *pSeqParameter =
405         (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
406     VAEncPictureParameterBufferH264 *pPicParameter =
407         (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
408
409     if (!pSeqParameter->seq_fields.bits.seq_scaling_matrix_present_flag
410         && !pPicParameter->pic_fields.bits.pic_scaling_matrix_present_flag) {
411         qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
412     } else {
413         VAIQMatrixBufferH264 *qm;
414         assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
415         qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
416         qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
417         qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
418         qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
419         qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
420     }
421
422     gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
423     gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
424     gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
425     gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
426 }
427
428 static void
429 gen8_mfc_fqm_state(VADriverContextP ctx,
430                    int fqm_type,
431                    const uint32_t *fqm,
432                    int fqm_length,
433                    struct intel_encoder_context *encoder_context)
434 {
435     struct intel_batchbuffer *batch = encoder_context->base.batch;
436     unsigned int fqm_buffer[32];
437
438     assert(fqm_length <= 32);
439     assert(sizeof(*fqm) == 4);
440     memcpy(fqm_buffer, fqm, fqm_length * 4);
441
442     BEGIN_BCS_BATCH(batch, 34);
443     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
444     OUT_BCS_BATCH(batch, fqm_type << 0);
445     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
446     ADVANCE_BCS_BATCH(batch);
447 }
448
/* Convert a scaling list into the hardware forward-quantizer form:
 * each entry is the 16-bit truncation of 0x10000 / qm value, and the
 * matrix is transposed (fqm[row][col] comes from qm[col][row]). */
static void
gen8_mfc_avc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
{
    int row, col;

    for (row = 0; row < len; row++) {
        for (col = 0; col < len; col++) {
            fqm[row * len + col] = (uint16_t)((1 << 16) / qm[col * len + row]);
        }
    }
}
457
/* Load the four AVC forward-quantizer matrices. With flat scaling lists the
 * precomputed fqm_flat table is used; otherwise each FQM is derived from
 * the VA-API IQ matrix buffer via gen8_mfc_avc_fill_fqm. The three 4x4
 * intra (resp. inter) lists are packed back-to-back into one 24-DW load. */
static void
gen8_mfc_avc_fqm_state(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
{
    VAEncSequenceParameterBufferH264 *pSeqParameter =
        (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferH264 *pPicParameter =
        (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

    if (!pSeqParameter->seq_fields.bits.seq_scaling_matrix_present_flag
        && !pPicParameter->pic_fields.bits.pic_scaling_matrix_present_flag) {
        gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
        gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
        gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
        gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
    } else {
        int i;
        uint32_t fqm[32];
        VAIQMatrixBufferH264 *qm;
        assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
        qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;

        /* ScalingList4x4[0..2] are the intra Y/Cb/Cr lists; each fills
         * 16 uint16 entries (8 DWs) of the staging buffer. */
        for (i = 0; i < 3; i++)
            gen8_mfc_avc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
        gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);

        /* ScalingList4x4[3..5] are the inter Y/Cb/Cr lists. */
        for (i = 3; i < 6; i++)
            gen8_mfc_avc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
        gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);

        gen8_mfc_avc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
        gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);

        gen8_mfc_avc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
        gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
    }
}
496
497 static void
498 gen8_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
499                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
500                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
501                            struct intel_batchbuffer *batch)
502 {
503     if (batch == NULL)
504         batch = encoder_context->base.batch;
505
506     if (data_bits_in_last_dw == 0)
507         data_bits_in_last_dw = 32;
508
509     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
510
511     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
512     OUT_BCS_BATCH(batch,
513                   (0 << 16) |   /* always start at offset 0 */
514                   (data_bits_in_last_dw << 8) |
515                   (skip_emul_byte_count << 4) |
516                   (!!emulation_flag << 3) |
517                   ((!!is_last_header) << 2) |
518                   ((!!is_end_of_slice) << 1) |
519                   (0 << 0));    /* FIXME: ??? */
520     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
521
522     ADVANCE_BCS_BATCH(batch);
523 }
524
525
/* Per-frame MFC (re)initialization: derives the frame size in macroblocks
 * from the codec-specific sequence/picture parameters, releases all buffer
 * objects held from the previous frame, and (re)allocates the scratch
 * buffers plus the auxiliary slice batch buffer. */
static void gen8_mfc_init(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    dri_bo *bo;
    int i;
    int width_in_mbs = 0;
    int height_in_mbs = 0;
    int slice_batchbuffer_size;

    /* Frame dimensions in MBs, per codec. */
    if (encoder_context->codec == CODEC_H264 ||
        encoder_context->codec == CODEC_H264_MVC) {
        VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        width_in_mbs = pSequenceParameter->picture_width_in_mbs;
        height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    } else if (encoder_context->codec == CODEC_MPEG2) {
        VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;

        assert(encoder_context->codec == CODEC_MPEG2);

        width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
        height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
    } else {
        assert(encoder_context->codec == CODEC_JPEG);
        VAEncPictureParameterBufferJPEG *pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;

        width_in_mbs = ALIGN(pic_param->picture_width, 16) / 16;
        height_in_mbs = ALIGN(pic_param->picture_height, 16) / 16;
    }

    /* Worst-case batch size: per-MB commands plus per-slice header/tail. */
    slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
                             (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;

    /*Encode common setup for MFC*/
    dri_bo_unreference(mfc_context->post_deblocking_output.bo);
    mfc_context->post_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
    mfc_context->pre_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
    mfc_context->uncompressed_picture_source.bo = NULL;

    dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
    mfc_context->mfc_indirect_pak_bse_object.bo = NULL;

    for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
        if (mfc_context->direct_mv_buffers[i].bo != NULL)
            dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
        mfc_context->direct_mv_buffers[i].bo = NULL;
    }

    for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
        if (mfc_context->reference_surfaces[i].bo != NULL)
            dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
        mfc_context->reference_surfaces[i].bo = NULL;
    }

    /* One intra-prediction row-store line per MB column. */
    dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      width_in_mbs * 64,
                      64);
    assert(bo);
    mfc_context->intra_row_store_scratch_buffer.bo = bo;

    /* 16 bytes of status per macroblock. */
    dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      width_in_mbs * height_in_mbs * 16,
                      64);
    assert(bo);
    mfc_context->macroblock_status_buffer.bo = bo;

    dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
                      64);
    assert(bo);
    mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;

    dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
                      0x1000);
    assert(bo);
    mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;

    dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
    mfc_context->mfc_batchbuffer_surface.bo = NULL;

    dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.bo = NULL;

    if (mfc_context->aux_batchbuffer)
        intel_batchbuffer_free(mfc_context->aux_batchbuffer);

    /* The aux batch buffer doubles as a 16-byte-per-block surface so the
     * media kernel can write MB commands into it. */
    mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
    mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
    dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.pitch = 16;
    mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
    mfc_context->aux_batchbuffer_surface.size_block = 16;

    gen8_gpe_context_init(ctx, &mfc_context->gpe_context);
}
636
/* Emit MFX_PIPE_BUF_ADDR_STATE (61 DWs): programs every pipeline buffer
 * address — pre/post deblocking outputs, source picture, MB status,
 * row-store scratch buffers, and the 16 reference picture slots. Each
 * address is a 64-bit relocation followed by a MOCS DW; absent buffers
 * are programmed as zero. */
static void
gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx,
                             struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int i;

    BEGIN_BCS_BATCH(batch, 61);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));

    /* the DW1-3 is for pre_deblocking */
    if (mfc_context->pre_deblocking_output.bo)
        OUT_BCS_RELOC64(batch, mfc_context->pre_deblocking_output.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);                                            /* pre output addr   */

    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* the DW4-6 is for the post_deblocking */

    if (mfc_context->post_deblocking_output.bo)
        OUT_BCS_RELOC64(batch, mfc_context->post_deblocking_output.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);                                           /* post output addr  */
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW7-9 is for the uncompressed_picture */
    OUT_BCS_RELOC64(batch, mfc_context->uncompressed_picture_source.bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0); /* uncompressed data */

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW10-12 is for the mb status */
    OUT_BCS_RELOC64(batch, mfc_context->macroblock_status_buffer.bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0); /* StreamOut data*/

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BCS_RELOC64(batch, mfc_context->intra_row_store_scratch_buffer.bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW16-18 is for the deblocking filter */
    OUT_BCS_RELOC64(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW 19-50 is for Reference pictures*/
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        if (mfc_context->reference_surfaces[i].bo != NULL) {
            OUT_BCS_RELOC64(batch, mfc_context->reference_surfaces[i].bo,
                            I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                            0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }

    }

    /* DW51: shared MOCS for all reference surfaces. */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* The DW 52-54 is for the MB status buffer */
    OUT_BCS_RELOC64(batch, mfc_context->macroblock_status_buffer.bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);                                           /* Macroblock status buffer*/

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW 55-57 is the ILDB buffer */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 58-60 is the second ILDB buffer */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
736
/*
 * Program MFX_AVC_DIRECTMODE_STATE (71 dwords): direct-mode motion
 * vector buffers for the reference frames and the current frame, plus
 * the per-frame list used for direct prediction.
 */
static void
gen8_mfc_avc_directmode_state(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    int i;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* the DW1-32 is for the direct MV for reference */
    /* Each 64-bit reloc occupies two dwords, so stepping by 2 emits
     * only the even-indexed DMV buffers (16 relocs filling DW1-32). */
    for (i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
        if (mfc_context->direct_mv_buffers[i].bo != NULL) {
            OUT_BCS_RELOC64(batch, mfc_context->direct_mv_buffers[i].bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            /* Unallocated slot: emit a zero 64-bit address. */
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* DW33: memory object control state for the buffers above */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW34-36 is the MV for the current reference */
    OUT_BCS_RELOC64(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
                    I915_GEM_DOMAIN_INSTRUCTION, 0,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* POL list */
    /* NOTE(review): presumably the POC (picture order count) list —
     * two identical entries (i / 2) per frame; confirm against the PRM. */
    for (i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, i / 2);
    }
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
782
783
/*
 * Program MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): base addresses of
 * the bitstream-processing row-store scratch buffers.  Only the
 * BSD/MPC row store is backed by a BO here; the remaining entries are
 * left zero (not backed by buffers on this path).
 */
static void
gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
    /* DW1-2: BSD/MPC row store scratch buffer base address */
    OUT_BCS_RELOC64(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);
    /* DW3: memory object control state for the row store buffer */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW7-9 is for Bitplane Read Buffer Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
812
813
/*
 * Emit all per-picture MFX state for AVC encoding, in sequence:
 * pipe mode select, surface state, indirect object base, pipe/BSP
 * buffer addresses, AVC image state, QM/FQM matrices, direct-mode
 * buffers and the reference index lists.  Per-gen hooks come from
 * mfc_context; the gen8-specific pieces are called directly.
 */
static void gen8_mfc_avc_pipeline_picture_programing(VADriverContextP ctx,
                                                     struct encode_state *encode_state,
                                                     struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
    mfc_context->set_surface_state(ctx, encoder_context);
    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
    gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    mfc_context->avc_img_state(ctx, encode_state, encoder_context);
    mfc_context->avc_qm_state(ctx, encode_state, encoder_context);
    mfc_context->avc_fqm_state(ctx, encode_state, encoder_context);
    gen8_mfc_avc_directmode_state(ctx, encoder_context);
    intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
}
831
832
833 static VAStatus gen8_mfc_run(VADriverContextP ctx,
834                              struct encode_state *encode_state,
835                              struct intel_encoder_context *encoder_context)
836 {
837     struct intel_batchbuffer *batch = encoder_context->base.batch;
838
839     intel_batchbuffer_flush(batch);     //run the pipeline
840
841     return VA_STATUS_SUCCESS;
842 }
843
844
845 static VAStatus
846 gen8_mfc_stop(VADriverContextP ctx,
847               struct encode_state *encode_state,
848               struct intel_encoder_context *encoder_context,
849               int *encoded_bits_size)
850 {
851     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
852     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
853     VACodedBufferSegment *coded_buffer_segment;
854
855     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
856     assert(vaStatus == VA_STATUS_SUCCESS);
857     *encoded_bits_size = coded_buffer_segment->size * 8;
858     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
859
860     return VA_STATUS_SUCCESS;
861 }
862
863
864 static void
865 gen8_mfc_avc_slice_state(VADriverContextP ctx,
866                          VAEncPictureParameterBufferH264 *pic_param,
867                          VAEncSliceParameterBufferH264 *slice_param,
868                          struct encode_state *encode_state,
869                          struct intel_encoder_context *encoder_context,
870                          int rate_control_enable,
871                          int qp,
872                          struct intel_batchbuffer *batch)
873 {
874     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
875     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
876     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
877     int beginmb = slice_param->macroblock_address;
878     int endmb = beginmb + slice_param->num_macroblocks;
879     int beginx = beginmb % width_in_mbs;
880     int beginy = beginmb / width_in_mbs;
881     int nextx =  endmb % width_in_mbs;
882     int nexty = endmb / width_in_mbs;
883     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
884     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
885     int maxQpN, maxQpP;
886     unsigned char correct[6], grow, shrink;
887     int i;
888     int weighted_pred_idc = 0;
889     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
890     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
891     int num_ref_l0 = 0, num_ref_l1 = 0;
892
893     if (batch == NULL)
894         batch = encoder_context->base.batch;
895
896     if (slice_type == SLICE_TYPE_I) {
897         luma_log2_weight_denom = 0;
898         chroma_log2_weight_denom = 0;
899     } else if (slice_type == SLICE_TYPE_P) {
900         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
901         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
902
903         if (slice_param->num_ref_idx_active_override_flag)
904             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
905     } else if (slice_type == SLICE_TYPE_B) {
906         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
907         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
908         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
909
910         if (slice_param->num_ref_idx_active_override_flag) {
911             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
912             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
913         }
914
915         if (weighted_pred_idc == 2) {
916             /* 8.4.3 - Derivation process for prediction weights (8-279) */
917             luma_log2_weight_denom = 5;
918             chroma_log2_weight_denom = 5;
919         }
920     }
921
922     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
923     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
924
925     for (i = 0; i < 6; i++)
926         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
927
928     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit +
929            (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
930     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit +
931              (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
932
933     BEGIN_BCS_BATCH(batch, 11);;
934
935     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
936     OUT_BCS_BATCH(batch, slice_type);           /*Slice Type: I:P:B Slice*/
937
938     OUT_BCS_BATCH(batch,
939                   (num_ref_l0 << 16) |
940                   (num_ref_l1 << 24) |
941                   (chroma_log2_weight_denom << 8) |
942                   (luma_log2_weight_denom << 0));
943
944     OUT_BCS_BATCH(batch,
945                   (weighted_pred_idc << 30) |
946                   (slice_param->direct_spatial_mv_pred_flag << 29) |           /*Direct Prediction Type*/
947                   (slice_param->disable_deblocking_filter_idc << 27) |
948                   (slice_param->cabac_init_idc << 24) |
949                   (qp << 16) |          /*Slice Quantization Parameter*/
950                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
951                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
952     OUT_BCS_BATCH(batch,
953                   (beginy << 24) |          /*First MB X&Y , the begin postion of current slice*/
954                   (beginx << 16) |
955                   slice_param->macroblock_address);
956     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
957     OUT_BCS_BATCH(batch,
958                   (0/*rate_control_enable*/ << 31) |        /*in CBR mode RateControlCounterEnable = enable*/
959                   (1 << 30) |       /*ResetRateControlCounter*/
960                   (0 << 28) |       /*RC Triggle Mode = Always Rate Control*/
961                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
962                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/
963                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
964                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/
965                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/
966                   (last_slice << 19) |     /*IsLastSlice*/
967                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
968                   (1 << 17) |       /*HeaderPresentFlag*/
969                   (1 << 16) |       /*SliceData PresentFlag*/
970                   (1 << 15) |       /*TailPresentFlag*/
971                   (1 << 13) |       /*RBSP NAL TYPE*/
972                   (0 << 12));     /*CabacZeroWordInsertionEnable*/
973     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
974     OUT_BCS_BATCH(batch,
975                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/
976                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
977                   (shrink << 8)  |
978                   (grow << 0));
979     OUT_BCS_BATCH(batch,
980                   (correct[5] << 20) |
981                   (correct[4] << 16) |
982                   (correct[3] << 12) |
983                   (correct[2] << 8) |
984                   (correct[1] << 4) |
985                   (correct[0] << 0));
986     OUT_BCS_BATCH(batch, 0);
987
988     ADVANCE_BCS_BATCH(batch);
989 }
990
991 #define    AVC_INTRA_RDO_OFFSET    4
992 #define    AVC_INTER_RDO_OFFSET    10
993 #define    AVC_INTER_MSG_OFFSET    8
994 #define    AVC_INTER_MV_OFFSET     48
995 #define    AVC_RDO_MASK            0xFFFF
996
/*
 * Emit one MFC_AVC_PAK_OBJECT (12 dwords) for an intra macroblock at
 * MB coordinates (x, y).  'msg' points to the VME output record for
 * this MB; its mode/type bits are repacked into the PAK intra message.
 * Writes to 'batch' if given, otherwise the encoder's default batch.
 * Returns the command length in dwords.
 */
static int
gen8_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
                              int qp, unsigned int *msg,
                              struct intel_encoder_context *encoder_context,
                              unsigned char target_mb_size, unsigned char max_mb_size,
                              struct intel_batchbuffer *batch)
{
    int len_in_dwords = 12;
    unsigned int intra_msg;
#define     INTRA_MSG_FLAG      (1 << 13)
#define     INTRA_MBTYPE_MASK   (0x1F0000)
    if (batch == NULL)
        batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    /* Keep the low mode bits, set the intra flag and move the MB type
     * field down into the PAK message layout. */
    intra_msg = msg[0] & 0xC0FF;
    intra_msg |= INTRA_MSG_FLAG;
    intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 24) |       /* PackedMvNum, Debug*/
                  (0 << 20) |       /* No motion vector */
                  (1 << 19) |       /* CbpDcY */
                  (1 << 18) |       /* CbpDcU */
                  (1 << 17) |       /* CbpDcV */
                  intra_msg);

    OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);        /* Code Block Pattern for Y*/
    OUT_BCS_BATCH(batch, 0x000F000F);                           /* Code Block Pattern */
    OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);  /* Last MB */

    /*Stuff for Intra MB*/
    OUT_BCS_BATCH(batch, msg[1]);           /* We using Intra16x16 no 4x4 predmode*/
    OUT_BCS_BATCH(batch, msg[2]);
    OUT_BCS_BATCH(batch, msg[3] & 0xFF);

    /* MaxSizeInWord and TargetSizeInWord */
    OUT_BCS_BATCH(batch, (max_mb_size << 24) |
                  (target_mb_size << 16));

    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}
1046
/*
 * Emit one MFC_AVC_PAK_OBJECT (12 dwords) for an inter macroblock at
 * MB coordinates (x, y).  'msg' points to the VME output record;
 * 'offset' is the byte offset of this MB's motion vectors inside the
 * VME output buffer (passed to the HW as the indirect MV address).
 * NOTE: the MV area of 'msg' is rewritten in place to match the PAK
 * command layout.  Returns the command length in dwords.
 */
static int
gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
                              unsigned int *msg, unsigned int offset,
                              struct intel_encoder_context *encoder_context,
                              unsigned char target_mb_size, unsigned char max_mb_size, int slice_type,
                              struct intel_batchbuffer *batch)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int len_in_dwords = 12;
    unsigned int inter_msg = 0;
    if (batch == NULL)
        batch = encoder_context->base.batch;
    {
#define MSG_MV_OFFSET   4
        unsigned int *mv_ptr;
        mv_ptr = msg + MSG_MV_OFFSET;
        /* MV of VME output is based on 16 sub-blocks. So it is necessary
         * to convert them to be compatible with the format of AVC_PAK
         * command.
         */
        if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
            /* MV[0] and MV[2] are replicated */
            mv_ptr[4] = mv_ptr[0];
            mv_ptr[5] = mv_ptr[1];
            mv_ptr[2] = mv_ptr[8];
            mv_ptr[3] = mv_ptr[9];
            mv_ptr[6] = mv_ptr[8];
            mv_ptr[7] = mv_ptr[9];
        } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
            /* MV[0] and MV[1] are replicated */
            mv_ptr[2] = mv_ptr[0];
            mv_ptr[3] = mv_ptr[1];
            mv_ptr[4] = mv_ptr[16];
            mv_ptr[5] = mv_ptr[17];
            mv_ptr[6] = mv_ptr[24];
            mv_ptr[7] = mv_ptr[25];
        } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
                   !(msg[1] & SUBMB_SHAPE_MASK)) {
            /* Don't touch MV[0] or MV[1] */
            mv_ptr[2] = mv_ptr[8];
            mv_ptr[3] = mv_ptr[9];
            mv_ptr[4] = mv_ptr[16];
            mv_ptr[5] = mv_ptr[17];
            mv_ptr[6] = mv_ptr[24];
            mv_ptr[7] = mv_ptr[25];
        }
    }

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));

    /* DW1: number of packed MV dwords — 32 normally, 128 when the MB is
     * 8x8 with sub-partitions (one MV per 4x4 sub-block). */
    inter_msg = 32;
    /* MV quantity */
    if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
        if (msg[1] & SUBMB_SHAPE_MASK)
            inter_msg = 128;
    }
    OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
    OUT_BCS_BATCH(batch, offset);
    /* DW3: inter message — keep mode/type bits, force CbpDc Y/U/V and
     * select the MV count flavor. */
    inter_msg = msg[0] & (0x1F00FFFF);
    inter_msg |= INTER_MV8;
    inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
    if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
        (msg[1] & SUBMB_SHAPE_MASK)) {
        inter_msg |= INTER_MV32;
    }

    OUT_BCS_BATCH(batch, inter_msg);

    OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);      /* Code Block Pattern for Y*/
    OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */
#if 0
    if (slice_type == SLICE_TYPE_B) {
        OUT_BCS_BATCH(batch, (0xF << 28) | (end_mb << 26) | qp); /* Last MB */
    } else {
        OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
    }
#else
    OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
#endif

    inter_msg = msg[1] >> 8;
    /*Stuff for Inter MB*/
    OUT_BCS_BATCH(batch, inter_msg);
    OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
    OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);

    /* MaxSizeInWord and TargetSizeInWord */
    OUT_BCS_BATCH(batch, (max_mb_size << 24) |
                  (target_mb_size << 16));

    OUT_BCS_BATCH(batch, 0x0);

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}
1145
1146 static void
1147 gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1148                                        struct encode_state *encode_state,
1149                                        struct intel_encoder_context *encoder_context,
1150                                        int slice_index,
1151                                        struct intel_batchbuffer *slice_batch)
1152 {
1153     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1154     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1155     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1156     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1157     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1158     unsigned int *msg = NULL, offset = 0;
1159     unsigned char *msg_ptr = NULL;
1160     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1161     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1162     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1163     int i, x, y;
1164     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1165     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1166     unsigned int tail_data[] = { 0x0, 0x0 };
1167     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1168     int is_intra = slice_type == SLICE_TYPE_I;
1169     int qp_slice;
1170     int qp_mb;
1171
1172     qp_slice = qp;
1173     if (rate_control_mode != VA_RC_CQP) {
1174         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
1175         if (encode_state->slice_header_index[slice_index] == 0) {
1176             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1177             qp_slice = qp;
1178         }
1179     }
1180
1181     /* only support for 8-bit pixel bit-depth */
1182     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1183     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1184     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1185     assert(qp >= 0 && qp < 52);
1186
1187     gen8_mfc_avc_slice_state(ctx,
1188                              pPicParameter,
1189                              pSliceParameter,
1190                              encode_state, encoder_context,
1191                              (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);
1192
1193     if (slice_index == 0) {
1194         intel_avc_insert_aud_packed_data(ctx, encode_state, encoder_context, slice_batch);
1195         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1196     }
1197
1198     intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1199
1200     dri_bo_map(vme_context->vme_output.bo, 1);
1201     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1202
1203     if (is_intra) {
1204         msg = (unsigned int *)(msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1205     } else {
1206         msg = (unsigned int *)(msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1207     }
1208
1209     for (i = pSliceParameter->macroblock_address;
1210          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1211         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1));
1212         x = i % width_in_mbs;
1213         y = i / width_in_mbs;
1214         msg = (unsigned int *)(msg_ptr + i * vme_context->vme_output.size_block);
1215         if (vme_context->roi_enabled) {
1216             qp_mb = *(vme_context->qp_per_mb + i);
1217         } else
1218             qp_mb = qp;
1219
1220         if (is_intra) {
1221             assert(msg);
1222             gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch);
1223         } else {
1224             int inter_rdo, intra_rdo;
1225             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1226             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1227             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1228             if (intra_rdo < inter_rdo) {
1229                 gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch);
1230             } else {
1231                 msg += AVC_INTER_MSG_OFFSET;
1232                 gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp_mb, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1233             }
1234         }
1235     }
1236
1237     dri_bo_unmap(vme_context->vme_output.bo);
1238
1239     if (last_slice) {
1240         mfc_context->insert_object(ctx, encoder_context,
1241                                    tail_data, 2, 8,
1242                                    2, 1, 1, 0, slice_batch);
1243     } else {
1244         mfc_context->insert_object(ctx, encoder_context,
1245                                    tail_data, 1, 8,
1246                                    1, 1, 1, 0, slice_batch);
1247     }
1248 }
1249
1250 static dri_bo *
1251 gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1252                                   struct encode_state *encode_state,
1253                                   struct intel_encoder_context *encoder_context)
1254 {
1255     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1256     struct intel_batchbuffer *batch;
1257     dri_bo *batch_bo;
1258     int i;
1259
1260     batch = mfc_context->aux_batchbuffer;
1261     batch_bo = batch->buffer;
1262     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1263         gen8_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1264     }
1265
1266     intel_batchbuffer_align(batch, 8);
1267
1268     BEGIN_BCS_BATCH(batch, 2);
1269     OUT_BCS_BATCH(batch, 0);
1270     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1271     ADVANCE_BCS_BATCH(batch);
1272
1273     dri_bo_reference(batch_bo);
1274     intel_batchbuffer_free(batch);
1275     mfc_context->aux_batchbuffer = NULL;
1276
1277     return batch_bo;
1278 }
1279
1280
1281 static void
1282 gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1283                                     struct encode_state *encode_state,
1284                                     struct intel_encoder_context *encoder_context)
1285 {
1286     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1287     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1288
1289     assert(vme_context->vme_output.bo);
1290     mfc_context->buffer_suface_setup(ctx,
1291                                      &mfc_context->gpe_context,
1292                                      &vme_context->vme_output,
1293                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1294                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1295 }
1296
1297 static void
1298 gen8_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1299                                      struct encode_state *encode_state,
1300                                      struct intel_encoder_context *encoder_context)
1301 {
1302     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1303     assert(mfc_context->aux_batchbuffer_surface.bo);
1304     mfc_context->buffer_suface_setup(ctx,
1305                                      &mfc_context->gpe_context,
1306                                      &mfc_context->aux_batchbuffer_surface,
1307                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1308                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1309 }
1310
/*
 * Bind both surfaces used by the MFC batchbuffer kernel: the VME
 * output (input) and the aux batchbuffer surface (output).
 */
static void
gen8_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context)
{
    gen8_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
    gen8_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
}
1319
1320 static void
1321 gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
1322                                 struct encode_state *encode_state,
1323                                 struct intel_encoder_context *encoder_context)
1324 {
1325     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1326     struct gen8_interface_descriptor_data *desc;
1327     int i;
1328     dri_bo *bo;
1329     unsigned char *desc_ptr;
1330
1331     bo = mfc_context->gpe_context.idrt.bo;
1332     dri_bo_map(bo, 1);
1333     assert(bo->virtual);
1334     desc_ptr = (unsigned char *)bo->virtual + mfc_context->gpe_context.idrt.offset;
1335
1336     desc = (struct gen8_interface_descriptor_data *)desc_ptr;
1337
1338     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1339         struct i965_kernel *kernel;
1340         kernel = &mfc_context->gpe_context.kernels[i];
1341         assert(sizeof(*desc) == 32);
1342         /*Setup the descritor table*/
1343         memset(desc, 0, sizeof(*desc));
1344         desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
1345         desc->desc3.sampler_count = 0;
1346         desc->desc3.sampler_state_pointer = 0;
1347         desc->desc4.binding_table_entry_count = 1;
1348         desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1349         desc->desc5.constant_urb_entry_read_offset = 0;
1350         desc->desc5.constant_urb_entry_read_length = 4;
1351
1352
1353         desc++;
1354     }
1355
1356     dri_bo_unmap(bo);
1357
1358     return;
1359 }
1360
1361 static void
1362 gen8_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
1363                                     struct encode_state *encode_state,
1364                                     struct intel_encoder_context *encoder_context)
1365 {
1366     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1367
1368     (void)mfc_context;
1369 }
1370
1371 #define AVC_PAK_LEN_IN_BYTE 48
1372 #define AVC_PAK_LEN_IN_OWORD    3
1373
/*
 * Emit one CMD_MEDIA_OBJECT (14 dwords) that dispatches the MFC
 * batchbuffer kernel.  DW6-13 carry the inline payload: the PAK write
 * position (in 16-byte units), intra flag and QP, MB coordinates and
 * picture width (in MBs), the number of MB commands to generate, the
 * slice end position, and the forward/backward reference indices.
 */
static void
gen8_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
                                         uint32_t intra_flag,
                                         int head_offset,
                                         int number_mb_cmds,
                                         int slice_end_x,
                                         int slice_end_y,
                                         int mb_x,
                                         int mb_y,
                                         int width_in_mbs,
                                         int qp,
                                         uint32_t fwd_ref,
                                         uint32_t bwd_ref)
{
    uint32_t temp_value;
    BEGIN_BATCH(batch, 14);

    OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    /*inline data */
    OUT_BATCH(batch, head_offset / 16);                 /* write position in owords */
    OUT_BATCH(batch, (intra_flag) | (qp << 16));
    temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16));
    OUT_BATCH(batch, temp_value);

    OUT_BATCH(batch, number_mb_cmds);

    OUT_BATCH(batch,
              ((slice_end_y << 8) | (slice_end_x)));
    OUT_BATCH(batch, fwd_ref);
    OUT_BATCH(batch, bwd_ref);

    OUT_BATCH(batch, MI_NOOP);

    ADVANCE_BATCH(batch);
}
1415
1416 static void
1417 gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1418                                        struct intel_encoder_context *encoder_context,
1419                                        VAEncSliceParameterBufferH264 *slice_param,
1420                                        int head_offset,
1421                                        int qp,
1422                                        int last_slice)
1423 {
1424     struct intel_batchbuffer *batch = encoder_context->base.batch;
1425     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1426     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1427     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1428     int total_mbs = slice_param->num_macroblocks;
1429     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1430     int number_mb_cmds = 128;
1431     int starting_offset = 0;
1432     int mb_x, mb_y;
1433     int last_mb, slice_end_x, slice_end_y;
1434     int remaining_mb = total_mbs;
1435     uint32_t fwd_ref, bwd_ref, mb_flag;
1436     char tmp_qp;
1437     int number_roi_mbs, max_mb_cmds, i;
1438
1439     last_mb = slice_param->macroblock_address + total_mbs - 1;
1440     slice_end_x = last_mb % width_in_mbs;
1441     slice_end_y = last_mb / width_in_mbs;
1442
1443     if (slice_type == SLICE_TYPE_I) {
1444         fwd_ref = 0;
1445         bwd_ref = 0;
1446         mb_flag = 1;
1447     } else {
1448         fwd_ref = vme_context->ref_index_in_mb[0];
1449         bwd_ref = vme_context->ref_index_in_mb[1];
1450         mb_flag = 0;
1451     }
1452
1453     if (width_in_mbs >= 100) {
1454         number_mb_cmds = width_in_mbs / 5;
1455     } else if (width_in_mbs >= 80) {
1456         number_mb_cmds = width_in_mbs / 4;
1457     } else if (width_in_mbs >= 60) {
1458         number_mb_cmds = width_in_mbs / 3;
1459     } else if (width_in_mbs >= 40) {
1460         number_mb_cmds = width_in_mbs / 2;
1461     } else {
1462         number_mb_cmds = width_in_mbs;
1463     }
1464
1465     max_mb_cmds = number_mb_cmds;
1466
1467     do {
1468         mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs;
1469         mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs;
1470
1471         number_mb_cmds = max_mb_cmds;
1472         if (vme_context->roi_enabled) {
1473
1474             number_roi_mbs = 1;
1475             tmp_qp = *(vme_context->qp_per_mb + starting_offset);
1476             for (i = 1; i < max_mb_cmds; i++) {
1477                 if (tmp_qp != *(vme_context->qp_per_mb + starting_offset + i))
1478                     break;
1479
1480                 number_roi_mbs++;
1481             }
1482
1483             number_mb_cmds = number_roi_mbs;
1484             qp = tmp_qp;
1485         }
1486
1487         if (number_mb_cmds >= remaining_mb) {
1488             number_mb_cmds = remaining_mb;
1489         }
1490
1491         gen8_mfc_batchbuffer_emit_object_command(batch,
1492                                                  mb_flag,
1493                                                  head_offset,
1494                                                  number_mb_cmds,
1495                                                  slice_end_x,
1496                                                  slice_end_y,
1497                                                  mb_x,
1498                                                  mb_y,
1499                                                  width_in_mbs,
1500                                                  qp,
1501                                                  fwd_ref,
1502                                                  bwd_ref);
1503
1504         head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE);
1505         remaining_mb -= number_mb_cmds;
1506         starting_offset += number_mb_cmds;
1507     } while (remaining_mb > 0);
1508 }
1509
/*
 * Emit everything needed for one AVC slice into the aux (slice) batchbuffer:
 * slice state, packed headers (AUD/SPS/PPS etc. for the first slice, plus
 * any per-slice packed data), a reserved region that the GPU PAK kernels
 * fill with per-MB commands, and the tail padding.
 */
static void
gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context,
                               int slice_index)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
    int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
    int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    unsigned int tail_data[] = { 0x0, 0x0 };
    long head_offset;
    int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
    int qp_slice;

    /* Under BRC the QP comes from the rate controller, not from the
     * application's pic_init_qp + slice_qp_delta; the delta is rewritten
     * only when the driver itself generates the slice header
     * (slice_header_index == 0). */
    qp_slice = qp;
    if (rate_control_mode != VA_RC_CQP) {
        qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
        if (encode_state->slice_header_index[slice_index] == 0) {
            pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
            qp_slice = qp;
        }
    }

    /* only support for 8-bit pixel bit-depth */
    assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
    assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
    assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
    assert(qp >= 0 && qp < 52);

    gen8_mfc_avc_slice_state(ctx,
                             pPicParameter,
                             pSliceParameter,
                             encode_state,
                             encoder_context,
                             (rate_control_mode != VA_RC_CQP),
                             qp_slice,
                             slice_batch);

    /* Packed AUD data (if supplied) must land in the stream before the
     * other picture-level headers; both go ahead of the first slice only. */
    if (slice_index == 0) {
        intel_avc_insert_aud_packed_data(ctx, encode_state, encoder_context, slice_batch);
        intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
    }

    intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);

    intel_batchbuffer_align(slice_batch, 64); /* aligned by an Cache-line */
    head_offset = intel_batchbuffer_used_size(slice_batch);

    /* Reserve AVC_PAK_LEN_IN_BYTE bytes per MB starting at head_offset;
     * the MEDIA_OBJECT kernels emitted below write the per-MB PAK commands
     * into this gap (see gen8_mfc_avc_batchbuffer_slice_command). */
    slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE;

    gen8_mfc_avc_batchbuffer_slice_command(ctx,
                                           encoder_context,
                                           pSliceParameter,
                                           head_offset,
                                           qp,
                                           last_slice);


    /* Aligned for tail */
    intel_batchbuffer_align(slice_batch, 64); /* aligned by Cache-line */
    /* Trailing zero data: two dwords on the last slice of the frame,
     * one dword otherwise. NOTE(review): argument meanings follow the
     * insert_object() callback contract — verify against gen6_mfc.h. */
    if (last_slice) {
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   tail_data,
                                   2,
                                   8,
                                   2,
                                   1,
                                   1,
                                   0,
                                   slice_batch);
    } else {
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   tail_data,
                                   1,
                                   8,
                                   1,
                                   1,
                                   1,
                                   0,
                                   slice_batch);
    }

    return;
}
1603
/*
 * Run the media (GPGPU) pipeline that converts the VME output of every
 * slice into PAK commands inside the aux batchbuffer, then terminate and
 * release that batchbuffer.
 */
static void
gen8_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i;

    intel_batchbuffer_start_atomic(batch, 0x4000);

    /* GEN9 needs its own pipeline setup/teardown pair. */
    if (IS_GEN9(i965->intel.device_info))
        gen9_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
    else
        gen8_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);

    for (i = 0; i < encode_state->num_slice_params_ext; i++) {
        gen8_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i);
    }
    {
        struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;

        /* Terminate the slice batchbuffer: pad dword + MI_BATCH_BUFFER_END. */
        intel_batchbuffer_align(slice_batch, 8);
        BEGIN_BCS_BATCH(slice_batch, 2);
        OUT_BCS_BATCH(slice_batch, 0);
        OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
        ADVANCE_BCS_BATCH(slice_batch);

        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH);
        OUT_BATCH(batch, 0);
        ADVANCE_BATCH(batch);

        /* Release the CPU-side wrapper; the underlying bo stays alive via
         * the reference taken in gen8_mfc_avc_hardware_batchbuffer().
         * NOTE(review): confirm intel_batchbuffer_free() does not drop the
         * last reference on the bo here. */
        intel_batchbuffer_free(slice_batch);
        mfc_context->aux_batchbuffer = NULL;
    }

    if (IS_GEN9(i965->intel.device_info))
        gen9_gpe_pipeline_end(ctx, &mfc_context->gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

}
1649
/*
 * Build the slice-level batchbuffer on the GPU. The setup calls are
 * order-dependent: surfaces, interface descriptors and kernel constants
 * must all be in place before the media pipeline that consumes them runs.
 */
static void
gen8_mfc_build_avc_batchbuffer(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context)
{
    gen8_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
    gen8_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
    gen8_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
    gen8_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
}
1660
/*
 * Generate the slice-level batchbuffer using the GPU media pipeline and
 * return its buffer object. The reference taken here is handed to the
 * caller (gen8_mfc_avc_pipeline_programing), which unreferences it after
 * chaining to the buffer.
 */
static dri_bo *
gen8_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    /* Reference before building: the pipeline tears down the aux
     * batchbuffer wrapper, so keep the bo pinned for the caller. */
    dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
    gen8_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);

    return mfc_context->aux_batchbuffer_surface.bo;
}
1673
/*
 * Top-level BCS (MFX) programming for one AVC frame: build the slice-level
 * batchbuffer (on the CPU or via the GPU media pipeline), program the
 * picture-level state, then chain into the slice batchbuffer with
 * MI_BATCH_BUFFER_START.
 */
static void
gen8_mfc_avc_pipeline_programing(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    dri_bo *slice_batch_bo;

    /* Interlaced content is not supported by this encoder path. */
    if (intel_mfc_interlace_check(ctx, encode_state, encoder_context)) {
        fprintf(stderr, "Current VA driver don't support interlace mode!\n");
        assert(0);
        return;
    }

    if (encoder_context->soft_batch_force)
        slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
    else
        slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);


    // begin programing
    intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
    intel_batchbuffer_emit_mi_flush(batch);

    // picture level programing
    gen8_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);

    /* Chain to the slice batchbuffer. NOTE(review): the (1 << 8) | (1 << 0)
     * flags on MI_BATCH_BUFFER_START are the gen8 address-space/second-level
     * selectors — confirm against the gen PRM if touching this. */
    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_BCS_RELOC64(batch,
                    slice_batch_bo,
                    I915_GEM_DOMAIN_COMMAND, 0,
                    0);
    ADVANCE_BCS_BATCH(batch);

    // end programing
    intel_batchbuffer_end_atomic(batch);

    /* Drop the reference taken when the slice batchbuffer was built. */
    dri_bo_unreference(slice_batch_bo);
}
1714
1715
1716 static VAStatus
1717 gen8_mfc_avc_encode_picture(VADriverContextP ctx,
1718                             struct encode_state *encode_state,
1719                             struct intel_encoder_context *encoder_context)
1720 {
1721     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1722     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1723     int current_frame_bits_size;
1724     int sts;
1725
1726     for (;;) {
1727         gen8_mfc_init(ctx, encode_state, encoder_context);
1728         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1729         /*Programing bcs pipeline*/
1730         gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);   //filling the pipeline
1731         gen8_mfc_run(ctx, encode_state, encoder_context);
1732         if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
1733             gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1734             sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
1735             if (sts == BRC_NO_HRD_VIOLATION) {
1736                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1737                 break;
1738             } else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1739                 if (!mfc_context->hrd.violation_noted) {
1740                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP) ? "overflow" : "underflow");
1741                     mfc_context->hrd.violation_noted = 1;
1742                 }
1743                 return VA_STATUS_SUCCESS;
1744             }
1745         } else {
1746             break;
1747         }
1748     }
1749
1750     return VA_STATUS_SUCCESS;
1751 }
1752
1753 /*
1754  * MPEG-2
1755  */
1756
/* Map pic_param->picture_type (index 0/1/2 for I/P/B) to the MPEG-2
 * picture_coding_type values (I=1, P=2, B=3 per ISO/IEC 13818-2) used in
 * MFX_MPEG2_PIC_STATE. */
static const int
va_to_gen8_mpeg2_picture_type[3] = {
    1,  /* I */
    2,  /* P */
    3   /* B */
};
1763
/*
 * Program MFX_MPEG2_PIC_STATE from the application's picture parameters.
 * The 13-dword layout and field positions are mandated by the hardware.
 */
static void
gen8_mfc_mpeg2_pic_state(VADriverContextP ctx,
                         struct intel_encoder_context *encoder_context,
                         struct encode_state *encode_state)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferMPEG2 *pic_param;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
    VAEncSliceParameterBufferMPEG2 *slice_param = NULL;

    assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
    pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
    /* Only the first slice's quantiser scale code is inspected below. */
    slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    /* DW1: f_codes plus the picture coding extension flags */
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
                  (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
                  (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
                  (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    /* DW2: picture coding type */
    OUT_BCS_BATCH(batch,
                  0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
                  va_to_gen8_mpeg2_picture_type[pic_param->picture_type] << 9 |
                  0);
    /* DW3: frame dimensions in macroblocks */
    OUT_BCS_BATCH(batch,
                  1 << 31 |     /* slice concealment */
                  (height_in_mbs - 1) << 16 |
                  (width_in_mbs - 1));

    /* DW4: NOTE(review): magic value emitted when the first slice's
     * quantiser scale code is >= 14 — presumably a rate-control/clamping
     * setting; confirm against the MFX_MPEG2_PIC_STATE documentation. */
    if (slice_param && slice_param->quantiser_scale_code >= 14)
        OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    /* DW6: per-MB size limits */
    OUT_BCS_BATCH(batch,
                  0xFFF << 16 | /* InterMBMaxSize */
                  0xFFF << 0 |  /* IntraMBMaxSize */
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
1822
1823 static void
1824 gen8_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1825 {
1826     unsigned char intra_qm[64] = {
1827         8, 16, 19, 22, 26, 27, 29, 34,
1828         16, 16, 22, 24, 27, 29, 34, 37,
1829         19, 22, 26, 27, 29, 34, 34, 38,
1830         22, 22, 26, 27, 29, 34, 37, 40,
1831         22, 26, 27, 29, 32, 35, 40, 48,
1832         26, 27, 29, 32, 35, 40, 48, 58,
1833         26, 27, 29, 34, 38, 46, 56, 69,
1834         27, 29, 35, 38, 46, 56, 69, 83
1835     };
1836
1837     unsigned char non_intra_qm[64] = {
1838         16, 16, 16, 16, 16, 16, 16, 16,
1839         16, 16, 16, 16, 16, 16, 16, 16,
1840         16, 16, 16, 16, 16, 16, 16, 16,
1841         16, 16, 16, 16, 16, 16, 16, 16,
1842         16, 16, 16, 16, 16, 16, 16, 16,
1843         16, 16, 16, 16, 16, 16, 16, 16,
1844         16, 16, 16, 16, 16, 16, 16, 16,
1845         16, 16, 16, 16, 16, 16, 16, 16
1846     };
1847
1848     gen8_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1849     gen8_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16, encoder_context);
1850 }
1851
/*
 * Load the forward quantiser matrices (encoder side). Each entry is a
 * 16-bit fixed-point reciprocal (65536 / quantiser value) of the
 * corresponding QM entry.
 */
static void
gen8_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    /* NOTE(review): several entries do not match the reciprocals of the
     * default intra QM loaded by gen8_mfc_mpeg2_qm_state() (e.g. the
     * 65536 / 0x13 entries in rows 4-5, and 65536 / 0x18 in row 4) —
     * these look like typos inherited from upstream; verify before
     * relying on exact values. */
    unsigned short intra_fqm[64] = {
        65536 / 0x8, 65536 / 0x10, 65536 / 0x13, 65536 / 0x16, 65536 / 0x16, 65536 / 0x1a, 65536 / 0x1a, 65536 / 0x1b,
        65536 / 0x10, 65536 / 0x10, 65536 / 0x16, 65536 / 0x16, 65536 / 0x1a, 65536 / 0x1b, 65536 / 0x1b, 65536 / 0x1d,
        65536 / 0x13, 65536 / 0x16, 65536 / 0x1a, 65536 / 0x1a, 65536 / 0x1b, 65536 / 0x1d, 65536 / 0x1d, 65536 / 0x23,
        65536 / 0x16, 65536 / 0x18, 65536 / 0x1b, 65536 / 0x1b, 65536 / 0x13, 65536 / 0x20, 65536 / 0x22, 65536 / 0x26,
        65536 / 0x1a, 65536 / 0x1b, 65536 / 0x13, 65536 / 0x13, 65536 / 0x20, 65536 / 0x23, 65536 / 0x26, 65536 / 0x2e,
        65536 / 0x1b, 65536 / 0x1d, 65536 / 0x22, 65536 / 0x22, 65536 / 0x23, 65536 / 0x28, 65536 / 0x2e, 65536 / 0x38,
        65536 / 0x1d, 65536 / 0x22, 65536 / 0x22, 65536 / 0x25, 65536 / 0x28, 65536 / 0x30, 65536 / 0x38, 65536 / 0x45,
        65536 / 0x22, 65536 / 0x25, 65536 / 0x26, 65536 / 0x28, 65536 / 0x30, 65536 / 0x3a, 65536 / 0x45, 65536 / 0x53,
    };

    /* Reciprocal of the flat non-intra matrix: 65536 / 16 = 0x1000. */
    unsigned short non_intra_fqm[64] = {
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    };

    gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
    gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
}
1880
/*
 * Emit MFC_MPEG2_SLICEGROUP_STATE for one slice group: its MB rectangle
 * (x,y .. next_x,next_y), quantiser, and the header/tail insertion flags.
 * The 8-dword layout is mandated by the hardware.
 */
static void
gen8_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
                                struct intel_encoder_context *encoder_context,
                                int x, int y,
                                int next_x, int next_y,
                                int is_fisrt_slice_group,
                                int is_last_slice_group,
                                int intra_slice,
                                int qp,
                                struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    /* Fall back to the context's main batch when none is supplied. */
    if (batch == NULL)
        batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 8);

    OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
    OUT_BCS_BATCH(batch,
                  0 << 31 |                             /* MbRateCtrlFlag */
                  !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
                  1 << 17 |                             /* Insert Header before the first slice group data */
                  1 << 16 |                             /* SliceData PresentFlag: always 1 */
                  1 << 15 |                             /* TailPresentFlag: always 1 */
                  0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
                  !!intra_slice << 13 |                 /* IntraSlice */
                  !!intra_slice << 12 |                 /* IntraSliceFlag */
                  0);
    /* DW2: first MB (x,y) and first MB of the next slice group (next_x,next_y) */
    OUT_BCS_BATCH(batch,
                  next_y << 24 |
                  next_x << 16 |
                  y << 8 |
                  x << 0 |
                  0);
    OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
    /* bitstream pointer is only loaded once for the first slice of a frame when
     * LoadSlicePointerFlag is 0
     */
    OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
    OUT_BCS_BATCH(batch, 0);    /* FIXME: */
    OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
    OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */

    ADVANCE_BCS_BATCH(batch);
}
1927
/*
 * Emit an MFC_MPEG2_PAK_OBJECT for one intra-coded macroblock at MB
 * coordinates (x, y). All motion vectors are zero for intra MBs.
 * Returns the command length in dwords so the caller can track batch usage.
 */
static int
gen8_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
                                struct intel_encoder_context *encoder_context,
                                int x, int y,
                                int first_mb_in_slice,
                                int last_mb_in_slice,
                                int first_mb_in_slice_group,
                                int last_mb_in_slice_group,
                                int mb_type,
                                int qp_scale_code,
                                int coded_block_pattern,
                                unsigned char target_size_in_word,
                                unsigned char max_size_in_word,
                                struct intel_batchbuffer *batch)
{
    int len_in_dwords = 9;

    /* Fall back to the context's main batch when none is supplied. */
    if (batch == NULL)
        batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
    OUT_BCS_BATCH(batch,
                  0 << 24 |     /* PackedMvNum */
                  0 << 20 |     /* MvFormat */
                  7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
                  0 << 15 |     /* TransformFlag: frame DCT */
                  0 << 14 |     /* FieldMbFlag */
                  1 << 13 |     /* IntraMbFlag */
                  mb_type << 8 |   /* MbType: Intra */
                  0 << 2 |      /* SkipMbFlag */
                  0 << 0 |      /* InterMbMode */
                  0);
    OUT_BCS_BATCH(batch, y << 16 | x);
    /* DW3: per-MB size budget and coded block pattern */
    OUT_BCS_BATCH(batch,
                  max_size_in_word << 24 |
                  target_size_in_word << 16 |
                  coded_block_pattern << 6 |      /* CBP */
                  0);
    /* DW4: slice/slice-group boundary flags and quantiser */
    OUT_BCS_BATCH(batch,
                  last_mb_in_slice << 31 |
                  first_mb_in_slice << 30 |
                  0 << 27 |     /* EnableCoeffClamp */
                  last_mb_in_slice_group << 26 |
                  0 << 25 |     /* MbSkipConvDisable */
                  first_mb_in_slice_group << 24 |
                  0 << 16 |     /* MvFieldSelect */
                  qp_scale_code << 0 |
                  0);
    OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
    OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
    OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
    OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}
1987
/* Byte offset of the packed motion vectors within a VME output message */
#define MPEG2_INTER_MV_OFFSET   48

/* Legal MV component range (half-pel units) for each MPEG-2 f_code value:
 * [-16 * 2^(f_code - 1), 16 * 2^(f_code - 1) - 1] (ISO/IEC 13818-2).
 * Index 0 is a placeholder; f_code 0 is never looked up. Declared const:
 * the table is read-only. */
static const struct _mv_ranges {
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {0, 0},
    { -16, 15},
    { -32, 31},
    { -64, 63},
    { -128, 127},
    { -256, 255},
    { -512, 511},
    { -1024, 1023},
    { -2048, 2047},
    { -4096, 4095}
};

/*
 * Sanitize one motion vector component for macroblock index `pos`:
 * the vector is zeroed if the referenced 16-pel block would fall outside
 * the picture (display_max is in full pels), then clamped to the legal
 * range for the given f_code. Returns the adjusted MV in half-pel units.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    /* MB spans [pos*16, (pos+1)*16) pels; compare in half-pel units. */
    if (mv + pos * 16 * 2 < 0 ||
        mv + (pos + 1) * 16 * 2 > display_max * 2)
        mv = 0;

    if (f_code > 0 && f_code < 10) {
        if (mv < mv_ranges[f_code].low)
            mv = mv_ranges[f_code].low;

        if (mv > mv_ranges[f_code].high)
            mv = mv_ranges[f_code].high;
    }

    return mv;
}
2024
2025 static int
2026 gen8_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2027                                 struct encode_state *encode_state,
2028                                 struct intel_encoder_context *encoder_context,
2029                                 unsigned int *msg,
2030                                 int width_in_mbs, int height_in_mbs,
2031                                 int x, int y,
2032                                 int first_mb_in_slice,
2033                                 int last_mb_in_slice,
2034                                 int first_mb_in_slice_group,
2035                                 int last_mb_in_slice_group,
2036                                 int qp_scale_code,
2037                                 unsigned char target_size_in_word,
2038                                 unsigned char max_size_in_word,
2039                                 struct intel_batchbuffer *batch)
2040 {
2041     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2042     int len_in_dwords = 9;
2043     short *mvptr, mvx0, mvy0, mvx1, mvy1;
2044
2045     if (batch == NULL)
2046         batch = encoder_context->base.batch;
2047
2048     mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);;
2049     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2050     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2051     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2052     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2053
2054     BEGIN_BCS_BATCH(batch, len_in_dwords);
2055
2056     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2057     OUT_BCS_BATCH(batch,
2058                   2 << 24 |     /* PackedMvNum */
2059                   7 << 20 |     /* MvFormat */
2060                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
2061                   0 << 15 |     /* TransformFlag: frame DCT */
2062                   0 << 14 |     /* FieldMbFlag */
2063                   0 << 13 |     /* IntraMbFlag */
2064                   1 << 8 |      /* MbType: Frame-based */
2065                   0 << 2 |      /* SkipMbFlag */
2066                   0 << 0 |      /* InterMbMode */
2067                   0);
2068     OUT_BCS_BATCH(batch, y << 16 | x);
2069     OUT_BCS_BATCH(batch,
2070                   max_size_in_word << 24 |
2071                   target_size_in_word << 16 |
2072                   0x3f << 6 |   /* CBP */
2073                   0);
2074     OUT_BCS_BATCH(batch,
2075                   last_mb_in_slice << 31 |
2076                   first_mb_in_slice << 30 |
2077                   0 << 27 |     /* EnableCoeffClamp */
2078                   last_mb_in_slice_group << 26 |
2079                   0 << 25 |     /* MbSkipConvDisable */
2080                   first_mb_in_slice_group << 24 |
2081                   0 << 16 |     /* MvFieldSelect */
2082                   qp_scale_code << 0 |
2083                   0);
2084
2085     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
2086     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
2087     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2088     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2089
2090     ADVANCE_BCS_BATCH(batch);
2091
2092     return len_in_dwords;
2093 }
2094
2095 static void
2096 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2097                                            struct encode_state *encode_state,
2098                                            struct intel_encoder_context *encoder_context,
2099                                            struct intel_batchbuffer *slice_batch)
2100 {
2101     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2102     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2103
2104     if (encode_state->packed_header_data[idx]) {
2105         VAEncPackedHeaderParameterBuffer *param = NULL;
2106         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2107         unsigned int length_in_bits;
2108
2109         assert(encode_state->packed_header_param[idx]);
2110         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2111         length_in_bits = param->bit_length;
2112
2113         mfc_context->insert_object(ctx,
2114                                    encoder_context,
2115                                    header_data,
2116                                    ALIGN(length_in_bits, 32) >> 5,
2117                                    length_in_bits & 0x1f,
2118                                    5,   /* FIXME: check it */
2119                                    0,
2120                                    0,
2121                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2122                                    slice_batch);
2123     }
2124
2125     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2126
2127     if (encode_state->packed_header_data[idx]) {
2128         VAEncPackedHeaderParameterBuffer *param = NULL;
2129         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2130         unsigned int length_in_bits;
2131
2132         assert(encode_state->packed_header_param[idx]);
2133         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2134         length_in_bits = param->bit_length;
2135
2136         mfc_context->insert_object(ctx,
2137                                    encoder_context,
2138                                    header_data,
2139                                    ALIGN(length_in_bits, 32) >> 5,
2140                                    length_in_bits & 0x1f,
2141                                    5,   /* FIXME: check it */
2142                                    0,
2143                                    0,
2144                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2145                                    slice_batch);
2146     }
2147 }
2148
/*
 * Emit all commands for one MPEG-2 slice group into the slice batch:
 * the slice-group state, the packed headers (first group of the frame
 * only), a leading '00' section delimiter, one PAK object per
 * macroblock, and finally either a section delimiter or the
 * end-of-picture tail delimiter.
 */
static void
gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context,
                                    int slice_index,
                                    VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
                                    struct intel_batchbuffer *slice_batch)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
    VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
    unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
    unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
    int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
    int i, j;
    int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
    unsigned int *msg = NULL;
    unsigned char *msg_ptr = NULL;

    slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
    h_start_pos = slice_param->macroblock_address % width_in_mbs;
    v_start_pos = slice_param->macroblock_address / width_in_mbs;
    /* a slice must not wrap past the end of a macroblock row */
    assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);

    /* VME output buffer: one size_block-sized record per macroblock,
     * indexed by macroblock address */
    dri_bo_map(vme_context->vme_output.bo, 0);
    msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;

    if (next_slice_group_param) {
        h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
        v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
    } else {
        /* last group of the picture: "next" position is the bottom edge */
        h_next_start_pos = 0;
        v_next_start_pos = height_in_mbs;
    }

    gen8_mfc_mpeg2_slicegroup_state(ctx,
                                    encoder_context,
                                    h_start_pos,
                                    v_start_pos,
                                    h_next_start_pos,
                                    v_next_start_pos,
                                    slice_index == 0,
                                    next_slice_group_param == NULL,
                                    slice_param->is_intra_slice,
                                    slice_param->quantiser_scale_code,
                                    slice_batch);

    /* sequence/picture headers go in front of the first slice group only */
    if (slice_index == 0)
        intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);

    /* Insert '00' to make sure the header is valid */
    mfc_context->insert_object(ctx,
                               encoder_context,
                               (unsigned int*)section_delimiter,
                               1,
                               8,   /* 8bits in the last DWORD */
                               1,   /* 1 byte */
                               1,
                               0,
                               0,
                               slice_batch);

    for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
        /* PAK for each macroblocks */
        for (j = 0; j < slice_param->num_macroblocks; j++) {
            int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
            int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
            int first_mb_in_slice = (j == 0);
            int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
            int first_mb_in_slice_group = (i == 0 && j == 0);
            int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
                                          j == slice_param->num_macroblocks - 1);

            msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);

            if (slice_param->is_intra_slice) {
                gen8_mfc_mpeg2_pak_object_intra(ctx,
                                                encoder_context,
                                                h_pos, v_pos,
                                                first_mb_in_slice,
                                                last_mb_in_slice,
                                                first_mb_in_slice_group,
                                                last_mb_in_slice_group,
                                                0x1a,
                                                slice_param->quantiser_scale_code,
                                                0x3f,
                                                0,
                                                0xff,
                                                slice_batch);
            } else {
                int inter_rdo, intra_rdo;
                /* NOTE(review): the AVC_* offsets are used to read the RDO
                 * costs for MPEG-2 -- presumably the VME kernel shares the
                 * AVC output record layout; confirm against the VME kernel. */
                inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
                intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;

                /* pick the cheaper coding mode per macroblock */
                if (intra_rdo < inter_rdo)
                    gen8_mfc_mpeg2_pak_object_intra(ctx,
                                                    encoder_context,
                                                    h_pos, v_pos,
                                                    first_mb_in_slice,
                                                    last_mb_in_slice,
                                                    first_mb_in_slice_group,
                                                    last_mb_in_slice_group,
                                                    0x1a,
                                                    slice_param->quantiser_scale_code,
                                                    0x3f,
                                                    0,
                                                    0xff,
                                                    slice_batch);
                else
                    gen8_mfc_mpeg2_pak_object_inter(ctx,
                                                    encode_state,
                                                    encoder_context,
                                                    msg,
                                                    width_in_mbs, height_in_mbs,
                                                    h_pos, v_pos,
                                                    first_mb_in_slice,
                                                    last_mb_in_slice,
                                                    first_mb_in_slice_group,
                                                    last_mb_in_slice_group,
                                                    slice_param->quantiser_scale_code,
                                                    0,
                                                    0xff,
                                                    slice_batch);
            }
        }

        slice_param++;
    }

    dri_bo_unmap(vme_context->vme_output.bo);

    /* tail data */
    if (next_slice_group_param == NULL) { /* end of a picture */
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   (unsigned int *)tail_delimiter,
                                   2,
                                   8,   /* 8bits in the last DWORD */
                                   5,   /* 5 bytes */
                                   1,
                                   1,
                                   0,
                                   slice_batch);
    } else {        /* end of a slice group */
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   (unsigned int *)section_delimiter,
                                   1,
                                   8,   /* 8bits in the last DWORD */
                                   1,   /* 1 byte */
                                   1,
                                   1,
                                   0,
                                   slice_batch);
    }
}
2307
2308 /*
2309  * A batch buffer for all slices, including slice state,
2310  * slice insert object and slice pak object commands
2311  *
2312  */
2313 static dri_bo *
2314 gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2315                                           struct encode_state *encode_state,
2316                                           struct intel_encoder_context *encoder_context)
2317 {
2318     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2319     struct intel_batchbuffer *batch;
2320     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2321     dri_bo *batch_bo;
2322     int i;
2323
2324     batch = mfc_context->aux_batchbuffer;
2325     batch_bo = batch->buffer;
2326
2327     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2328         if (i == encode_state->num_slice_params_ext - 1)
2329             next_slice_group_param = NULL;
2330         else
2331             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2332
2333         gen8_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2334     }
2335
2336     intel_batchbuffer_align(batch, 8);
2337
2338     BEGIN_BCS_BATCH(batch, 2);
2339     OUT_BCS_BATCH(batch, 0);
2340     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2341     ADVANCE_BCS_BATCH(batch);
2342
2343     dri_bo_reference(batch_bo);
2344     intel_batchbuffer_free(batch);
2345     mfc_context->aux_batchbuffer = NULL;
2346
2347     return batch_bo;
2348 }
2349
/*
 * Emit the picture-level MFX state commands for an MPEG-2 encode.
 * The command order follows the MFX programming sequence; do not
 * reorder these calls.
 */
static void
gen8_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
                                           struct encode_state *encode_state,
                                           struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
    mfc_context->set_surface_state(ctx, encoder_context);
    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
    gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    gen8_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
    gen8_mfc_mpeg2_qm_state(ctx, encoder_context);
    gen8_mfc_mpeg2_fqm_state(ctx, encoder_context);
}
2366
/*
 * Build the per-frame BCS batch for an MPEG-2 encode: MI flush,
 * picture-level state, then a chained MI_BATCH_BUFFER_START into the
 * software-built slice batch buffer.
 */
static void
gen8_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    dri_bo *slice_batch_bo;

    /* build all slice-level commands in a second-level batch first */
    slice_batch_bo = gen8_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);

    // begin programming
    intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
    intel_batchbuffer_emit_mi_flush(batch);

    // picture level programming
    gen8_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);

    /* chain to the slice batch; NOTE(review): (1 << 8) presumably selects
     * second-level batch mode -- confirm against the PRM */
    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_BCS_RELOC64(batch,
                    slice_batch_bo,
                    I915_GEM_DOMAIN_COMMAND, 0,
                    0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);

    // end programming
    intel_batchbuffer_end_atomic(batch);

    /* drop the reference taken by gen8_mfc_mpeg2_software_slice_batchbuffer() */
    dri_bo_unreference(slice_batch_bo);
}
2398
2399 static VAStatus
2400 intel_mfc_mpeg2_prepare(VADriverContextP ctx,
2401                         struct encode_state *encode_state,
2402                         struct intel_encoder_context *encoder_context)
2403 {
2404     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2405     struct object_surface *obj_surface;
2406     struct object_buffer *obj_buffer;
2407     struct i965_coded_buffer_segment *coded_buffer_segment;
2408     VAStatus vaStatus = VA_STATUS_SUCCESS;
2409     dri_bo *bo;
2410     int i;
2411
2412     /* reconstructed surface */
2413     obj_surface = encode_state->reconstructed_object;
2414     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2415     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2416     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2417     mfc_context->surface_state.width = obj_surface->orig_width;
2418     mfc_context->surface_state.height = obj_surface->orig_height;
2419     mfc_context->surface_state.w_pitch = obj_surface->width;
2420     mfc_context->surface_state.h_pitch = obj_surface->height;
2421
2422     /* forward reference */
2423     obj_surface = encode_state->reference_objects[0];
2424
2425     if (obj_surface && obj_surface->bo) {
2426         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2427         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2428     } else
2429         mfc_context->reference_surfaces[0].bo = NULL;
2430
2431     /* backward reference */
2432     obj_surface = encode_state->reference_objects[1];
2433
2434     if (obj_surface && obj_surface->bo) {
2435         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2436         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2437     } else {
2438         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2439
2440         if (mfc_context->reference_surfaces[1].bo)
2441             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2442     }
2443
2444     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2445         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2446
2447         if (mfc_context->reference_surfaces[i].bo)
2448             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2449     }
2450
2451     /* input YUV surface */
2452     obj_surface = encode_state->input_yuv_object;
2453     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2454     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2455
2456     /* coded buffer */
2457     obj_buffer = encode_state->coded_buf_object;
2458     bo = obj_buffer->buffer_store->bo;
2459     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2460     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2461     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2462     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2463
2464     /* set the internal flag to 0 to indicate the coded size is unknown */
2465     dri_bo_map(bo, 1);
2466     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2467     coded_buffer_segment->mapped = 0;
2468     coded_buffer_segment->codec = encoder_context->codec;
2469     dri_bo_unmap(bo);
2470
2471     return vaStatus;
2472 }
2473
/*
 * Top-level MPEG-2 encode entry point: initialize the MFC context,
 * bind all surfaces/buffers, program the BCS pipeline and submit the
 * batch for execution.
 */
static VAStatus
gen8_mfc_mpeg2_encode_picture(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    gen8_mfc_init(ctx, encode_state, encoder_context);
    intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
    /* Program the BCS pipeline */
    gen8_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
    gen8_mfc_run(ctx, encode_state, encoder_context);

    return VA_STATUS_SUCCESS;
}
2487
2488 /* JPEG encode methods */
2489
2490 static VAStatus
2491 intel_mfc_jpeg_prepare(VADriverContextP ctx,
2492                        struct encode_state *encode_state,
2493                        struct intel_encoder_context *encoder_context)
2494 {
2495     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2496     struct object_surface *obj_surface;
2497     struct object_buffer *obj_buffer;
2498     struct i965_coded_buffer_segment *coded_buffer_segment;
2499     VAStatus vaStatus = VA_STATUS_SUCCESS;
2500     dri_bo *bo;
2501
2502     /* input YUV surface */
2503     obj_surface = encode_state->input_yuv_object;
2504     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2505     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2506
2507     /* coded buffer */
2508     obj_buffer = encode_state->coded_buf_object;
2509     bo = obj_buffer->buffer_store->bo;
2510     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2511     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2512     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2513     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2514
2515     /* set the internal flag to 0 to indicate the coded size is unknown */
2516     dri_bo_map(bo, 1);
2517     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2518     coded_buffer_segment->mapped = 0;
2519     coded_buffer_segment->codec = encoder_context->codec;
2520     dri_bo_unmap(bo);
2521
2522     return vaStatus;
2523 }
2524
2525
2526 static void
2527 gen8_mfc_jpeg_set_surface_state(VADriverContextP ctx,
2528                                 struct intel_encoder_context *encoder_context,
2529                                 struct encode_state *encode_state)
2530 {
2531     struct intel_batchbuffer *batch = encoder_context->base.batch;
2532     struct object_surface *obj_surface = encode_state->input_yuv_object;
2533     unsigned int input_fourcc;
2534     unsigned int y_cb_offset;
2535     unsigned int y_cr_offset;
2536     unsigned int surface_format;
2537
2538     assert(obj_surface);
2539
2540     y_cb_offset = obj_surface->y_cb_offset;
2541     y_cr_offset = obj_surface->y_cr_offset;
2542     input_fourcc = obj_surface->fourcc;
2543
2544     surface_format = (obj_surface->fourcc == VA_FOURCC_Y800) ?
2545                      MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
2546
2547
2548     switch (input_fourcc) {
2549     case VA_FOURCC_Y800: {
2550         surface_format = MFX_SURFACE_MONOCHROME;
2551         break;
2552     }
2553     case VA_FOURCC_NV12: {
2554         surface_format = MFX_SURFACE_PLANAR_420_8;
2555         break;
2556     }
2557     case VA_FOURCC_UYVY: {
2558         surface_format = MFX_SURFACE_YCRCB_SWAPY;
2559         break;
2560     }
2561     case VA_FOURCC_YUY2: {
2562         surface_format = MFX_SURFACE_YCRCB_NORMAL;
2563         break;
2564     }
2565     case VA_FOURCC_RGBA:
2566     case VA_FOURCC_444P: {
2567         surface_format = MFX_SURFACE_R8G8B8A8_UNORM;
2568         break;
2569     }
2570     }
2571
2572     BEGIN_BCS_BATCH(batch, 6);
2573
2574     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2575     OUT_BCS_BATCH(batch, 0);
2576     OUT_BCS_BATCH(batch,
2577                   ((obj_surface->orig_height - 1) << 18) |
2578                   ((obj_surface->orig_width - 1) << 4));
2579     OUT_BCS_BATCH(batch,
2580                   (surface_format << 28) | /* Surface Format */
2581                   (0 << 27) | /* must be 1 for interleave U/V, hardware requirement for AVC/VC1/MPEG and 0 for JPEG */
2582                   (0 << 22) | /* surface object control state, FIXME??? */
2583                   ((obj_surface->width - 1) << 3) | /* pitch */
2584                   (0 << 2)  | /* must be 0 for interleave U/V */
2585                   (1 << 1)  | /* must be tiled */
2586                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
2587     OUT_BCS_BATCH(batch,
2588                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2589                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
2590     OUT_BCS_BATCH(batch,
2591                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2592                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoeo for JPEG */
2593
2594
2595     ADVANCE_BCS_BATCH(batch);
2596 }
2597
/*
 * Emit MFX_JPEG_PIC_STATE: derive the input surface format and the
 * output MCU structure from the surface FOURCC, then compute the MCU
 * grid geometry (frame size in 8x8 blocks and the residual pixels in
 * the last MCU of each row/column) from the picture dimensions.
 */
static void
gen8_mfc_jpeg_pic_state(VADriverContextP ctx,
                        struct intel_encoder_context *encoder_context,
                        struct encode_state *encode_state)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct object_surface *obj_surface = encode_state->input_yuv_object;
    VAEncPictureParameterBufferJPEG *pic_param;
    unsigned int  surface_format;
    unsigned int  frame_width_in_blks;
    unsigned int  frame_height_in_blks;
    unsigned int  pixels_in_horizontal_lastMCU;
    unsigned int  pixels_in_vertical_lastMCU;
    unsigned int  input_surface_format;
    unsigned int  output_mcu_format;
    unsigned int  picture_width;
    unsigned int  picture_height;

    assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
    assert(obj_surface);
    pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
    surface_format = obj_surface->fourcc;
    picture_width = pic_param->picture_width;
    picture_height = pic_param->picture_height;

    /* map FOURCC -> (HW input format, output MCU structure); NV12/YUV420
     * is the fallback for unknown FOURCCs */
    switch (surface_format) {
    case VA_FOURCC_Y800: {
        input_surface_format = JPEG_ENC_SURFACE_Y8;
        output_mcu_format = JPEG_ENC_MCU_YUV400;
        break;
    }
    case VA_FOURCC_NV12: {
        input_surface_format = JPEG_ENC_SURFACE_NV12;
        output_mcu_format = JPEG_ENC_MCU_YUV420;
        break;
    }
    case VA_FOURCC_UYVY: {
        input_surface_format = JPEG_ENC_SURFACE_UYVY;
        output_mcu_format = JPEG_ENC_MCU_YUV422H_2Y;
        break;
    }
    case VA_FOURCC_YUY2: {
        input_surface_format = JPEG_ENC_SURFACE_YUY2;
        output_mcu_format = JPEG_ENC_MCU_YUV422H_2Y;
        break;
    }

    case VA_FOURCC_RGBA:
    case VA_FOURCC_444P: {
        input_surface_format = JPEG_ENC_SURFACE_RGB;
        output_mcu_format = JPEG_ENC_MCU_RGB;
        break;
    }
    default : {
        input_surface_format = JPEG_ENC_SURFACE_NV12;
        output_mcu_format = JPEG_ENC_MCU_YUV420;
        break;
    }
    }

    /* all output_mcu_format values assigned above are covered below, so
     * the lastMCU/blks variables are always initialized */
    switch (output_mcu_format) {

    case JPEG_ENC_MCU_YUV400:
    case JPEG_ENC_MCU_RGB: {
        /* 8x8 MCU: residual pixels are simply size mod 8 */
        pixels_in_horizontal_lastMCU = (picture_width % 8);
        pixels_in_vertical_lastMCU = (picture_height % 8);

        //H1=1,V1=1 for YUV400 and YUV444. So, compute these values accordingly
        frame_width_in_blks = ((picture_width + 7) / 8);
        frame_height_in_blks = ((picture_height + 7) / 8);
        break;
    }

    case JPEG_ENC_MCU_YUV420: {
        /* 16x16 MCU; odd dimensions are rounded up to the next even
         * value before taking the residual (chroma is subsampled 2x) */
        if ((picture_width % 2) == 0)
            pixels_in_horizontal_lastMCU = picture_width % 16;
        else
            pixels_in_horizontal_lastMCU   = ((picture_width % 16) + 1) % 16;

        if ((picture_height % 2) == 0)
            pixels_in_vertical_lastMCU     = picture_height % 16;
        else
            pixels_in_vertical_lastMCU   = ((picture_height % 16) + 1) % 16;

        //H1=2,V1=2 for YUV420. So, compute these values accordingly
        frame_width_in_blks = ((picture_width + 15) / 16) * 2;
        frame_height_in_blks = ((picture_height + 15) / 16) * 2;
        break;
    }

    case JPEG_ENC_MCU_YUV422H_2Y: {
        /* 16x8 MCU: horizontal behaves like YUV420, vertical like YUV400 */
        if (picture_width % 2 == 0)
            pixels_in_horizontal_lastMCU = picture_width % 16;
        else
            pixels_in_horizontal_lastMCU = ((picture_width % 16) + 1) % 16;

        pixels_in_vertical_lastMCU = picture_height % 8;

        //H1=2,V1=1 for YUV422H_2Y. So, compute these values accordingly
        frame_width_in_blks = ((picture_width + 15) / 16) * 2;
        frame_height_in_blks = ((picture_height + 7) / 8);
        break;
    }
    } //end of switch

    BEGIN_BCS_BATCH(batch, 3);
    /* DWORD 0 */
    OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
    /* DWORD 1 */
    OUT_BCS_BATCH(batch,
                  (pixels_in_horizontal_lastMCU << 26) |     /* Pixels In Horizontal Last MCU */
                  (pixels_in_vertical_lastMCU << 21)   |     /* Pixels In Vertical Last MCU */
                  (input_surface_format << 8)          |     /* Input Surface format */
                  (output_mcu_format << 0));                 /* Output MCU Structure */
    /* DWORD 2 */
    OUT_BCS_BATCH(batch,
                  ((frame_height_in_blks - 1) << 16)    |   /* Frame Height In Blks Minus 1 */
                  (JPEG_ENC_ROUND_QUANT_DEFAULT  << 13) |   /* Rounding Quant set to default value 0 */
                  ((frame_width_in_blks - 1) << 0));        /* Frame Width In Blks Minus 1 */
    ADVANCE_BCS_BATCH(batch);
}
2720
/*
 * Convert a 64-entry quantization matrix (values expected in [1, 255])
 * into the 32-DWORD reciprocal form the hardware expects: each entry
 * becomes 65535/Q (a 16-bit fixed-point reciprocal) and two consecutive
 * entries are packed per DWORD, lower-indexed entry in the low half.
 *
 * The reciprocals must be held in an unsigned 16-bit type: 65535/1 ==
 * 65535 does not fit in a signed short, and OR-ing a sign-extended
 * negative short into the packed DWORD would corrupt the upper 16 bits
 * (and left-shifting a negative value is undefined behavior).
 */
static void
get_reciprocal_dword_qm(unsigned char *raster_qm, uint32_t *dword_qm)
{
    int i;
    uint16_t reciprocal_qm[64];

    for (i = 0; i < 64; i++) {
        /* Q is at least 1 after the caller's clamping, so no div-by-zero */
        reciprocal_qm[i] = 65535 / (raster_qm[i]);
    }

    for (i = 0; i < 32; i++) {
        dword_qm[i] = ((uint32_t)reciprocal_qm[2 * i + 1] << 16) | reciprocal_qm[2 * i];
    }
}
2738
2739
2740 static void
2741 gen8_mfc_jpeg_fqm_state(VADriverContextP ctx,
2742                         struct intel_encoder_context *encoder_context,
2743                         struct encode_state *encode_state)
2744 {
2745     unsigned int quality = 0;
2746     uint32_t temp, i = 0, j = 0, dword_qm[32];
2747     VAEncPictureParameterBufferJPEG *pic_param;
2748     VAQMatrixBufferJPEG *qmatrix;
2749     unsigned char raster_qm[64], column_raster_qm[64];
2750     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2751
2752     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
2753     pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
2754     quality = pic_param->quality;
2755
2756     //If the app sends the qmatrix, use it, buffer it for using it with the next frames
2757     //The app can send qmatrix for the first frame and not send for the subsequent frames
2758     if (encode_state->q_matrix && encode_state->q_matrix->buffer) {
2759         qmatrix = (VAQMatrixBufferJPEG *)encode_state->q_matrix->buffer;
2760
2761         mfc_context->buffered_qmatrix.load_lum_quantiser_matrix = 1;
2762         memcpy(mfc_context->buffered_qmatrix.lum_quantiser_matrix, qmatrix->lum_quantiser_matrix, 64 * (sizeof(unsigned char)));
2763
2764         if (pic_param->num_components > 1) {
2765             mfc_context->buffered_qmatrix.load_chroma_quantiser_matrix = 1;
2766             memcpy(mfc_context->buffered_qmatrix.chroma_quantiser_matrix, qmatrix->chroma_quantiser_matrix, 64 * (sizeof(unsigned char)));
2767         } else {
2768             mfc_context->buffered_qmatrix.load_chroma_quantiser_matrix = 0;
2769         }
2770
2771     } else {
2772         //If the app doesnt send the qmatrix, use the buffered/default qmatrix
2773         qmatrix = &mfc_context->buffered_qmatrix;
2774         qmatrix->load_lum_quantiser_matrix = 1;
2775         qmatrix->load_chroma_quantiser_matrix = (pic_param->num_components > 1) ? 1 : 0;
2776     }
2777
2778
2779     //As per the design, normalization of the quality factor and scaling of the Quantization tables
2780     //based on the quality factor needs to be done in the driver before sending the values to the HW.
2781     //But note, the driver expects the scaled quantization tables (as per below logic) to be sent as
2782     //packed header information. The packed header is written as the header of the jpeg file. This
2783     //header information is used to decode the jpeg file. So, it is the app's responsibility to send
2784     //the correct header information (See build_packed_jpeg_header_buffer() in jpegenc.c in LibVa on
2785     //how to do this). QTables can be different for different applications. If no tables are provided,
2786     //the default tables in the driver are used.
2787
2788     //Normalization of the quality factor
2789     if (quality > 100) quality = 100;
2790     if (quality == 0)  quality = 1;
2791     quality = (quality < 50) ? (5000 / quality) : (200 - (quality * 2));
2792
2793     //Step 1. Apply Quality factor and clip to range [1, 255] for luma and chroma Quantization matrices
2794     //Step 2. HW expects the 1/Q[i] values in the qm sent, so get reciprocals
2795     //Step 3. HW also expects 32 dwords, hence combine 2 (1/Q) values into 1 dword
2796     //Step 4. Send the Quantization matrix to the HW, use gen8_mfc_fqm_state
2797
2798     //For luma (Y or R)
2799     if (qmatrix->load_lum_quantiser_matrix) {
2800         //apply quality to lum_quantiser_matrix
2801         for (i = 0; i < 64; i++) {
2802             temp = (qmatrix->lum_quantiser_matrix[i] * quality) / 100;
2803             //clamp to range [1,255]
2804             temp = (temp > 255) ? 255 : temp;
2805             temp = (temp < 1) ? 1 : temp;
2806             qmatrix->lum_quantiser_matrix[i] = (unsigned char)temp;
2807         }
2808
2809         //For VAAPI, the VAQMatrixBuffer needs to be in zigzag order.
2810         //The App should send it in zigzag. Now, the driver has to extract the raster from it.
2811         for (j = 0; j < 64; j++)
2812             raster_qm[zigzag_direct[j]] = qmatrix->lum_quantiser_matrix[j];
2813
2814         //Convert the raster order(row-ordered) to the column-raster (column by column).
2815         //To be consistent with the other encoders, send it in column order.
2816         //Need to double check if our HW expects col or row raster.
2817         for (j = 0; j < 64; j++) {
2818             int row = j / 8, col = j % 8;
2819             column_raster_qm[col * 8 + row] = raster_qm[j];
2820         }
2821
2822         //Convert to raster QM to reciprocal. HW expects values in reciprocal.
2823         get_reciprocal_dword_qm(column_raster_qm, dword_qm);
2824
2825         //send the luma qm to the command buffer
2826         gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2827     }
2828
2829     //For Chroma, if chroma exists (Cb, Cr or G, B)
2830     if (qmatrix->load_chroma_quantiser_matrix) {
2831         //apply quality to chroma_quantiser_matrix
2832         for (i = 0; i < 64; i++) {
2833             temp = (qmatrix->chroma_quantiser_matrix[i] * quality) / 100;
2834             //clamp to range [1,255]
2835             temp = (temp > 255) ? 255 : temp;
2836             temp = (temp < 1) ? 1 : temp;
2837             qmatrix->chroma_quantiser_matrix[i] = (unsigned char)temp;
2838         }
2839
2840         //For VAAPI, the VAQMatrixBuffer needs to be in zigzag order.
2841         //The App should send it in zigzag. Now, the driver has to extract the raster from it.
2842         for (j = 0; j < 64; j++)
2843             raster_qm[zigzag_direct[j]] = qmatrix->chroma_quantiser_matrix[j];
2844
2845         //Convert the raster order(row-ordered) to the column-raster (column by column).
2846         //To be consistent with the other encoders, send it in column order.
2847         //Need to double check if our HW expects col or row raster.
2848         for (j = 0; j < 64; j++) {
2849             int row = j / 8, col = j % 8;
2850             column_raster_qm[col * 8 + row] = raster_qm[j];
2851         }
2852
2853
2854         //Convert to raster QM to reciprocal. HW expects values in reciprocal.
2855         get_reciprocal_dword_qm(column_raster_qm, dword_qm);
2856
2857         //send the same chroma qm to the command buffer (for both U,V or G,B)
2858         gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2859         gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
2860     }
2861 }
2862
2863
//Translation of Table K.5 into code: maps a huffval (run/size byte) taken
//from the Huffman table buffer to an index into the coefficient/size tables.
uint8_t map_huffval_to_index(uint8_t huff_val)
{
    uint8_t run = (huff_val >> 4) & 0x0F;
    uint8_t size = huff_val & 0x0F;
    uint8_t index = (uint8_t)(run * 0xA + size);

    //Values 0xF0 and above (ZRL and beyond) occupy one extra slot.
    if (huff_val >= 0xF0)
        index += 1;

    return index;
}
2878
2879
//Implementation of Flow chart Annex C - Figure C.1: expand the BITS
//histogram (number of codes of each length 1..16) into a flat list of code
//sizes, terminated by a 0; *lastK receives the number of entries written.
static void
generate_huffman_codesizes_table(uint8_t *bits, uint8_t *huff_size_table, uint8_t *lastK)
{
    uint8_t size, count, k = 0;

    //bits[size - 1] holds how many codes have length 'size'.
    for (size = 1; size <= 16; size++) {
        for (count = 0; count < bits[size - 1]; count++) {
            huff_size_table[k] = size;
            k++;
        }
    }

    //Trailing 0 terminates the size list (Annex C convention).
    huff_size_table[k] = 0;
    *lastK = k;
}
2899
//Implementation of Flow chart Annex C - Figure C.2: assign canonical Huffman
//code words to the zero-terminated size list. Codes of equal length are
//consecutive integers; the running code is doubled whenever the length grows.
static void
generate_huffman_codes_table(uint8_t *huff_size_table, uint16_t *huff_code_table)
{
    uint16_t code = 0;
    uint8_t k = 0;
    uint8_t si = huff_size_table[0];

    while (huff_size_table[k] != 0) {
        if (huff_size_table[k] == si) {
            //A Huffman code can never be 0xFFFF; substitute 0 if it occurs.
            if (code == 0xFFFF)
                code = 0x0000;

            huff_code_table[k] = code;
            code++;
            k++;
        } else {
            //Move to the next code length, doubling the code value.
            code <<= 1;
            si++;
        }
    }
}
2927
//Implementation of Flow chart Annex C - Figure C.3: reorder the code/size
//tables so each entry sits at the slot selected by its huffval, overwriting
//huff_size_table/huff_code_table in place.
static void
generate_ordered_codes_table(uint8_t *huff_vals, uint8_t *huff_size_table, uint16_t *huff_code_table, uint8_t type, uint8_t lastK)
{
    //type 0 = DC (12 entries), otherwise AC (162 entries)
    uint8_t huff_val_size = (type == 0) ? 12 : 162;
    uint8_t ordered_sizes[huff_val_size];
    uint16_t ordered_codes[huff_val_size];
    uint8_t k = 0;

    memset(ordered_sizes, 0, sizeof(ordered_sizes));
    memset(ordered_codes, 0, sizeof(ordered_codes));

    //Scatter each code/size pair to the slot chosen by its huffval.
    do {
        uint8_t idx = map_huffval_to_index(huff_vals[k]);
        ordered_codes[idx] = huff_code_table[k];
        ordered_sizes[idx] = huff_size_table[k];
        k++;
    } while (k < lastK);

    memcpy(huff_size_table, ordered_sizes, sizeof(uint8_t) * huff_val_size);
    memcpy(huff_code_table, ordered_codes, sizeof(uint16_t) * huff_val_size);
}
2951
2952
2953 //This method converts the huffman table to code words which is needed by the HW
2954 //Flowcharts from Jpeg Spec Annex C - Figure C.1, Figure C.2, Figure C.3 are used here
2955 static void
2956 convert_hufftable_to_codes(VAHuffmanTableBufferJPEGBaseline *huff_buffer, uint32_t *table, uint8_t type, uint8_t index)
2957 {
2958     uint8_t lastK = 0, i = 0;
2959     uint8_t huff_val_size = 0;
2960     uint8_t *huff_bits, *huff_vals;
2961
2962     huff_val_size = (type == 0) ? 12 : 162;
2963     uint8_t huff_size_table[huff_val_size + 1]; //The +1 for adding 0 at the end of huff_val_size
2964     uint16_t huff_code_table[huff_val_size];
2965
2966     memset(huff_size_table, 0, sizeof(huff_size_table));
2967     memset(huff_code_table, 0, sizeof(huff_code_table));
2968
2969     huff_bits = (type == 0) ? (huff_buffer->huffman_table[index].num_dc_codes) : (huff_buffer->huffman_table[index].num_ac_codes);
2970     huff_vals = (type == 0) ? (huff_buffer->huffman_table[index].dc_values) : (huff_buffer->huffman_table[index].ac_values);
2971
2972
2973     //Generation of table of Huffman code sizes
2974     generate_huffman_codesizes_table(huff_bits, huff_size_table, &lastK);
2975
2976     //Generation of table of Huffman codes
2977     generate_huffman_codes_table(huff_size_table, huff_code_table);
2978
2979     //Ordering procedure for encoding procedure code tables
2980     generate_ordered_codes_table(huff_vals, huff_size_table, huff_code_table, type, lastK);
2981
2982     //HW expects Byte0: Code length; Byte1,Byte2: Code Word, Byte3: Dummy
2983     //Since IA is littlended, &, | and << accordingly to store the values in the DWord.
2984     for (i = 0; i < huff_val_size; i++) {
2985         table[i] = 0;
2986         table[i] = ((huff_size_table[i] & 0xFF) | ((huff_code_table[i] & 0xFFFF) << 8));
2987     }
2988
2989 }
2990
2991 //send the huffman table using MFC_JPEG_HUFF_TABLE_STATE
2992 static void
2993 gen8_mfc_jpeg_huff_table_state(VADriverContextP ctx,
2994                                struct encode_state *encode_state,
2995                                struct intel_encoder_context *encoder_context,
2996                                int num_tables)
2997 {
2998     VAHuffmanTableBufferJPEGBaseline *huff_buffer;
2999     struct intel_batchbuffer *batch = encoder_context->base.batch;
3000     uint8_t index;
3001     uint32_t dc_table[12], ac_table[162];
3002
3003     assert(encode_state->huffman_table && encode_state->huffman_table->buffer);
3004     huff_buffer = (VAHuffmanTableBufferJPEGBaseline *)encode_state->huffman_table->buffer;
3005
3006     memset(dc_table, 0, 12);
3007     memset(ac_table, 0, 162);
3008
3009     for (index = 0; index < num_tables; index++) {
3010         int id = va_to_gen7_jpeg_hufftable[index];
3011
3012         if (!huff_buffer->load_huffman_table[index])
3013             continue;
3014
3015         //load DC table with 12 DWords
3016         convert_hufftable_to_codes(huff_buffer, dc_table, 0, index);  //0 for Dc
3017
3018         //load AC table with 162 DWords
3019         convert_hufftable_to_codes(huff_buffer, ac_table, 1, index);  //1 for AC
3020
3021         BEGIN_BCS_BATCH(batch, 176);
3022         OUT_BCS_BATCH(batch, MFC_JPEG_HUFF_TABLE_STATE | (176 - 2));
3023         OUT_BCS_BATCH(batch, id); //Huff table id
3024
3025         //DWord 2 - 13 has DC_TABLE
3026         intel_batchbuffer_data(batch, dc_table, 12 * 4);
3027
3028         //Dword 14 -175 has AC_TABLE
3029         intel_batchbuffer_data(batch, ac_table, 162 * 4);
3030         ADVANCE_BCS_BATCH(batch);
3031     }
3032 }
3033
3034
3035 //This method is used to compute the MCU count used for setting MFC_JPEG_SCAN_OBJECT
3036 static void get_Y_sampling_factors(uint32_t surface_format, uint8_t *h_factor, uint8_t *v_factor)
3037 {
3038     switch (surface_format) {
3039     case VA_FOURCC_Y800: {
3040         (* h_factor) = 1;
3041         (* v_factor) = 1;
3042         break;
3043     }
3044     case VA_FOURCC_NV12: {
3045         (* h_factor) = 2;
3046         (* v_factor) = 2;
3047         break;
3048     }
3049     case VA_FOURCC_UYVY: {
3050         (* h_factor) = 2;
3051         (* v_factor) = 1;
3052         break;
3053     }
3054     case VA_FOURCC_YUY2: {
3055         (* h_factor) = 2;
3056         (* v_factor) = 1;
3057         break;
3058     }
3059     case VA_FOURCC_RGBA:
3060     case VA_FOURCC_444P: {
3061         (* h_factor) = 1;
3062         (* v_factor) = 1;
3063         break;
3064     }
3065     default : { //May be  have to insert error handling here. For now just use as below
3066         (* h_factor) = 1;
3067         (* v_factor) = 1;
3068         break;
3069     }
3070     }
3071 }
3072
//set MFC_JPEG_SCAN_OBJECT
//Programs the single scan of the JPEG frame: MCU count, restart interval,
//Huffman table selectors, and the header-present/last-scan flags.
static void
gen8_mfc_jpeg_scan_object(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context)
{
    uint32_t mcu_count, surface_format, Mx, My;
    uint8_t i, horizontal_sampling_factor, vertical_sampling_factor, huff_ac_table = 0, huff_dc_table = 0;
    uint8_t is_last_scan = 1;    //Jpeg has only 1 scan per frame. When last scan, HW inserts EOI code.
    uint8_t head_present_flag = 1; //Header has tables and app data
    uint16_t num_components, restart_interval;   //Specifies number of MCUs in an ECS.
    VAEncSliceParameterBufferJPEG *slice_param;
    VAEncPictureParameterBufferJPEG *pic_param;

    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct object_surface *obj_surface = encode_state->input_yuv_object;

    assert(encode_state->slice_params_ext[0] && encode_state->slice_params_ext[0]->buffer);
    assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
    assert(obj_surface);
    pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
    slice_param = (VAEncSliceParameterBufferJPEG *)encode_state->slice_params_ext[0]->buffer;
    surface_format = obj_surface->fourcc;

    //The luma sampling factors determine the MCU dimensions (8*h x 8*v pixels).
    get_Y_sampling_factors(surface_format, &horizontal_sampling_factor, &vertical_sampling_factor);

    // Mx = #MCUs in a row, My = #MCUs in a column (rounded up)
    Mx = (pic_param->picture_width + (horizontal_sampling_factor * 8 - 1)) / (horizontal_sampling_factor * 8);
    My = (pic_param->picture_height + (vertical_sampling_factor * 8 - 1)) / (vertical_sampling_factor * 8);
    mcu_count = (Mx * My);

    num_components = pic_param->num_components;
    restart_interval = slice_param->restart_interval;

    //Depending on number of components and values set for table selectors,
    //only those bits are set in 24:22 for AC table, 20:18 for DC table
    for (i = 0; i < num_components; i++) {
        huff_ac_table |= ((slice_param->components[i].ac_table_selector) << i);
        huff_dc_table |= ((slice_param->components[i].dc_table_selector) << i);
    }


    BEGIN_BCS_BATCH(batch, 3);
    /* DWORD 0 */
    OUT_BCS_BATCH(batch, MFC_JPEG_SCAN_OBJECT | (3 - 2));
    /* DWORD 1 */
    OUT_BCS_BATCH(batch, mcu_count << 0);       //MCU Count
    /* DWORD 2 */
    OUT_BCS_BATCH(batch,
                  (huff_ac_table << 22)     |   //Huffman AC Table
                  (huff_dc_table << 18)     |   //Huffman DC Table
                  (head_present_flag << 17) |   //Head present flag
                  (is_last_scan << 16)      |   //Is last scan
                  (restart_interval << 0));     //Restart Interval
    ADVANCE_BCS_BATCH(batch);
}
3129
//Insert raw header bytes into the bitstream via MFX_PAK_INSERT_OBJECT.
//insert_data:          payload, length_in_dws DWords long
//data_bits_in_last_dw: valid bits in the final DWord (0 means a full 32)
//is_last_header:       set on the final header insert before the scan data
//is_end_of_slice:      set when this insert terminates the slice
static void
gen8_mfc_jpeg_pak_insert_object(struct intel_encoder_context *encoder_context, unsigned int *insert_data,
                                int length_in_dws, int data_bits_in_last_dw, int is_last_header,
                                int is_end_of_slice)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    assert(batch);

    //HW convention: 0 valid bits in the last DWord means the DWord is full.
    if (data_bits_in_last_dw == 0)
        data_bits_in_last_dw = 32;

    BEGIN_BCS_BATCH(batch, length_in_dws + 2);

    OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (length_in_dws + 2 - 2));
    //DWord 1
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                    //DataByteOffset 0 for JPEG Encoder
                  (0 << 15) |                    //HeaderLengthExcludeFrmSize 0 for JPEG Encoder
                  (data_bits_in_last_dw << 8) |  //DataBitsInLastDW
                  (0 << 4) |                     //SkipEmulByteCount 0 for JPEG Encoder
                  (0 << 3) |                     //EmulationFlag 0 for JPEG Encoder
                  ((!!is_last_header) << 2) |    //LastHeaderFlag
                  ((!!is_end_of_slice) << 1) |   //EndOfSliceFlag
                  (1 << 0));                     //BitstreamStartReset 1 for JPEG Encoder
    //Data Payload
    intel_batchbuffer_data(batch, insert_data, length_in_dws * 4);

    ADVANCE_BCS_BATCH(batch);
}
3159
3160
3161 //send the jpeg headers to HW using MFX_PAK_INSERT_OBJECT
3162 static void
3163 gen8_mfc_jpeg_add_headers(VADriverContextP ctx,
3164                           struct encode_state *encode_state,
3165                           struct intel_encoder_context *encoder_context)
3166 {
3167     if (encode_state->packed_header_data_ext) {
3168         VAEncPackedHeaderParameterBuffer *param = NULL;
3169         unsigned int *header_data = (unsigned int *)(*encode_state->packed_header_data_ext)->buffer;
3170         unsigned int length_in_bits;
3171
3172         param = (VAEncPackedHeaderParameterBuffer *)(*encode_state->packed_header_params_ext)->buffer;
3173         length_in_bits = param->bit_length;
3174
3175         gen8_mfc_jpeg_pak_insert_object(encoder_context,
3176                                         header_data,
3177                                         ALIGN(length_in_bits, 32) >> 5,
3178                                         length_in_bits & 0x1f,
3179                                         1,
3180                                         1);
3181     }
3182 }
3183
3184 //Initialize the buffered_qmatrix with the default qmatrix in the driver.
3185 //If the app sends the qmatrix, this will be replaced with the one app sends.
3186 static void
3187 jpeg_init_default_qmatrix(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3188 {
3189     int i = 0;
3190     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3191
3192     //Load the the QM in zigzag order. If app sends QM, it is always in zigzag order.
3193     for (i = 0; i < 64; i++)
3194         mfc_context->buffered_qmatrix.lum_quantiser_matrix[i] = jpeg_luma_quant[zigzag_direct[i]];
3195
3196     for (i = 0; i < 64; i++)
3197         mfc_context->buffered_qmatrix.chroma_quantiser_matrix[i] = jpeg_chroma_quant[zigzag_direct[i]];
3198 }
3199
/* This is at the picture level */
//Programs the full MFX JPEG pipeline for one picture: pipe/surface/buffer
//state, picture state, quantization matrices, Huffman tables, the scan
//object, and finally the packed headers.
static void
gen8_mfc_jpeg_pipeline_picture_programing(VADriverContextP ctx,
                                          struct encode_state *encode_state,
                                          struct intel_encoder_context *encoder_context)
{
    int i, j, component, max_selector = 0;
    VAEncSliceParameterBufferJPEG *slice_param;

    gen8_mfc_pipe_mode_select(ctx, MFX_FORMAT_JPEG, encoder_context);
    gen8_mfc_jpeg_set_surface_state(ctx, encoder_context, encode_state);
    gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen8_mfc_ind_obj_base_addr_state(ctx, encoder_context);
    gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    gen8_mfc_jpeg_pic_state(ctx, encoder_context, encode_state);

    //do the slice level encoding here
    gen8_mfc_jpeg_fqm_state(ctx, encoder_context, encode_state);

    //Find the highest Huffman table selector used by any component; this
    //determines how many tables must be programmed below.
    //I dont think I need this for loop. Just to be consistent with other encoding logic...
    for (i = 0; i < encode_state->num_slice_params_ext; i++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[i]->buffer);
        slice_param = (VAEncSliceParameterBufferJPEG *)encode_state->slice_params_ext[i]->buffer;

        for (j = 0; j < encode_state->slice_params_ext[i]->num_elements; j++) {

            for (component = 0; component < slice_param->num_components; component++) {
                if (max_selector < slice_param->components[component].dc_table_selector)
                    max_selector = slice_param->components[component].dc_table_selector;

                if (max_selector < slice_param->components[component].ac_table_selector)
                    max_selector = slice_param->components[component].ac_table_selector;
            }

            slice_param++;
        }
    }

    //JPEG baseline allows table selectors 0 and 1 only.
    assert(max_selector < 2);
    //send the huffman table using MFC_JPEG_HUFF_TABLE
    gen8_mfc_jpeg_huff_table_state(ctx, encode_state, encoder_context, max_selector + 1);
    //set MFC_JPEG_SCAN_OBJECT
    gen8_mfc_jpeg_scan_object(ctx, encode_state, encoder_context);
    //add headers using MFX_PAK_INSERT_OBJECT (it is refered as MFX_INSERT_OBJECT in this driver code)
    gen8_mfc_jpeg_add_headers(ctx, encode_state, encoder_context);

}
3247
//Wraps the picture-level programing in an atomic BCS batch, preceded by an
//MI_FLUSH so prior work is complete before the JPEG commands execute.
static void
gen8_mfc_jpeg_pipeline_programing(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    // begin programing
    intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
    intel_batchbuffer_emit_mi_flush(batch);

    // picture level programing
    gen8_mfc_jpeg_pipeline_picture_programing(ctx, encode_state, encoder_context);

    // end programing
    intel_batchbuffer_end_atomic(batch);

}
3266
3267
//Top-level JPEG encode entry point: initialize the MFC context, prepare the
//input/output buffers, program the BCS pipeline, then submit for execution.
static VAStatus
gen8_mfc_jpeg_encode_picture(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    gen8_mfc_init(ctx, encode_state, encoder_context);
    intel_mfc_jpeg_prepare(ctx, encode_state, encoder_context);
    /*Programing bcs pipeline*/
    gen8_mfc_jpeg_pipeline_programing(ctx, encode_state, encoder_context);
    gen8_mfc_run(ctx, encode_state, encoder_context);

    return VA_STATUS_SUCCESS;
}
3281
3282 static int gen8_mfc_vp8_qindex_estimate(struct encode_state *encode_state,
3283                                         struct gen6_mfc_context *mfc_context,
3284                                         int target_frame_size,
3285                                         int is_key_frame)
3286 {
3287     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3288     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3289     unsigned int max_qindex = pic_param->clamp_qindex_high;
3290     unsigned int min_qindex = pic_param->clamp_qindex_low;
3291     int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
3292     int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
3293     int target_mb_size;
3294     int last_size_gap  = -1;
3295     int per_mb_size_at_qindex;
3296     int target_qindex = min_qindex, i;
3297
3298     /* make sure would not overflow*/
3299     if (target_frame_size >= (0x7fffffff >> 9))
3300         target_mb_size = (target_frame_size / width_in_mbs / height_in_mbs) << 9;
3301     else
3302         target_mb_size = (target_frame_size << 9) / width_in_mbs / height_in_mbs;
3303
3304     for (i = min_qindex; i <= max_qindex; i++) {
3305         per_mb_size_at_qindex = vp8_bits_per_mb[!is_key_frame][i];
3306         target_qindex = i;
3307         if (per_mb_size_at_qindex <= target_mb_size) {
3308             if (target_mb_size - per_mb_size_at_qindex < last_size_gap)
3309                 target_qindex--;
3310             break;
3311         } else
3312             last_size_gap = per_mb_size_at_qindex - target_mb_size;
3313     }
3314
3315     return target_qindex;
3316 }
3317
//Initialize VP8 bit-rate-control state: per-GOP target frame sizes for I/P
//frames, initial qindex estimates, and the HRD buffer model.
static void gen8_mfc_vp8_brc_init(struct encode_state *encode_state,
                                  struct intel_encoder_context* encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    double bitrate = encoder_context->brc.bits_per_second[0];
    double framerate = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
    int inum = 1, pnum = 0; //one I frame per GOP, the rest are P frames
    int intra_period = seq_param->intra_period;
    int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
    int max_frame_size = (vp8_bits_per_mb[0][0] >> 9) * width_in_mbs * height_in_mbs; /* vp8_bits_per_mb table mutilpled 512 */

    pnum = intra_period  - 1;

    mfc_context->brc.mode = encoder_context->rate_control_mode;

    //Split the GOP bit budget between the I frame and the P frames,
    //weighting P frames by BRC_PWEIGHT.
    mfc_context->brc.target_frame_size[0][SLICE_TYPE_I] = (int)((double)((bitrate * intra_period) / framerate) /
                                                                (double)(inum + BRC_PWEIGHT * pnum));
    mfc_context->brc.target_frame_size[0][SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[0][SLICE_TYPE_I];

    mfc_context->brc.gop_nums[0][SLICE_TYPE_I] = inum;
    mfc_context->brc.gop_nums[0][SLICE_TYPE_P] = pnum;

    mfc_context->brc.bits_per_frame[0] = bitrate / framerate;

    //Seed the per-type qindex estimates from the target frame sizes.
    mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I] = gen8_mfc_vp8_qindex_estimate(encode_state,
                                                                                mfc_context,
                                                                                mfc_context->brc.target_frame_size[0][SLICE_TYPE_I],
                                                                                1);
    mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P] = gen8_mfc_vp8_qindex_estimate(encode_state,
                                                                                mfc_context,
                                                                                mfc_context->brc.target_frame_size[0][SLICE_TYPE_P],
                                                                                0);

    //HRD model: default to one second worth of bits if the app gave no
    //buffer size; start at the requested fullness or half the buffer.
    if (encoder_context->brc.hrd_buffer_size)
        mfc_context->hrd.buffer_size[0] = (double)encoder_context->brc.hrd_buffer_size;
    else
        mfc_context->hrd.buffer_size[0] = bitrate;
    if (encoder_context->brc.hrd_initial_buffer_fullness &&
        encoder_context->brc.hrd_initial_buffer_fullness < mfc_context->hrd.buffer_size[0])
        mfc_context->hrd.current_buffer_fullness[0] = (double)encoder_context->brc.hrd_initial_buffer_fullness;
    else
        mfc_context->hrd.current_buffer_fullness[0] = mfc_context->hrd.buffer_size[0] / 2.0;
    mfc_context->hrd.target_buffer_fullness[0] = (double)mfc_context->hrd.buffer_size[0] / 2.0;
    mfc_context->hrd.buffer_capacity[0] = (double)mfc_context->hrd.buffer_size[0] / max_frame_size;
    mfc_context->hrd.violation_noted = 0;
}
3366
//Post-encode BRC update for VP8: given the actual bits spent on the frame
//(frame_bits), predict the qindex for the next frame of the same type,
//correct it toward HRD compliance, and report the HRD status.
//Returns a gen6_brc_status (no violation / underflow / overflow variants).
static int gen8_mfc_vp8_brc_postpack(struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context,
                                     int frame_bits)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    int is_key_frame = !pic_param->pic_flags.bits.frame_type;
    int slicetype = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
    int qpi = mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I];
    int qpp = mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P];
    int qp; // quantizer of previously encoded slice of current type
    int qpn; // predicted quantizer for next frame of current type in integer format
    double qpf; // predicted quantizer for next frame of current type in float format
    double delta_qp; // QP correction
    int target_frame_size, frame_size_next;
    /* Notes:
     *  x - how far we are from HRD buffer borders
     *  y - how far we are from target HRD buffer fullness
     */
    double x, y;
    double frame_size_alpha;
    unsigned int max_qindex = pic_param->clamp_qindex_high;
    unsigned int min_qindex = pic_param->clamp_qindex_low;

    qp = mfc_context->brc.qp_prime_y[0][slicetype];

    //Smooth the next-frame size prediction; alpha grows with the number of
    //frames of this type in the GOP, capped at 30.
    target_frame_size = mfc_context->brc.target_frame_size[0][slicetype];
    if (mfc_context->hrd.buffer_capacity[0] < 5)
        frame_size_alpha = 0;
    else
        frame_size_alpha = (double)mfc_context->brc.gop_nums[0][slicetype];
    if (frame_size_alpha > 30) frame_size_alpha = 30;
    frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
                      (double)(frame_size_alpha + 1.);

    /* frame_size_next: avoiding negative number and too small value */
    if ((double)frame_size_next < (double)(target_frame_size * 0.25))
        frame_size_next = (int)((double)target_frame_size * 0.25);

    //Scale the quantizer by how far the predicted size is from the target.
    qpf = (double)qp * target_frame_size / frame_size_next;
    qpn = (int)(qpf + 0.5);

    if (qpn == qp) {
        /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
        mfc_context->brc.qpf_rounding_accumulator[0] += qpf - qpn;
        if (mfc_context->brc.qpf_rounding_accumulator[0] > 1.0) {
            qpn++;
            mfc_context->brc.qpf_rounding_accumulator[0] = 0.;
        } else if (mfc_context->brc.qpf_rounding_accumulator[0] < -1.0) {
            qpn--;
            mfc_context->brc.qpf_rounding_accumulator[0] = 0.;
        }
    }

    /* making sure that QP is not changing too fast */
    if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
    else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, min_qindex, max_qindex);

    /* checking wthether HRD compliance is still met */
    sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);

    /* calculating QP delta as some function*/
    x = mfc_context->hrd.target_buffer_fullness[0] - mfc_context->hrd.current_buffer_fullness[0];
    if (x > 0) {
        x /= mfc_context->hrd.target_buffer_fullness[0];
        y = mfc_context->hrd.current_buffer_fullness[0];
    } else {
        x /= (mfc_context->hrd.buffer_size[0] - mfc_context->hrd.target_buffer_fullness[0]);
        y = mfc_context->hrd.buffer_size[0] - mfc_context->hrd.current_buffer_fullness[0];
    }
    if (y < 0.01) y = 0.01;
    if (x > 1) x = 1;
    else if (x < -1) x = -1;

    //Smooth correction pushing fullness back toward the target; bounded by
    //BRC_QP_MAX_CHANGE via the sin() term.
    delta_qp = BRC_QP_MAX_CHANGE * exp(-1 / y) * sin(BRC_PI_0_5 * x);
    qpn = (int)(qpn + delta_qp + 0.5);

    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, min_qindex, max_qindex);

    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* correcting QPs of slices of other types */
        if (!is_key_frame) {
            if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 4)
                mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I] += (qpn - BRC_I_P_QP_DIFF - qpi) >> 2;
        } else {
            if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 4)
                mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P] += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
        }
        BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], min_qindex, max_qindex);
        BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], min_qindex, max_qindex);
    } else if (sts == BRC_UNDERFLOW) { // underflow
        if (qpn <= qp) qpn = qp + 2;
        if (qpn > max_qindex) {
            qpn = max_qindex;
            sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
        }
    } else if (sts == BRC_OVERFLOW) {
        if (qpn >= qp) qpn = qp - 2;
        if (qpn < min_qindex) { // < 0 (?) overflow with minQP
            qpn = min_qindex;
            sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
        }
    }

    mfc_context->brc.qp_prime_y[0][slicetype] = qpn;

    return sts;
}
3479
//Initialize the VUI HRD parameters (CBR only): bit rate in 1024-bit units,
//the initial CPB removal delay on the 90kHz HRD clock, and the bit lengths
//of the delay fields.
static void gen8_mfc_vp8_hrd_context_init(struct encode_state *encode_state,
                                          struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    int target_bit_rate = encoder_context->brc.bits_per_second[0];

    // current we only support CBR mode.
    if (rate_control_mode == VA_RC_CBR) {
        mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
        //Half-buffer initial delay converted to 90kHz ticks.
        //NOTE(review): target_bit_rate * 8 can overflow int for bit rates
        //above ~268 Mbps - confirm the expected input range.
        mfc_context->vui_hrd.i_initial_cpb_removal_delay = ((target_bit_rate * 8) >> 10) * 0.5 * 1024 / target_bit_rate * 90000;
        mfc_context->vui_hrd.i_cpb_removal_delay = 2;
        mfc_context->vui_hrd.i_frame_number = 0;

        mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
        mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
        mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
    }

}
3500
3501 static void gen8_mfc_vp8_hrd_context_update(struct encode_state *encode_state,
3502                                             struct gen6_mfc_context *mfc_context)
3503 {
3504     mfc_context->vui_hrd.i_frame_number++;
3505 }
3506
3507 static void gen8_mfc_vp8_brc_prepare(struct encode_state *encode_state,
3508                                      struct intel_encoder_context *encoder_context)
3509 {
3510     unsigned int rate_control_mode = encoder_context->rate_control_mode;
3511
3512     if (rate_control_mode == VA_RC_CBR) {
3513         bool brc_updated;
3514         assert(encoder_context->codec != CODEC_MPEG2);
3515
3516         brc_updated = encoder_context->brc.need_reset;
3517
3518         /*Programing bit rate control */
3519         if (brc_updated) {
3520             gen8_mfc_vp8_brc_init(encode_state, encoder_context);
3521         }
3522
3523         /*Programing HRD control */
3524         if (brc_updated)
3525             gen8_mfc_vp8_hrd_context_init(encode_state, encoder_context);
3526     }
3527 }
3528
3529 static void vp8_enc_state_init(struct gen6_mfc_context *mfc_context,
3530                                VAEncPictureParameterBufferVP8 *pic_param,
3531                                VAQMatrixBufferVP8 *q_matrix)
3532 {
3533
3534     int is_key_frame = !pic_param->pic_flags.bits.frame_type;
3535     unsigned char *coeff_probs_stream_in_buffer;
3536
3537     mfc_context->vp8_state.frame_header_lf_update_pos = 0;
3538     mfc_context->vp8_state.frame_header_qindex_update_pos = 0;
3539     mfc_context->vp8_state.frame_header_token_update_pos = 0;
3540     mfc_context->vp8_state.frame_header_bin_mv_upate_pos = 0;
3541
3542     mfc_context->vp8_state.prob_skip_false = 255;
3543     memset(mfc_context->vp8_state.mb_segment_tree_probs, 0, sizeof(mfc_context->vp8_state.mb_segment_tree_probs));
3544     memcpy(mfc_context->vp8_state.mv_probs, vp8_default_mv_context, sizeof(mfc_context->vp8_state.mv_probs));
3545
3546     if (is_key_frame) {
3547         memcpy(mfc_context->vp8_state.y_mode_probs, vp8_kf_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
3548         memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_kf_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
3549
3550         mfc_context->vp8_state.prob_intra = 255;
3551         mfc_context->vp8_state.prob_last = 128;
3552         mfc_context->vp8_state.prob_gf = 128;
3553     } else {
3554         memcpy(mfc_context->vp8_state.y_mode_probs, vp8_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
3555         memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
3556
3557         mfc_context->vp8_state.prob_intra = 63;
3558         mfc_context->vp8_state.prob_last = 128;
3559         mfc_context->vp8_state.prob_gf = 128;
3560     }
3561
3562     mfc_context->vp8_state.prob_skip_false = vp8_base_skip_false_prob[q_matrix->quantization_index[0]];
3563
3564     dri_bo_map(mfc_context->vp8_state.coeff_probs_stream_in_bo, 1);
3565     coeff_probs_stream_in_buffer = (unsigned char *)mfc_context->vp8_state.coeff_probs_stream_in_bo->virtual;
3566     assert(coeff_probs_stream_in_buffer);
3567     memcpy(coeff_probs_stream_in_buffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
3568     dri_bo_unmap(mfc_context->vp8_state.coeff_probs_stream_in_bo);
3569 }
3570
3571 static void vp8_enc_state_update(struct gen6_mfc_context *mfc_context,
3572                                  VAQMatrixBufferVP8 *q_matrix)
3573 {
3574
3575     /*some other probabilities need to be updated*/
3576 }
3577
3578 extern void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param,
3579                                       VAEncPictureParameterBufferVP8 *pic_param,
3580                                       VAQMatrixBufferVP8 *q_matrix,
3581                                       struct gen6_mfc_context *mfc_context,
3582                                       struct intel_encoder_context *encoder_context);
3583
3584 static void vp8_enc_frame_header_binarize(struct encode_state *encode_state,
3585                                           struct intel_encoder_context *encoder_context,
3586                                           struct gen6_mfc_context *mfc_context)
3587 {
3588     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3589     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3590     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
3591     unsigned char *frame_header_buffer;
3592
3593     binarize_vp8_frame_header(seq_param, pic_param, q_matrix, mfc_context, encoder_context);
3594
3595     dri_bo_map(mfc_context->vp8_state.frame_header_bo, 1);
3596     frame_header_buffer = (unsigned char *)mfc_context->vp8_state.frame_header_bo->virtual;
3597     assert(frame_header_buffer);
3598     memcpy(frame_header_buffer, mfc_context->vp8_state.vp8_frame_header, (mfc_context->vp8_state.frame_header_bit_count + 7) / 8);
3599     free(mfc_context->vp8_state.vp8_frame_header);
3600     dri_bo_unmap(mfc_context->vp8_state.frame_header_bo);
3601 }
3602
3603 #define MAX_VP8_FRAME_HEADER_SIZE              0x2000
3604 #define VP8_TOKEN_STATISTICS_BUFFER_SIZE       0x2000
3605
3606 static void gen8_mfc_vp8_init(VADriverContextP ctx,
3607                               struct encode_state *encode_state,
3608                               struct intel_encoder_context *encoder_context)
3609 {
3610     struct i965_driver_data *i965 = i965_driver_data(ctx);
3611     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
3612     dri_bo *bo;
3613     int i;
3614     int width_in_mbs = 0;
3615     int height_in_mbs = 0;
3616     int slice_batchbuffer_size;
3617     int is_key_frame, slice_type, rate_control_mode;
3618
3619     VAEncSequenceParameterBufferVP8 *pSequenceParameter = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
3620     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
3621     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
3622
3623     width_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
3624     height_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
3625
3626     is_key_frame = !pic_param->pic_flags.bits.frame_type;
3627     slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
3628     rate_control_mode = encoder_context->rate_control_mode;
3629
3630     if (rate_control_mode == VA_RC_CBR) {
3631         q_matrix->quantization_index[0] = mfc_context->brc.qp_prime_y[0][slice_type];
3632         for (i = 1; i < 4; i++)
3633             q_matrix->quantization_index[i] = q_matrix->quantization_index[0];
3634         for (i = 0; i < 5; i++)
3635             q_matrix->quantization_index_delta[i] = 0;
3636     }
3637
3638     slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
3639                              (SLICE_HEADER + SLICE_TAIL);
3640
3641     /*Encode common setup for MFC*/
3642     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
3643     mfc_context->post_deblocking_output.bo = NULL;
3644
3645     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
3646     mfc_context->pre_deblocking_output.bo = NULL;
3647
3648     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
3649     mfc_context->uncompressed_picture_source.bo = NULL;
3650
3651     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
3652     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
3653
3654     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
3655         if (mfc_context->direct_mv_buffers[i].bo != NULL)
3656             dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
3657         mfc_context->direct_mv_buffers[i].bo = NULL;
3658     }
3659
3660     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
3661         if (mfc_context->reference_surfaces[i].bo != NULL)
3662             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
3663         mfc_context->reference_surfaces[i].bo = NULL;
3664     }
3665
3666     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
3667     bo = dri_bo_alloc(i965->intel.bufmgr,
3668                       "Buffer",
3669                       width_in_mbs * 64 * 16,
3670                       64);
3671     assert(bo);
3672     mfc_context->intra_row_store_scratch_buffer.bo = bo;
3673
3674     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
3675     bo = dri_bo_alloc(i965->intel.bufmgr,
3676                       "Buffer",
3677                       width_in_mbs * height_in_mbs * 16,
3678                       64);
3679     assert(bo);
3680     mfc_context->macroblock_status_buffer.bo = bo;
3681
3682     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
3683     bo = dri_bo_alloc(i965->intel.bufmgr,
3684                       "Buffer",
3685                       16 * width_in_mbs * 64,  /* 16 * width_in_mbs * 64 */
3686                       64);
3687     assert(bo);
3688     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
3689
3690     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
3691     bo = dri_bo_alloc(i965->intel.bufmgr,
3692                       "Buffer",
3693                       16 * width_in_mbs * 64, /* 16 * width_in_mbs * 64 */
3694                       0x1000);
3695     assert(bo);
3696     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
3697
3698     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
3699     mfc_context->mfc_batchbuffer_surface.bo = NULL;
3700
3701     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
3702     mfc_context->aux_batchbuffer_surface.bo = NULL;
3703
3704     if (mfc_context->aux_batchbuffer) {
3705         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
3706         mfc_context->aux_batchbuffer = NULL;
3707     }
3708
3709     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
3710     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
3711     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
3712     mfc_context->aux_batchbuffer_surface.pitch = 16;
3713     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
3714     mfc_context->aux_batchbuffer_surface.size_block = 16;
3715
3716     gen8_gpe_context_init(ctx, &mfc_context->gpe_context);
3717
3718     /* alloc vp8 encoding buffers*/
3719     dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
3720     bo = dri_bo_alloc(i965->intel.bufmgr,
3721                       "Buffer",
3722                       MAX_VP8_FRAME_HEADER_SIZE,
3723                       0x1000);
3724     assert(bo);
3725     mfc_context->vp8_state.frame_header_bo = bo;
3726
3727     mfc_context->vp8_state.intermediate_buffer_max_size = width_in_mbs * height_in_mbs * 384 * 9;
3728     for (i = 0; i < 8; i++) {
3729         mfc_context->vp8_state.intermediate_partition_offset[i] = width_in_mbs * height_in_mbs * 384 * (i + 1);
3730     }
3731     dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
3732     bo = dri_bo_alloc(i965->intel.bufmgr,
3733                       "Buffer",
3734                       mfc_context->vp8_state.intermediate_buffer_max_size,
3735                       0x1000);
3736     assert(bo);
3737     mfc_context->vp8_state.intermediate_bo = bo;
3738
3739     dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
3740     bo = dri_bo_alloc(i965->intel.bufmgr,
3741                       "Buffer",
3742                       width_in_mbs * height_in_mbs * 16,
3743                       0x1000);
3744     assert(bo);
3745     mfc_context->vp8_state.stream_out_bo = bo;
3746
3747     dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
3748     bo = dri_bo_alloc(i965->intel.bufmgr,
3749                       "Buffer",
3750                       sizeof(vp8_default_coef_probs),
3751                       0x1000);
3752     assert(bo);
3753     mfc_context->vp8_state.coeff_probs_stream_in_bo = bo;
3754
3755     dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
3756     bo = dri_bo_alloc(i965->intel.bufmgr,
3757                       "Buffer",
3758                       VP8_TOKEN_STATISTICS_BUFFER_SIZE,
3759                       0x1000);
3760     assert(bo);
3761     mfc_context->vp8_state.token_statistics_bo = bo;
3762
3763     dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
3764     bo = dri_bo_alloc(i965->intel.bufmgr,
3765                       "Buffer",
3766                       width_in_mbs * 16 * 64,
3767                       0x1000);
3768     assert(bo);
3769     mfc_context->vp8_state.mpc_row_store_bo = bo;
3770
3771     vp8_enc_state_init(mfc_context, pic_param, q_matrix);
3772     vp8_enc_frame_header_binarize(encode_state, encoder_context, mfc_context);
3773 }
3774
static VAStatus
intel_mfc_vp8_prepare(VADriverContextP ctx,
                      struct encode_state *encode_state,
                      struct intel_encoder_context *encoder_context)
{
    /*
     * Bind the per-frame surfaces/buffers into the MFC context:
     * reconstructed surface (routed pre- or post-deblocking depending on
     * the loop filter level), reference frames, input YUV, and the coded
     * (output) buffer.  Each binding takes its own dri_bo reference; the
     * matching unreferences happen in gen8_mfc_vp8_init() on the next
     * frame.  Always returns VA_STATUS_SUCCESS.
     */
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    struct i965_coded_buffer_segment *coded_buffer_segment;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    dri_bo *bo;
    int i;

    /* reconstructed surface */
    obj_surface = encode_state->reconstructed_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
    /* Loop filter off -> hardware writes the pre-deblocking output;
     * otherwise the deblocked (post-filter) output is written. */
    if (pic_param->loop_filter_level[0] == 0) {
        mfc_context->pre_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->pre_deblocking_output.bo);
    } else {
        mfc_context->post_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->post_deblocking_output.bo);
    }

    mfc_context->surface_state.width = obj_surface->orig_width;
    mfc_context->surface_state.height = obj_surface->orig_height;
    mfc_context->surface_state.w_pitch = obj_surface->width;
    mfc_context->surface_state.h_pitch = obj_surface->height;

    /* set vp8 reference frames */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        obj_surface = encode_state->reference_objects[i];

        if (obj_surface && obj_surface->bo) {
            mfc_context->reference_surfaces[i].bo = obj_surface->bo;
            dri_bo_reference(mfc_context->reference_surfaces[i].bo);
        } else {
            mfc_context->reference_surfaces[i].bo = NULL;
        }
    }

    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);

    /* coded buffer: PAK writes after the coded-buffer header */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
    mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
    mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);

    /* the final VP8 frame is assembled directly into the coded buffer */
    dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
    mfc_context->vp8_state.final_frame_bo = mfc_context->mfc_indirect_pak_bse_object.bo;
    mfc_context->vp8_state.final_frame_byte_offset = I965_CODEDBUFFER_HEADER_SIZE;
    dri_bo_reference(mfc_context->vp8_state.final_frame_bo);

    /* set the internal flag to 0 to indicate the coded size is unknown */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    dri_bo_unmap(bo);

    return vaStatus;
}
3844
static void
gen8_mfc_vp8_encoder_cfg(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
    /*
     * Emit MFX_VP8_ENCODER_CFG (30 DWs): statistics/bitstream output
     * control, per-MB bit budget limits, show_frame/version flags, the
     * scaled frame dimensions, and the bit positions within the binarized
     * frame header where the hardware patches the qindex, loop filter,
     * token and MV probability updates.
     */
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;

    BEGIN_BCS_BATCH(batch, 30);
    OUT_BCS_BATCH(batch, MFX_VP8_ENCODER_CFG | (30 - 2)); /* SKL should be 31-2 ? */

    OUT_BCS_BATCH(batch,
                  0 << 9 | /* compressed bitstream output disable */
                  1 << 7 | /* disable per-segment delta qindex and loop filter in RC */
                  1 << 6 | /* RC initial pass */
                  0 << 4 | /* update segment feature data flag */
                  1 << 3 | /* bitstream statistics output enable */
                  1 << 2 | /* token statistics output enable */
                  0 << 1 | /* final bitstream output disable */
                  0 << 0); /*DW1*/

    OUT_BCS_BATCH(batch, 0); /*DW2*/

    OUT_BCS_BATCH(batch,
                  0xfff << 16 | /* max intra mb bit count limit */
                  0xfff << 0  /* max inter mb bit count limit */
                 ); /*DW3*/

    /* DW4-DW21: rate control / cumulative bit tracking fields, unused here */
    OUT_BCS_BATCH(batch, 0); /*DW4*/
    OUT_BCS_BATCH(batch, 0); /*DW5*/
    OUT_BCS_BATCH(batch, 0); /*DW6*/
    OUT_BCS_BATCH(batch, 0); /*DW7*/
    OUT_BCS_BATCH(batch, 0); /*DW8*/
    OUT_BCS_BATCH(batch, 0); /*DW9*/
    OUT_BCS_BATCH(batch, 0); /*DW10*/
    OUT_BCS_BATCH(batch, 0); /*DW11*/
    OUT_BCS_BATCH(batch, 0); /*DW12*/
    OUT_BCS_BATCH(batch, 0); /*DW13*/
    OUT_BCS_BATCH(batch, 0); /*DW14*/
    OUT_BCS_BATCH(batch, 0); /*DW15*/
    OUT_BCS_BATCH(batch, 0); /*DW16*/
    OUT_BCS_BATCH(batch, 0); /*DW17*/
    OUT_BCS_BATCH(batch, 0); /*DW18*/
    OUT_BCS_BATCH(batch, 0); /*DW19*/
    OUT_BCS_BATCH(batch, 0); /*DW20*/
    OUT_BCS_BATCH(batch, 0); /*DW21*/

    OUT_BCS_BATCH(batch,
                  pic_param->pic_flags.bits.show_frame << 23 |
                  pic_param->pic_flags.bits.version << 20
                 ); /*DW22*/

    /* DW23: scaled frame dimensions (scale factor in bits 15:14 of each half) */
    OUT_BCS_BATCH(batch,
                  (seq_param->frame_height_scale << 14 | seq_param->frame_height) << 16 |
                  (seq_param->frame_width_scale << 14 | seq_param->frame_width) << 0
                 );

    /*DW24*/
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bit_count); /* frame header bit count */

    /*DW25*/
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_qindex_update_pos); /* frame header bin buffer qindex update pointer */

    /*DW26*/
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_lf_update_pos); /* frame header bin buffer loop filter update pointer*/

    /*DW27*/
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_token_update_pos); /* frame header bin buffer token update pointer */

    /*DW28*/
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bin_mv_upate_pos); /*frame header bin buffer mv update pointer */

    /*DW29*/
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
3924
static void
gen8_mfc_vp8_pic_state(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
{
    /*
     * Emit MFX_VP8_PIC_STATE (38 DWs): frame geometry, picture flags,
     * loop filter and quantizer settings, plus the probability tables
     * (segment tree, mode, MV) kept in mfc_context->vp8_state.
     */
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
    int i, j, log2num;

    /* num_token_partitions is used as a log2 count by the hardware */
    log2num = pic_param->pic_flags.bits.num_token_partitions;

    /*update mode and token probs*/
    vp8_enc_state_update(mfc_context, q_matrix);

    BEGIN_BCS_BATCH(batch, 38);
    OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
    /* DW1: frame dimensions in MBs, minus one */
    OUT_BCS_BATCH(batch,
                  (ALIGN(seq_param->frame_height, 16) / 16 - 1) << 16 |
                  (ALIGN(seq_param->frame_width, 16) / 16 - 1) << 0);

    /* DW2: picture flags */
    OUT_BCS_BATCH(batch,
                  log2num << 24 |
                  pic_param->sharpness_level << 16 |
                  pic_param->pic_flags.bits.sign_bias_alternate << 13 |
                  pic_param->pic_flags.bits.sign_bias_golden << 12 |
                  pic_param->pic_flags.bits.loop_filter_adj_enable << 11 |
                  pic_param->pic_flags.bits.mb_no_coeff_skip << 10 |
                  pic_param->pic_flags.bits.update_mb_segmentation_map << 9 |
                  pic_param->pic_flags.bits.segmentation_enabled << 8 |
                  !pic_param->pic_flags.bits.frame_type << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
                  (pic_param->pic_flags.bits.version / 2) << 4 |
                  (pic_param->pic_flags.bits.version == 3) << 1 | /* full pixel mode for version 3 */
                  !!pic_param->pic_flags.bits.version << 0); /* version 0: 6 tap */

    /* DW3: per-segment loop filter levels */
    OUT_BCS_BATCH(batch,
                  pic_param->loop_filter_level[3] << 24 |
                  pic_param->loop_filter_level[2] << 16 |
                  pic_param->loop_filter_level[1] <<  8 |
                  pic_param->loop_filter_level[0] <<  0);

    /* DW4: per-segment quantization indices */
    OUT_BCS_BATCH(batch,
                  q_matrix->quantization_index[3] << 24 |
                  q_matrix->quantization_index[2] << 16 |
                  q_matrix->quantization_index[1] <<  8 |
                  q_matrix->quantization_index[0] << 0);

    /* DW5-DW6: qindex deltas in sign/magnitude form — the sign bit is
     * bit 15 of the 16-bit value, the magnitude comes from abs() */
    OUT_BCS_BATCH(batch,
                  ((unsigned short)(q_matrix->quantization_index_delta[4]) >> 15) << 28 |
                  abs(q_matrix->quantization_index_delta[4]) << 24 |
                  ((unsigned short)(q_matrix->quantization_index_delta[3]) >> 15) << 20 |
                  abs(q_matrix->quantization_index_delta[3]) << 16 |
                  ((unsigned short)(q_matrix->quantization_index_delta[2]) >> 15) << 12 |
                  abs(q_matrix->quantization_index_delta[2]) << 8 |
                  ((unsigned short)(q_matrix->quantization_index_delta[1]) >> 15) << 4 |
                  abs(q_matrix->quantization_index_delta[1]) << 0);

    OUT_BCS_BATCH(batch,
                  ((unsigned short)(q_matrix->quantization_index_delta[0]) >> 15) << 4 |
                  abs(q_matrix->quantization_index_delta[0]) << 0);

    /* DW7: qindex clamp range */
    OUT_BCS_BATCH(batch,
                  pic_param->clamp_qindex_high << 8 |
                  pic_param->clamp_qindex_low << 0);

    /* DW8-DW18: programmed to all-ones */
    for (i = 8; i < 19; i++) {
        OUT_BCS_BATCH(batch, 0xffffffff);
    }

    /* DW19: MB segment tree probabilities */
    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.mb_segment_tree_probs[2] << 16 |
                  mfc_context->vp8_state.mb_segment_tree_probs[1] <<  8 |
                  mfc_context->vp8_state.mb_segment_tree_probs[0] <<  0);

    /* DW20: frame-level mode probabilities */
    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.prob_skip_false << 24 |
                  mfc_context->vp8_state.prob_intra      << 16 |
                  mfc_context->vp8_state.prob_last       <<  8 |
                  mfc_context->vp8_state.prob_gf         <<  0);

    /* DW21-DW22: luma / chroma mode probabilities */
    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.y_mode_probs[3] << 24 |
                  mfc_context->vp8_state.y_mode_probs[2] << 16 |
                  mfc_context->vp8_state.y_mode_probs[1] <<  8 |
                  mfc_context->vp8_state.y_mode_probs[0] <<  0);

    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.uv_mode_probs[2] << 16 |
                  mfc_context->vp8_state.uv_mode_probs[1] <<  8 |
                  mfc_context->vp8_state.uv_mode_probs[0] <<  0);

    /* MV update value, DW23-DW32 — the 20th (last) entry of each of the
     * two 19-entry probability rows is emitted as 0 */
    for (i = 0; i < 2; i++) {
        for (j = 0; j < 20; j += 4) {
            OUT_BCS_BATCH(batch,
                          (j + 3 == 19 ? 0 : mfc_context->vp8_state.mv_probs[i][j + 3]) << 24 |
                          mfc_context->vp8_state.mv_probs[i][j + 2] << 16 |
                          mfc_context->vp8_state.mv_probs[i][j + 1] <<  8 |
                          mfc_context->vp8_state.mv_probs[i][j + 0] <<  0);
        }
    }

    /* DW33-DW34: reference / mode loop-filter deltas, 7-bit fields */
    OUT_BCS_BATCH(batch,
                  (pic_param->ref_lf_delta[3] & 0x7f) << 24 |
                  (pic_param->ref_lf_delta[2] & 0x7f) << 16 |
                  (pic_param->ref_lf_delta[1] & 0x7f) <<  8 |
                  (pic_param->ref_lf_delta[0] & 0x7f) <<  0);

    OUT_BCS_BATCH(batch,
                  (pic_param->mode_lf_delta[3] & 0x7f) << 24 |
                  (pic_param->mode_lf_delta[2] & 0x7f) << 16 |
                  (pic_param->mode_lf_delta[1] & 0x7f) <<  8 |
                  (pic_param->mode_lf_delta[0] & 0x7f) <<  0);

    /* DW35-DW37: reserved / zero */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
4047
/*
 * Emit a 64-bit graphics-address relocation for BO (+OFFSET) followed by
 * the MOCS dword — three DWs total.  A NULL BO emits two zero DWs in place
 * of the relocation.  Relies on `batch` and `i965` being in scope at the
 * call site.  Wrapped in do/while(0) so the macro acts as a single
 * statement and is safe inside unbraced if/else.
 */
#define OUT_VP8_BUFFER(bo, offset)                                      \
    do {                                                                \
        if (bo)                                                         \
            OUT_BCS_RELOC64(batch,                                      \
                            bo,                                         \
                            I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, \
                            offset);                                    \
        else {                                                          \
            OUT_BCS_BATCH(batch, 0);                                    \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
        OUT_BCS_BATCH(batch, i965->intel.mocs_state);                   \
    } while (0)
4059
static void
gen8_mfc_vp8_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
{
    /*
     * Emit MFX_VP8_BSP_BUF_BASE_ADDR_STATE (32 DWs): hands the bitstream
     * processor the frame-header buffer, the intermediate buffer (with
     * its eight partition offsets and total size), the final frame output
     * (coded buffer, past its header), and the stream-out, coefficient
     * probability stream-in, token statistics and MPC row-store buffers.
     */
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 32);
    OUT_BCS_BATCH(batch, MFX_VP8_BSP_BUF_BASE_ADDR_STATE | (32 - 2));

    OUT_VP8_BUFFER(mfc_context->vp8_state.frame_header_bo, 0);

    OUT_VP8_BUFFER(mfc_context->vp8_state.intermediate_bo, 0);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[0]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[1]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[2]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[3]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[4]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[5]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[6]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[7]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_buffer_max_size);

    /* final output lands in the coded buffer after its driver header */
    OUT_VP8_BUFFER(mfc_context->vp8_state.final_frame_bo, I965_CODEDBUFFER_HEADER_SIZE);
    OUT_BCS_BATCH(batch, 0);

    OUT_VP8_BUFFER(mfc_context->vp8_state.stream_out_bo, 0);
    OUT_VP8_BUFFER(mfc_context->vp8_state.coeff_probs_stream_in_bo, 0);
    OUT_VP8_BUFFER(mfc_context->vp8_state.token_statistics_bo, 0);
    OUT_VP8_BUFFER(mfc_context->vp8_state.mpc_row_store_bo, 0);

    ADVANCE_BCS_BATCH(batch);
}
4095
static void
gen8_mfc_vp8_pipeline_picture_programing(VADriverContextP ctx,
                                         struct encode_state *encode_state,
                                         struct intel_encoder_context *encoder_context)
{
    /*
     * Program the per-picture MFX state for VP8: pipe mode select,
     * surface state, indirect object base, buffer base addresses, then
     * the VP8-specific BSP buffers, picture state and encoder config.
     * NOTE(review): the command order appears to be significant for the
     * hardware — do not reorder without checking the PRM.
     */
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    mfc_context->pipe_mode_select(ctx, MFX_FORMAT_VP8, encoder_context);
    mfc_context->set_surface_state(ctx, encoder_context);
    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
    gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    gen8_mfc_vp8_bsp_buf_base_addr_state(ctx, encode_state, encoder_context);
    gen8_mfc_vp8_pic_state(ctx, encode_state, encoder_context);
    gen8_mfc_vp8_encoder_cfg(ctx, encode_state, encoder_context);
}
4112
/* Lookup table: VME whole-MB (16x16 luma / chroma) intra prediction mode
 * index -> PAK prediction mode encoding. */
static const unsigned char
vp8_intra_mb_mode_map[VME_MB_INTRA_MODE_COUNT] = {
    PAK_V_PRED,
    PAK_H_PRED,
    PAK_DC_PRED,
    PAK_TM_PRED
};
4120
/* Lookup table: VME 4x4 sub-block intra prediction mode index -> PAK
 * B-mode encoding. */
static const unsigned char
vp8_intra_block_mode_map[VME_B_INTRA_MODE_COUNT] = {
    PAK_B_VE_PRED,
    PAK_B_HE_PRED,
    PAK_B_DC_PRED,
    PAK_B_LD_PRED,
    PAK_B_RD_PRED,
    PAK_B_VR_PRED,
    PAK_B_HD_PRED,
    PAK_B_VL_PRED,
    PAK_B_HU_PRED
};
4133
4134 static int inline gen8_mfc_vp8_intra_mb_mode_map(unsigned int vme_pred_mode, int is_luma_4x4)
4135 {
4136     unsigned int i, pak_pred_mode = 0;
4137     unsigned int vme_sub_blocks_pred_mode[8], pak_sub_blocks_pred_mode[8]; /* 8 blocks's intra mode */
4138
4139     if (!is_luma_4x4) {
4140         pak_pred_mode = vp8_intra_mb_mode_map[vme_pred_mode & 0x3];
4141     } else {
4142         for (i = 0; i < 8; i++) {
4143             vme_sub_blocks_pred_mode[i] = ((vme_pred_mode >> (4 * i)) & 0xf);
4144             assert(vme_sub_blocks_pred_mode[i] < VME_B_INTRA_MODE_COUNT);
4145             pak_sub_blocks_pred_mode[i] = vp8_intra_block_mode_map[vme_sub_blocks_pred_mode[i]];
4146             pak_pred_mode |= (pak_sub_blocks_pred_mode[i] << (4 * i));
4147         }
4148     }
4149
4150     return pak_pred_mode;
4151 }
static void
gen8_mfc_vp8_pak_object_intra(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context,
                              unsigned int *msg,
                              int x, int y,
                              struct intel_batchbuffer *batch)
{
    /*
     * Emit one MFX_VP8_PAK_OBJECT (7 DWs) for an intra macroblock at MB
     * coordinates (x, y), translating the VME output message into PAK
     * prediction modes.
     *
     * msg layout (as read here): msg[0] bits 5:4 = intra MB mode,
     * msg[1]/msg[2] = packed luma prediction modes, msg[3] bits 1:0 =
     * chroma prediction mode.  Falls back to the context's default batch
     * when batch is NULL.
     */
    unsigned int vme_intra_mb_mode, vme_chroma_pred_mode;
    unsigned int pak_intra_mb_mode, pak_chroma_pred_mode;
    unsigned int vme_luma_pred_mode[2], pak_luma_pred_mode[2];

    if (batch == NULL)
        batch = encoder_context->base.batch;

    vme_intra_mb_mode = ((msg[0] & 0x30) >> 4);
    assert((vme_intra_mb_mode == 0) || (vme_intra_mb_mode == 2)); //vp8 only support intra_16x16 and intra_4x4
    /* VME mode 0 -> PAK 16x16 (0); VME mode 2 -> PAK 4x4 (1) */
    pak_intra_mb_mode = (vme_intra_mb_mode >> 1);

    vme_luma_pred_mode[0] = msg[1];
    vme_luma_pred_mode[1] = msg[2];
    vme_chroma_pred_mode = msg[3] & 0x3;

    pak_luma_pred_mode[0] = gen8_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[0], pak_intra_mb_mode);
    pak_luma_pred_mode[1] = gen8_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[1], pak_intra_mb_mode);
    pak_chroma_pred_mode = gen8_mfc_vp8_intra_mb_mode_map(vme_chroma_pred_mode, 0);

    BEGIN_BCS_BATCH(batch, 7);

    OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 20) |                    /* mv format: intra mb */
                  (0 << 18) |                    /* Segment ID */
                  (0 << 17) |                    /* disable coeff clamp */
                  (1 << 13) |                    /* intra mb flag */
                  (0 << 11) |                /* refer picture select: last frame */
                  (pak_intra_mb_mode << 8) |     /* mb type */
                  (pak_chroma_pred_mode << 4) |  /* mb uv mode */
                  (0 << 2) |                     /* skip mb flag: disable */
                  0);

    OUT_BCS_BATCH(batch, (y << 16) | x); /* MB position */
    OUT_BCS_BATCH(batch, pak_luma_pred_mode[0]);
    OUT_BCS_BATCH(batch, pak_luma_pred_mode[1]);

    ADVANCE_BCS_BATCH(batch);
}
4200
/* Emit one MFX_VP8_PAK_OBJECT command for an inter-coded (16x16) macroblock.
 *
 * msg points at the writable VME output record for this MB and offset is
 * the byte offset of that record inside the VME output buffer; the PAK
 * hardware fetches the motion vectors indirectly from there (64 bytes,
 * see the length field in dword 1).  The record's 16 MV slots are
 * rewritten in place with the single 16x16 motion vector.
 */
static void
gen8_mfc_vp8_pak_object_inter(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context,
                              unsigned int *msg,
                              int offset,
                              int x, int y,
                              struct intel_batchbuffer *batch)
{
    int i;

    if (batch == NULL)
        batch = encoder_context->base.batch;

    /* only support inter_16x16 now */
    assert((msg[AVC_INTER_MSG_OFFSET] & INTER_MODE_MASK) == INTER_16X16);
    /* for inter_16x16, all 16 MVs should be same,
     * and move mv to the vme mb start address to make sure offset is 64 bytes aligned;
     * as per the vp8 spec, all vp8 luma motion vectors are stored doubled
     */
    msg[0] = (((msg[AVC_INTER_MV_OFFSET / 4] & 0xffff0000) << 1) | ((msg[AVC_INTER_MV_OFFSET / 4] << 1) & 0xffff));

    /* Replicate the single 16x16 MV into all 16 sub-block slots. */
    for (i = 1; i < 16; i++) {
        msg[i] = msg[0];
    }

    BEGIN_BCS_BATCH(batch, 7);

    OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |           /* enable inline mv data: disable */
                  64);
    OUT_BCS_BATCH(batch,
                  offset);
    OUT_BCS_BATCH(batch,
                  (4 << 20) |           /* mv format: inter */
                  (0 << 18) |           /* Segment ID */
                  (0 << 17) |           /* coeff clamp: disable */
                  (0 << 13) |       /* intra mb flag: inter mb */
                  (0 << 11) |       /* refer picture select: last frame */
                  (0 << 8) |            /* mb type: 16x16 */
                  (0 << 4) |        /* mb uv mode: dc_pred */
                  (0 << 2) |        /* skip mb flag: disable */
                  0);

    OUT_BCS_BATCH(batch, (y << 16) | x);

    /*new mv*/
    OUT_BCS_BATCH(batch, 0x8);
    OUT_BCS_BATCH(batch, 0x8);

    ADVANCE_BCS_BATCH(batch);
}
4253
/* Fill slice_batch with one MFX_VP8_PAK_OBJECT command per macroblock.
 *
 * The VME output buffer is mapped and scanned in raster order.  On key
 * frames every MB is emitted as intra; on inter frames each MB is coded
 * intra when the VME intra RDO cost beats the inter cost, otherwise as an
 * inter 16x16 MB whose MVs the PAK reads indirectly from the VME record.
 */
static void
gen8_mfc_vp8_pak_pipeline(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context,
                          struct intel_batchbuffer *slice_batch)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
    unsigned int *msg = NULL;
    unsigned char *msg_ptr = NULL;
    unsigned int i, offset, is_intra_frame;

    /* pic_flags frame_type: 0 = key frame, non-zero = inter frame. */
    is_intra_frame = !pic_param->pic_flags.bits.frame_type;

    /* Write mapping is required: the inter path rewrites MV slots in place. */
    dri_bo_map(vme_context->vme_output.bo, 1);
    msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;

    for (i = 0; i < width_in_mbs * height_in_mbs; i++) {
        int h_pos = i % width_in_mbs;
        int v_pos = i / width_in_mbs;
        msg = (unsigned int *)(msg_ptr + i * vme_context->vme_output.size_block);

        if (is_intra_frame) {
            gen8_mfc_vp8_pak_object_intra(ctx,
                                          encoder_context,
                                          msg,
                                          h_pos, v_pos,
                                          slice_batch);
        } else {
            int inter_rdo, intra_rdo;
            inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
            intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;

            /* Pick the cheaper coding mode by RDO cost. */
            if (intra_rdo < inter_rdo) {
                gen8_mfc_vp8_pak_object_intra(ctx,
                                              encoder_context,
                                              msg,
                                              h_pos, v_pos,
                                              slice_batch);
            } else {
                offset = i * vme_context->vme_output.size_block;
                gen8_mfc_vp8_pak_object_inter(ctx,
                                              encoder_context,
                                              msg,
                                              offset,
                                              h_pos, v_pos,
                                              slice_batch);
            }
        }
    }

    dri_bo_unmap(vme_context->vme_output.bo);
}
4310
/*
 * Build the auxiliary (software-generated) batch buffer holding all VP8
 * PAK object commands for the frame.
 *
 * Ownership: the commands are written into the context's aux batchbuffer,
 * which is terminated with MI_BATCH_BUFFER_END (preceded by a NOOP dword
 * for qword alignment), its bo gets an extra reference that is returned to
 * the caller, and the batchbuffer wrapper itself is freed and detached
 * from the context.  The caller must unreference the returned bo.
 */
static dri_bo *
gen8_mfc_vp8_software_batchbuffer(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch;
    dri_bo *batch_bo;

    batch = mfc_context->aux_batchbuffer;
    batch_bo = batch->buffer;

    gen8_mfc_vp8_pak_pipeline(ctx, encode_state, encoder_context, batch);

    intel_batchbuffer_align(batch, 8);

    BEGIN_BCS_BATCH(batch, 2);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
    ADVANCE_BCS_BATCH(batch);

    /* Keep the bo alive for the caller while releasing the wrapper. */
    dri_bo_reference(batch_bo);
    intel_batchbuffer_free(batch);
    mfc_context->aux_batchbuffer = NULL;

    return batch_bo;
}
4341
/* Program the full BCS command sequence for one VP8 frame: picture-level
 * state in the main batch, then a chained jump (MI_BATCH_BUFFER_START)
 * into the software-built second-level batch of per-MB PAK objects. */
static void
gen8_mfc_vp8_pipeline_programing(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    dri_bo *slice_batch_bo;

    /* Returns a referenced bo; unreferenced at the end of this function. */
    slice_batch_bo = gen8_mfc_vp8_software_batchbuffer(ctx, encode_state, encoder_context);

    // begin programing
    intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
    intel_batchbuffer_emit_mi_flush(batch);

    // picture level programing
    gen8_mfc_vp8_pipeline_picture_programing(ctx, encode_state, encoder_context);

    /* Chain into the second-level batch containing the PAK objects. */
    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_BCS_RELOC64(batch,
                    slice_batch_bo,
                    I915_GEM_DOMAIN_COMMAND, 0,
                    0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);

    // end programing
    intel_batchbuffer_end_atomic(batch);

    dri_bo_unreference(slice_batch_bo);
}
4373
4374 static int gen8_mfc_calc_vp8_coded_buffer_size(VADriverContextP ctx,
4375                                                struct encode_state *encode_state,
4376                                                struct intel_encoder_context *encoder_context)
4377 {
4378     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
4379     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
4380     unsigned char is_intra_frame = !pic_param->pic_flags.bits.frame_type;
4381     unsigned int *vp8_encoding_status, i, first_partition_bytes, token_partition_bytes, vp8_coded_bytes;
4382
4383     int partition_num = 1 << pic_param->pic_flags.bits.num_token_partitions;
4384
4385     first_partition_bytes = token_partition_bytes = vp8_coded_bytes = 0;
4386
4387     dri_bo_map(mfc_context->vp8_state.token_statistics_bo, 0);
4388
4389     vp8_encoding_status = (unsigned int *)mfc_context->vp8_state.token_statistics_bo->virtual;
4390     first_partition_bytes = (vp8_encoding_status[0] + 7) / 8;
4391
4392     for (i = 1; i <= partition_num; i++)
4393         token_partition_bytes += (vp8_encoding_status[i] + 7) / 8;
4394
4395     /*coded_bytes includes P0~P8 partitions bytes + uncompresse date bytes + partion_size bytes in bitstream + 3 extra bytes */
4396     /*it seems the last partition size in vp8 status buffer is smaller than reality. so add 3 extra bytes */
4397     vp8_coded_bytes = first_partition_bytes + token_partition_bytes + (3 + 7 * !!is_intra_frame) + (partition_num - 1) * 3 + 3;
4398
4399     dri_bo_unmap(mfc_context->vp8_state.token_statistics_bo);
4400
4401     dri_bo_map(mfc_context->vp8_state.final_frame_bo, 0);
4402     struct i965_coded_buffer_segment *coded_buffer_segment = (struct i965_coded_buffer_segment *)(mfc_context->vp8_state.final_frame_bo->virtual);
4403     coded_buffer_segment->base.size = vp8_coded_bytes;
4404     dri_bo_unmap(mfc_context->vp8_state.final_frame_bo);
4405
4406     return vp8_coded_bytes;
4407 }
4408
/* Encode a single VP8 frame.
 *
 * Sequence: per-frame init -> surface/reference preparation -> BCS command
 * programming -> PAK execution -> coded-size readback.  Under CBR rate
 * control, the BRC post-pack step checks the HRD buffer model against the
 * actual frame size in bits; an unrecoverable overflow/underflow (already
 * at min/max QP) is logged once on stderr and otherwise ignored.
 *
 * NOTE(review): unlike the AVC path, there is no re-encode loop here for
 * repairable HRD violations — confirm this is intentional.
 *
 * Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen8_mfc_vp8_encode_picture(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    int current_frame_bits_size;
    int sts;

    gen8_mfc_vp8_init(ctx, encode_state, encoder_context);
    intel_mfc_vp8_prepare(ctx, encode_state, encoder_context);
    /*Programing bcs pipeline*/
    gen8_mfc_vp8_pipeline_programing(ctx, encode_state, encoder_context);
    gen8_mfc_run(ctx, encode_state, encoder_context);
    /* Coded size is tracked in bits for the HRD model. */
    current_frame_bits_size = 8 * gen8_mfc_calc_vp8_coded_buffer_size(ctx, encode_state, encoder_context);

    if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
        sts = gen8_mfc_vp8_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
        if (sts == BRC_NO_HRD_VIOLATION) {
            gen8_mfc_vp8_hrd_context_update(encode_state, mfc_context);
        } else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
            /* QP is already pegged at its limit: report once, then give up. */
            if (!mfc_context->hrd.violation_noted) {
                fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP) ? "overflow" : "underflow");
                mfc_context->hrd.violation_noted = 1;
            }
            return VA_STATUS_SUCCESS;
        }
    }

    return VA_STATUS_SUCCESS;
}
4441
/* Release every buffer object and sub-context owned by the MFC context,
 * NULL out the freed handles, and finally free the context itself.
 * Registered as encoder_context->mfc_context_destroy. */
static void
gen8_mfc_context_destroy(void *context)
{
    struct gen6_mfc_context *mfc_context = context;
    int i;

    /* Common MFC surfaces and scratch buffers. */
    dri_bo_unreference(mfc_context->post_deblocking_output.bo);
    mfc_context->post_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
    mfc_context->pre_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
    mfc_context->uncompressed_picture_source.bo = NULL;

    dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
    mfc_context->mfc_indirect_pak_bse_object.bo = NULL;

    for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
        dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
        mfc_context->direct_mv_buffers[i].bo = NULL;
    }

    dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
    mfc_context->intra_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
    mfc_context->macroblock_status_buffer.bo = NULL;

    dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
    mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
    mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;


    for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
        dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
        mfc_context->reference_surfaces[i].bo = NULL;
    }

    /* Media (GPE) kernel context and batch-buffer surfaces. */
    gen8_gpe_context_destroy(&mfc_context->gpe_context);

    dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
    mfc_context->mfc_batchbuffer_surface.bo = NULL;

    dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.bo = NULL;

    if (mfc_context->aux_batchbuffer)
        intel_batchbuffer_free(mfc_context->aux_batchbuffer);

    mfc_context->aux_batchbuffer = NULL;

    /* VP8-specific state buffers. */
    dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
    mfc_context->vp8_state.coeff_probs_stream_in_bo = NULL;

    dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
    mfc_context->vp8_state.final_frame_bo = NULL;

    dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
    mfc_context->vp8_state.frame_header_bo = NULL;

    dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
    mfc_context->vp8_state.intermediate_bo = NULL;

    dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
    mfc_context->vp8_state.mpc_row_store_bo = NULL;

    dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
    mfc_context->vp8_state.stream_out_bo = NULL;

    dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
    mfc_context->vp8_state.token_statistics_bo = NULL;

    free(mfc_context);
}
4519
4520 static VAStatus gen8_mfc_pipeline(VADriverContextP ctx,
4521                                   VAProfile profile,
4522                                   struct encode_state *encode_state,
4523                                   struct intel_encoder_context *encoder_context)
4524 {
4525     VAStatus vaStatus;
4526
4527     switch (profile) {
4528     case VAProfileH264ConstrainedBaseline:
4529     case VAProfileH264Main:
4530     case VAProfileH264High:
4531     case VAProfileH264MultiviewHigh:
4532     case VAProfileH264StereoHigh:
4533         vaStatus = gen8_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
4534         break;
4535
4536     case VAProfileMPEG2Simple:
4537     case VAProfileMPEG2Main:
4538         vaStatus = gen8_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
4539         break;
4540
4541     case VAProfileJPEGBaseline:
4542         jpeg_init_default_qmatrix(ctx, encoder_context);
4543         vaStatus = gen8_mfc_jpeg_encode_picture(ctx, encode_state, encoder_context);
4544         break;
4545
4546     case VAProfileVP8Version0_3:
4547         vaStatus = gen8_mfc_vp8_encode_picture(ctx, encode_state, encoder_context);
4548         break;
4549
4550     default:
4551         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
4552         break;
4553     }
4554
4555     return vaStatus;
4556 }
4557
4558 extern Bool i965_encoder_vp8_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
4559
4560 Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
4561 {
4562     struct i965_driver_data *i965 = i965_driver_data(ctx);
4563     struct gen6_mfc_context *mfc_context;
4564
4565     if (IS_CHERRYVIEW(i965->intel.device_info) && encoder_context->codec == CODEC_VP8)
4566         return i965_encoder_vp8_pak_context_init(ctx, encoder_context);
4567
4568     mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
4569     assert(mfc_context);
4570     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
4571
4572     mfc_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
4573     mfc_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
4574     mfc_context->gpe_context.curbe.length = 32 * 4;
4575     mfc_context->gpe_context.sampler.entry_size = 0;
4576     mfc_context->gpe_context.sampler.max_entries = 0;
4577
4578     if (i965->intel.eu_total > 0)
4579         mfc_context->gpe_context.vfe_state.max_num_threads = 6 * i965->intel.eu_total;
4580     else
4581         mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
4582
4583     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
4584     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
4585     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
4586     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
4587
4588     if (IS_GEN9(i965->intel.device_info)) {
4589         gen8_gpe_load_kernels(ctx,
4590                               &mfc_context->gpe_context,
4591                               gen9_mfc_kernels,
4592                               1);
4593     } else {
4594         gen8_gpe_load_kernels(ctx,
4595                               &mfc_context->gpe_context,
4596                               gen8_mfc_kernels,
4597                               1);
4598     }
4599
4600     mfc_context->pipe_mode_select = gen8_mfc_pipe_mode_select;
4601     mfc_context->set_surface_state = gen8_mfc_surface_state;
4602     mfc_context->ind_obj_base_addr_state = gen8_mfc_ind_obj_base_addr_state;
4603     mfc_context->avc_img_state = gen8_mfc_avc_img_state;
4604     mfc_context->avc_qm_state = gen8_mfc_avc_qm_state;
4605     mfc_context->avc_fqm_state = gen8_mfc_avc_fqm_state;
4606     mfc_context->insert_object = gen8_mfc_avc_insert_object;
4607     mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
4608
4609     encoder_context->mfc_context = mfc_context;
4610     encoder_context->mfc_context_destroy = gen8_mfc_context_destroy;
4611     encoder_context->mfc_pipeline = gen8_mfc_pipeline;
4612
4613     if (encoder_context->codec == CODEC_VP8)
4614         encoder_context->mfc_brc_prepare = gen8_mfc_vp8_brc_prepare;
4615     else
4616         encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
4617
4618     return True;
4619 }