OSDN Git Service

test/streamables: use new vaStr methods when appropriate
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV     2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
/* Classic 8x8 zig-zag scan order: entry k gives the raster (row-major)
 * index of the k-th coefficient in zig-zag scan, used to reorder
 * quantization matrices supplied in zig-zag order into raster order. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
61
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77
78         if (!gen7_avc_surface)
79             return;
80
81         gen7_avc_surface->base.frame_store_id = -1;
82         assert((obj_surface->size & 0x3f) == 0);
83         obj_surface->private_data = gen7_avc_surface;
84     }
85
86     /* DMV buffers now relate to the whole frame, irrespective of
87        field coding modes */
88     if (gen7_avc_surface->dmv_top == NULL) {
89         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
90                                                  "direct mv w/r buffer",
91                                                  width_in_mbs * height_in_mbs * 128,
92                                                  0x1000);
93         assert(gen7_avc_surface->dmv_top);
94     }
95 }
96
/*
 * Emit MFX_PIPE_MODE_SELECT: configure the MFX engine for VLD decoding of
 * the selected codec (standard_select must be one of the MFX_FORMAT_*
 * values asserted below).  Output routing (pre- vs post-deblocking) follows
 * whichever output the caller marked valid in the context; stream-out and
 * all error-termination conditions are disabled.
 */
static void
gen8_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1 ||
           standard_select == MFX_FORMAT_JPEG ||
           standard_select == MFX_FORMAT_VP8);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
132
/*
 * Emit MFX_SURFACE_STATE describing the render-target surface: dimensions,
 * pitch, mandatory Y-major tiling, and the Y offsets of the Cb/Cr planes.
 * Y800 (monochrome) surfaces are programmed as MFX_SURFACE_MONOCHROME;
 * everything else as 8-bit planar 4:2:0.  Chroma interleave is enabled for
 * all codecs except JPEG.
 */
static void
gen8_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface = decode_state->render_object;
    unsigned int y_cb_offset;
    unsigned int y_cr_offset;
    unsigned int surface_format;

    assert(obj_surface);

    y_cb_offset = obj_surface->y_cb_offset;
    y_cr_offset = obj_surface->y_cr_offset;

    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
                     MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 18) |
                  ((obj_surface->orig_width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (surface_format << 28) | /* 420 planar YUV surface */
                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
175
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): the pre/post-deblocking output
 * buffers, the intra and deblocking-filter row-store scratch buffers, the
 * 16 reference picture addresses, and the (unused) macroblock-status
 * buffers.  Each 64-bit address occupies two dwords — invalid/absent
 * buffers are programmed as two zero dwords — and each address group is
 * followed by a MOCS (cache attribute) dword.
 */
static void
gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* Pre-deblock 1-3 */
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->pre_deblocking_output.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Post-deblocking 4-6 */
    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->post_deblocking_output.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* intra row-store scratch 13-15 */
    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* deblocking-filter-row-store 16-18 */
    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 19..50: one 64-bit address (two dwords) per reference surface slot */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->bo) {
            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

            OUT_BCS_RELOC64(batch, obj_surface->bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }

    }

    /* reference property 51 */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Macroblock status & ILDB 52-57 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the second Macroblock status 58-60 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
281
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE: point the bitstream decoder at the
 * slice data buffer.  The remaining indirect objects (MV, IT-COFF,
 * IT-DBLK, and the encoder-only PAK-BSE object) are unused for VLD
 * decoding and programmed to zero, including their upper-bound dwords.
 */
static void
gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    BEGIN_BCS_BATCH(batch, 26);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* MFX In BS 1-5 */
    OUT_BCS_RELOC64(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* Upper bound 4-5 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX indirect MV 6-10 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX IT_COFF 11-15 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX IT_DBLK 16-20 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX PAK_BSE object for encoder 21-25 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
330
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): the BSD/MPC row-store
 * scratch buffer, the MPR row-store scratch buffer, and the bitplane read
 * buffer (VC-1 only).  Each 64-bit address (two dwords, zeros when the
 * buffer is not valid) is followed by its MOCS dword.
 */
static void
gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* MPR Row Store Scratch buffer 4-6 */
    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Bitplane 7-9 */
    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, 0,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    ADVANCE_BCS_BATCH(batch);
}
377
378 static void
379 gen8_mfd_qm_state(VADriverContextP ctx,
380                   int qm_type,
381                   unsigned char *qm,
382                   int qm_length,
383                   struct gen7_mfd_context *gen7_mfd_context)
384 {
385     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
386     unsigned int qm_buffer[16];
387
388     assert(qm_length <= 16 * 4);
389     memcpy(qm_buffer, qm, qm_length);
390
391     BEGIN_BCS_BATCH(batch, 18);
392     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
393     OUT_BCS_BATCH(batch, qm_type << 0);
394     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
395     ADVANCE_BCS_BATCH(batch);
396 }
397
/*
 * Emit MFX_AVC_IMG_STATE from the H.264 picture parameters: frame size in
 * macroblocks, QP index offsets, prediction/entropy flags, and the picture
 * structure.  img_struct encodes the coding structure as programmed below:
 * 0 = frame, 1 = top field, 3 = bottom field.  Only monochrome and 4:2:0
 * chroma formats are supported by the MFX unit.
 */
static void
gen8_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* Field pictures must set field_pic_flag; frame pictures must not. */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF: sequence allows adaptive frame/field and this is a frame pic */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 17);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
    OUT_BCS_BATCH(batch,
                  (width_in_mbs * height_in_mbs - 1));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
483
484 static void
485 gen8_mfd_avc_qm_state(VADriverContextP ctx,
486                       struct decode_state *decode_state,
487                       struct gen7_mfd_context *gen7_mfd_context)
488 {
489     VAIQMatrixBufferH264 *iq_matrix;
490     VAPictureParameterBufferH264 *pic_param;
491
492     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
493         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
494     else
495         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
496
497     assert(decode_state->pic_param && decode_state->pic_param->buffer);
498     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
499
500     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
501     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
502
503     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
504         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
505         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
506     }
507 }
508
509 static inline void
510 gen8_mfd_avc_picid_state(VADriverContextP ctx,
511                          struct decode_state *decode_state,
512                          struct gen7_mfd_context *gen7_mfd_context)
513 {
514     gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
515                                gen7_mfd_context->reference_surface);
516 }
517
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): the direct-MV buffer of each
 * reference surface and of the current frame, followed by the POC list
 * (top/bottom field order counts) for every reference slot and finally for
 * the current picture.  Reference slots without private data emit zero
 * address dwords.
 */
static void
gen8_mfd_avc_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen7_avc_surface;
    VAPictureH264 *va_pic;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->private_data) {

            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
            gen7_avc_surface = obj_surface->private_data;

            OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the current decoding frame/field */
    va_pic = &pic_param->CurrPic;
    obj_surface = decode_state->render_object;
    assert(obj_surface->bo && obj_surface->private_data);
    gen7_avc_surface = obj_surface->private_data;

    OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* POC List */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

        if (obj_surface) {
            /* NOTE: this inner va_pic intentionally shadows the outer one. */
            const VAPictureH264 * const va_pic = avc_find_picture(
                                                     obj_surface->base.id, pic_param->ReferenceFrames,
                                                     ARRAY_ELEMS(pic_param->ReferenceFrames));

            assert(va_pic != NULL);
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* POC of the current picture closes the list */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
591
592 static void
593 gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
594                                  VAPictureParameterBufferH264 *pic_param,
595                                  VASliceParameterBufferH264 *next_slice_param,
596                                  struct gen7_mfd_context *gen7_mfd_context)
597 {
598     gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
599 }
600
/*
 * Emit MFX_AVC_SLICE_STATE for one slice: normalized slice type (SI/SP are
 * mapped to I/P), active reference counts for both lists, QP and deblocking
 * parameters, and the macroblock positions where this slice starts and the
 * next one begins.  When next_slice_param is NULL this is the last slice:
 * the "next" position is the end of the picture and the last-slice flag is
 * set.  MBAFF pictures double the vertical positions since each MB pair
 * spans two rows.
 */
static void
gen8_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* Collapse SI->I and SP->P for the hardware slice-type field. */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    first_mb_in_slice = slice_param->first_mb_in_slice;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    if (mbaff_picture)
        slice_ver_pos = slice_ver_pos << 1;
    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;

        if (mbaff_picture)
            next_slice_ver_pos = next_slice_ver_pos << 1;
    } else {
        /* Last slice: "next" position is the bottom of the picture
         * (half the frame height for a field picture). */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
691
692 static inline void
693 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
694                            VAPictureParameterBufferH264 *pic_param,
695                            VASliceParameterBufferH264 *slice_param,
696                            struct gen7_mfd_context *gen7_mfd_context)
697 {
698     gen6_send_avc_ref_idx_state(
699         gen7_mfd_context->base.batch,
700         slice_param,
701         gen7_mfd_context->reference_surface
702     );
703 }
704
/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE for explicit weighted prediction.
 * One table is emitted for list 0 of weighted P/SP slices, two tables
 * (list 0 then list 1) for B slices with weighted_bipred_idc == 1;
 * otherwise no command is emitted.  Each table packs, per reference index
 * (32 entries), six 16-bit values: luma weight/offset followed by the
 * Cb and Cr weight/offset pairs.
 */
static void
gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
                                VAPictureParameterBufferH264 *pic_param,
                                VASliceParameterBufferH264 *slice_param,
                                struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i, j, num_weight_offset_table = 0;
    short weightoffsets[32 * 6];

    if ((slice_param->slice_type == SLICE_TYPE_P ||
         slice_param->slice_type == SLICE_TYPE_SP) &&
        (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
        num_weight_offset_table = 1;
    }

    if ((slice_param->slice_type == SLICE_TYPE_B) &&
        (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
        num_weight_offset_table = 2;
    }

    for (i = 0; i < num_weight_offset_table; i++) {
        BEGIN_BCS_BATCH(batch, 98);
        OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
        OUT_BCS_BATCH(batch, i); /* 0 = list 0 table, 1 = list 1 table */

        if (i == 0) {
            for (j = 0; j < 32; j++) {
                weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
                weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
                weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
                weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
                weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
                weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
            }
        } else {
            for (j = 0; j < 32; j++) {
                weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
                weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
                weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
                weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
                weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
                weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
            }
        }

        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
        ADVANCE_BCS_BATCH(batch);
    }
}
755
/*
 * Emit the MFD_AVC_BSD_OBJECT command that triggers bitstream decoding
 * of one slice.  The slice data itself was made addressable earlier via
 * the indirect-object base address state; here only its size, offset
 * and the bit position of the first macroblock are programmed.
 */
static void
gen8_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* Bit offset of the first macroblock inside the slice data; the
     * helper accounts for the CABAC/CAVLC difference in header length. */
    int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
                                                            slice_param,
                                                            pic_param->pic_fields.bits.entropy_coding_mode_flag);

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_data_size));
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) | /* byte portion of the first-MB offset */
                  (1 << 7)  |
                  (0 << 5)  |
                  (0 << 4)  |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7));     /* residual bit portion */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
791
/*
 * One-time AVC context setup: seed the cached IQ matrix with the
 * spec-default flat scaling lists so decoding works even before the
 * application submits a VAIQMatrixBufferH264.
 */
static inline void
gen8_mfd_avc_context_init(
    VADriverContextP         ctx,
    struct gen7_mfd_context *gen7_mfd_context
)
{
    /* Initialize flat scaling lists */
    avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
}
801
/*
 * Per-picture setup for AVC decoding: decides whether in-loop
 * deblocking output is required, updates the reference frame store,
 * binds the render target to the proper (pre/post deblocking) output
 * path and (re)allocates the row-store scratch buffers sized from the
 * coded picture width.
 */
static void
gen8_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    unsigned int width_in_mbs, height_in_mbs;

    /* Deblocking is needed as soon as any slice does not fully disable
     * it (disable_deblocking_filter_idc != 1); stop scanning early. */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
                                       gen7_mfd_context->reference_surface);
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    if (pic_param->pic_fields.bits.reference_pic_flag)
        obj_surface->flags |= SURFACE_REFERENCED;
    else
        obj_surface->flags &= ~SURFACE_REFERENCED;

    avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
    gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* Exactly one of the two output paths is valid per picture,
     * depending on whether deblocking runs. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Row-store scratch buffers: sizes scale with picture width in MBs
     * (constants inherited from the hardware programming model). */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* AVC has no bitplane buffer (VC-1 only). */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
903
/*
 * Top-level AVC picture decode: programs the per-picture MFX state,
 * then walks every slice parameter buffer and emits the per-slice
 * state followed by the BSD object that actually decodes the slice.
 */
static void
gen8_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);

    /* Per-picture MFX pipeline state, emitted once before any slice. */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);

        /* Look ahead to the first slice of the next parameter buffer so
         * the hardware can be told where the current slice group ends. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        /* If the first slice does not start at MB 0, emit a phantom
         * slice to cover the missing leading macroblocks. */
        if (j == 0 && slice_param->first_mb_in_slice)
            gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
968
/*
 * Per-picture setup for MPEG-2 decoding: binds reference and render
 * surfaces and allocates the BSD/MPC row-store scratch buffer sized
 * from the coded width.  MPEG-2 uses only the pre-deblocking output
 * path; all other scratch buffers are marked invalid.
 */
static void
gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferMPEG2 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    unsigned int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;

    mpeg2_set_reference_surfaces(
        ctx,
        gen7_mfd_context->reference_surface,
        decode_state,
        pic_param
    );

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* Unused by the MPEG-2 pipeline. */
    gen7_mfd_context->post_deblocking_output.valid = 0;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
1015
/*
 * Emit MFX_MPEG2_PIC_STATE: packs the four f_code nibbles, the picture
 * coding extension flags, the coding type and the picture dimensions
 * (in macroblocks, minus one) into the command.
 */
static void
gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int slice_concealment_disable_bit = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    /* Slice concealment is unconditionally disabled here. */
    slice_concealment_disable_bit = 1;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    /* Frame size in macroblocks, both fields encoded as (count - 1). */
    OUT_BCS_BATCH(batch,
                  (slice_concealment_disable_bit << 31) |
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
1062
1063 static void
1064 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1065                         struct decode_state *decode_state,
1066                         struct gen7_mfd_context *gen7_mfd_context)
1067 {
1068     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1069     int i, j;
1070
1071     /* Update internal QM state */
1072     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1073         VAIQMatrixBufferMPEG2 * const iq_matrix =
1074             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1075
1076         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1077             iq_matrix->load_intra_quantiser_matrix) {
1078             gen_iq_matrix->load_intra_quantiser_matrix =
1079                 iq_matrix->load_intra_quantiser_matrix;
1080             if (iq_matrix->load_intra_quantiser_matrix) {
1081                 for (j = 0; j < 64; j++)
1082                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1083                         iq_matrix->intra_quantiser_matrix[j];
1084             }
1085         }
1086
1087         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1088             iq_matrix->load_non_intra_quantiser_matrix) {
1089             gen_iq_matrix->load_non_intra_quantiser_matrix =
1090                 iq_matrix->load_non_intra_quantiser_matrix;
1091             if (iq_matrix->load_non_intra_quantiser_matrix) {
1092                 for (j = 0; j < 64; j++)
1093                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1094                         iq_matrix->non_intra_quantiser_matrix[j];
1095             }
1096         }
1097     }
1098
1099     /* Commit QM state to HW */
1100     for (i = 0; i < 2; i++) {
1101         unsigned char *qm = NULL;
1102         int qm_type;
1103
1104         if (i == 0) {
1105             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1106                 qm = gen_iq_matrix->intra_quantiser_matrix;
1107                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1108             }
1109         } else {
1110             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1111                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1112                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1113             }
1114         }
1115
1116         if (!qm)
1117             continue;
1118
1119         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1120     }
1121 }
1122
/*
 * Emit MFD_MPEG2_BSD_OBJECT for one slice.  VA-API does not carry a
 * per-slice macroblock count, so it is derived from the start positions
 * of this slice and the next one (or the picture end for the last
 * slice).
 */
static void
gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    /* Workaround: some streams report slice_vertical_position in frame
     * units even for field pictures; wa_mpeg2_slice_vertical_position
     * (probed once per picture) tells us to halve it in that case. */
    is_field_pic_wa = is_field_pic &&
                      gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    if (next_slice_param == NULL) {
        /* Last slice: extends to the bottom of the (field) picture. */
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    /* Macroblocks between the two start positions in raster order. */
    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    /* Skip the slice header: macroblock_offset is in bits. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (vpos1 << 8 | hpos1));
    ADVANCE_BCS_BATCH(batch);
}
1171
/*
 * Top-level MPEG-2 picture decode: emits the per-picture MFX state,
 * then one BSD object per slice.
 */
static void
gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Probe the field-picture slice-position workaround once; a value
     * < 0 means it has not been determined for this context yet. */
    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);

        /* Look ahead across parameter-buffer boundaries so each slice
         * knows where the next one starts. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1227
/* Map VA-API VC-1 picture_type (0..4) to the GEN7 MFX picture-type
 * encoding; index 4 (presumably the skipped-picture type) is decoded
 * as a P picture. */
static const int va_to_gen7_vc1_pic_type[5] = {
    GEN7_VC1_I_PICTURE,
    GEN7_VC1_P_PICTURE,
    GEN7_VC1_B_PICTURE,
    GEN7_VC1_BI_PICTURE,
    GEN7_VC1_P_PICTURE,
};
1235
/* Map VA-API VC-1 motion-vector mode to the GEN7 MFX MV-mode field. */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};
1242
/* B-picture temporal scale factors; presumably indexed by the BFRACTION
 * code of the bitstream (see VC-1 spec) — TODO confirm against the
 * caller that computes scale_factor. */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160,
    224,
};
1250
/* Map VA-API VC-1 conditional-overlap (condover) values to the GEN7
 * MFX encoding. */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};
1256
/* Map VA-API VC-1 profile (simple/main/reserved/advanced) to the GEN7
 * MFX profile encoding. */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
};
1263
1264 static void
1265 gen8_mfd_free_vc1_surface(void **data)
1266 {
1267     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1268
1269     if (!gen7_vc1_surface)
1270         return;
1271
1272     dri_bo_unreference(gen7_vc1_surface->dmv);
1273     free(gen7_vc1_surface);
1274     *data = NULL;
1275 }
1276
1277 static void
1278 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1279                           VAPictureParameterBufferVC1 *pic_param,
1280                           struct object_surface *obj_surface)
1281 {
1282     struct i965_driver_data *i965 = i965_driver_data(ctx);
1283     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1284     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1285     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1286
1287     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1288
1289     if (!gen7_vc1_surface) {
1290         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1291
1292         if (!gen7_vc1_surface)
1293             return;
1294
1295         assert((obj_surface->size & 0x3f) == 0);
1296         obj_surface->private_data = gen7_vc1_surface;
1297     }
1298
1299     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1300     gen7_vc1_surface->intensity_compensation = 0;
1301     gen7_vc1_surface->luma_scale = 0;
1302     gen7_vc1_surface->luma_shift = 0;
1303
1304     if (gen7_vc1_surface->dmv == NULL) {
1305         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1306                                              "direct mv w/r buffer",
1307                                              width_in_mbs * height_in_mbs * 64,
1308                                              0x1000);
1309     }
1310 }
1311
/*
 * Per-picture setup for VC-1 decoding: updates the reference frame
 * store, records intensity-compensation parameters on the forward
 * reference, binds the render target to the pre- or post-deblocking
 * output path, allocates row-store scratch buffers and, when the
 * picture carries bitplane data, repacks it into the hardware layout.
 */
static void
gen8_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int width_in_mbs;
    int picture_type;
    int intensity_compensation;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    picture_type = pic_param->picture_fields.bits.picture_type;
    intensity_compensation = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);

    intel_update_vc1_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Forward reference picture */
    obj_surface = decode_state->reference_objects[0];
    if (pic_param->forward_reference_picture != VA_INVALID_ID &&
        obj_surface &&
        obj_surface->private_data) {
        if (picture_type == 1 && intensity_compensation) { /* P picture */
            struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;

            /* Remember the luma scale/shift on the reference so later
             * pictures predicting from it can redo the compensation. */
            gen7_vc1_surface->intensity_compensation = intensity_compensation;
            gen7_vc1_surface->luma_scale = pic_param->luma_scale;
            gen7_vc1_surface->luma_shift = pic_param->luma_shift;
        }
    }

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* Output path depends on whether the in-loop filter runs. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 7 * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);

    if (gen7_mfd_context->bitplane_read_buffer.valid) {
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        /* Destination layout: two macroblocks per byte (4 bits each),
         * one row of bitplane_width bytes per MB row.  NOTE(review):
         * layout inferred from the packing below — confirm against the
         * MFX bitplane documentation. */
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
        int src_w, src_h;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "VC-1 Bitplane",
                          bitplane_width * height_in_mbs,
                          0x1000);
        assert(bo);
        gen7_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);
        dst = bo->virtual;

        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for (src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;
                int src_shift;
                uint8_t src_value;

                /* Source is nibble-packed in raster order over the
                 * whole picture; even-numbered MBs sit in the high
                 * nibble of the source byte. */
                src_index = (src_h * width_in_mbs + src_w) / 2;
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                /* For skipped pictures, force bit 1 in every entry —
                 * NOTE(review): presumably the per-MB skip flag. */
                if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
                    src_value |= 0x2;
                }

                /* Shift the previous nibble down and insert the new
                 * value into the high nibble of the output byte. */
                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
            }

            /* Odd row width: the last byte holds only one MB; align
             * its nibble to the low half. */
            if (src_w & 1)
                dst[src_w / 2] >>= 4;

            dst += bitplane_width;
        }

        dri_bo_unmap(bo);
    } else
        gen7_mfd_context->bitplane_read_buffer.bo = NULL;
}
1446
/*
 * Emit MFD_VC1_LONG_PIC_STATE for the current picture.
 *
 * Translates the VA-API VC-1 picture parameters into the per-picture
 * decoder state: the alternative picture-quantizer (DQUANT) config and
 * edge mask, the unified motion-vector mode, the B-picture scale
 * factor, the frame coding mode, overlap-smoothing enablement and the
 * sub-pel interpolation mode, then packs them into a 6-dword command.
 */
static void
gen8_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;
    int interpolation_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /*
     * Translate the VC-1 DQUANT syntax elements into the hardware's
     * alternative-pquant configuration plus a bitmask of the picture
     * edges the alternative quantizer applies to.
     */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        /* DQUANT == 2: ALTPQUANT always applies to all four edges. */
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3: /* all macroblocks (per-MB quantizer via DQBILEVEL) */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;

            case 0: /* all four edges */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1: /* double edge: two adjacent edges from DQDBEDGE */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2: /* single edge selected by DQSBEDGE */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /* Under intensity compensation the effective MV mode is carried in
     * mv_mode2; otherwise in mv_mode itself. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    /* BFRACTION index -> scale factor used for direct-mode MV scaling;
     * indices >= 21 leave scale_factor at 0. */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];

    /* Advanced-profile I pictures are programmed as BI for the hardware. */
    if (profile == GEN7_VC1_ADVANCED_PROFILE &&
        picture_type == GEN7_VC1_I_PICTURE)
        picture_type = GEN7_VC1_BI_PICTURE;

    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;

        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /* Direct-mode MVs for a B picture are read from the backward
     * reference; they are only usable if that reference carried MV data
     * (i.e. was not an I/BI picture). */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        struct gen7_vc1_surface *gen7_vc1_surface = NULL;

        obj_surface = decode_state->reference_objects[1];

        if (obj_surface)
            gen7_vc1_surface = obj_surface->private_data;

        if (!gen7_vc1_surface ||
            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* Frame coding mode field: 0 = progressive, 1 = frame-interlace,
     * 2/3 = field-interlace, distinguished by which field comes first. */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /* NOTE(review): the raw VA picture_type is compared against the
     * GEN7_* constant here (unlike the converted value used above) —
     * this assumes both encodings coincide for B pictures; verify
     * against va_to_gen7_vc1_pic_type. */
    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /* Overlap smoothing only takes effect above a quantizer threshold;
     * advanced profile adds conditional-overlap (CONDOVER) rules for
     * I/BI pictures. */
    overlap = pic_param->sequence_fields.bits.overlap;

    if (overlap) {
        overlap = 0;
        if (profile != GEN7_VC1_ADVANCED_PROFILE) {
            if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
                pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
                overlap = 1;
            }
        } else {
            if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
                pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
                overlap = 1;
            }
            if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
                pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
                if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
                    overlap = 1;
                } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                           va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
                    overlap = 1;
                }
            }
        }
    }

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    /* Pick the MC interpolation filter implied by the (effective) MV mode. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 9; /* Half-pel bilinear */
    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
        interpolation_mode = 1; /* Half-pel bicubic */
    else
        interpolation_mode = 0; /* Quarter-pel bicubic */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    /* DW1: picture size in macroblocks (minus one). */
    OUT_BCS_BATCH(batch,
                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    /* DW2: stream/global decode controls. */
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
                  overlap << 4 |
                  !pic_param->picture_fields.bits.is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    /* DW3: picture type / quantizer / B scale factor. */
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  picture_type << 26 |
                  fcm << 24 |
                  alt_pq << 16 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
                  scale_factor << 0);
    /* DW4: motion-vector and quantizer configuration. */
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  pic_param->reference_fields.bits.reference_distance << 20 |
                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    /* DW5: bitplane presence (inverted "raw mode" flags) + VLC table
     * selectors. */
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 31 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
}
1708
1709 static void
1710 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1711                              struct decode_state *decode_state,
1712                              struct gen7_mfd_context *gen7_mfd_context)
1713 {
1714     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1715     VAPictureParameterBufferVC1 *pic_param;
1716     int picture_type;
1717     int intensitycomp_single_fwd = 0;
1718     int luma_scale1 = 0;
1719     int luma_shift1 = 0;
1720
1721     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1722     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1723     picture_type = pic_param->picture_fields.bits.picture_type;
1724
1725     if (gen7_mfd_context->reference_surface[0].surface_id != VA_INVALID_ID) {
1726         if (picture_type == 1 || picture_type == 2) { /* P/B picture */
1727             struct gen7_vc1_surface *gen7_vc1_surface = gen7_mfd_context->reference_surface[0].obj_surface->private_data;
1728             if (gen7_vc1_surface) {
1729                 intensitycomp_single_fwd = gen7_vc1_surface->intensity_compensation;
1730                 luma_scale1 = gen7_vc1_surface->luma_scale;
1731                 luma_shift1 = gen7_vc1_surface->luma_shift;
1732             }
1733         }
1734     }
1735
1736     BEGIN_BCS_BATCH(batch, 6);
1737     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1738     OUT_BCS_BATCH(batch,
1739                   0 << 14 | /* FIXME: double ??? */
1740                   0 << 12 |
1741                   intensitycomp_single_fwd << 10 |
1742                   0 << 8 |
1743                   0 << 4 | /* FIXME: interlace mode */
1744                   0);
1745     OUT_BCS_BATCH(batch,
1746                   luma_shift1 << 16 |
1747                   luma_scale1 << 0);
1748     OUT_BCS_BATCH(batch, 0);
1749     OUT_BCS_BATCH(batch, 0);
1750     OUT_BCS_BATCH(batch, 0);
1751     ADVANCE_BCS_BATCH(batch);
1752 }
1753
1754 static void
1755 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1756                               struct decode_state *decode_state,
1757                               struct gen7_mfd_context *gen7_mfd_context)
1758 {
1759     struct i965_driver_data *i965 = i965_driver_data(ctx);
1760     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1761     struct object_surface *obj_surface;
1762     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1763
1764     obj_surface = decode_state->render_object;
1765
1766     if (obj_surface && obj_surface->private_data) {
1767         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1768     }
1769
1770     obj_surface = decode_state->reference_objects[1];
1771
1772     if (obj_surface && obj_surface->private_data) {
1773         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1774     }
1775
1776     BEGIN_BCS_BATCH(batch, 7);
1777     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1778
1779     if (dmv_write_buffer)
1780         OUT_BCS_RELOC64(batch, dmv_write_buffer,
1781                         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1782                         0);
1783     else {
1784         OUT_BCS_BATCH(batch, 0);
1785         OUT_BCS_BATCH(batch, 0);
1786     }
1787
1788     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
1789
1790     if (dmv_read_buffer)
1791         OUT_BCS_RELOC64(batch, dmv_read_buffer,
1792                         I915_GEM_DOMAIN_INSTRUCTION, 0,
1793                         0);
1794     else {
1795         OUT_BCS_BATCH(batch, 0);
1796         OUT_BCS_BATCH(batch, 0);
1797     }
1798
1799     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
1800
1801     ADVANCE_BCS_BATCH(batch);
1802 }
1803
/*
 * Convert a bit offset within the logical (unescaped) slice header into
 * a bit offset within the raw slice data.
 *
 * Only the advanced profile (3) inserts 0x03 emulation-prevention bytes
 * (pattern 00 00 03 0x, with the next byte below 4) into the bitstream;
 * for the other profiles the offset maps through unchanged.
 */
static int
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_bytes = in_slice_data_bit_offset / 8;
    int logical, raw;

    if (profile != 3)
        return in_slice_data_bit_offset;

    /* Walk logical header bytes while tracking the raw position; every
     * escape sequence consumes one extra raw byte (the stuffed 0x03). */
    for (logical = 0, raw = 0; logical < header_bytes; logical++, raw++) {
        if (buf[raw] == 0 && buf[raw + 1] == 0 && buf[raw + 2] == 3 && buf[raw + 3] < 4) {
            logical++;
            raw += 2;
        }
    }

    return 8 * raw + in_slice_data_bit_offset % 8;
}
1825
1826 static void
1827 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1828                         VAPictureParameterBufferVC1 *pic_param,
1829                         VASliceParameterBufferVC1 *slice_param,
1830                         VASliceParameterBufferVC1 *next_slice_param,
1831                         dri_bo *slice_data_bo,
1832                         struct gen7_mfd_context *gen7_mfd_context)
1833 {
1834     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1835     int next_slice_start_vert_pos;
1836     int macroblock_offset;
1837     uint8_t *slice_data = NULL;
1838
1839     dri_bo_map(slice_data_bo, 0);
1840     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1841     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1842                                                                slice_param->macroblock_offset,
1843                                                                pic_param->sequence_fields.bits.profile);
1844     dri_bo_unmap(slice_data_bo);
1845
1846     if (next_slice_param)
1847         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1848     else
1849         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1850
1851     BEGIN_BCS_BATCH(batch, 5);
1852     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1853     OUT_BCS_BATCH(batch,
1854                   slice_param->slice_data_size - (macroblock_offset >> 3));
1855     OUT_BCS_BATCH(batch,
1856                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1857     OUT_BCS_BATCH(batch,
1858                   slice_param->slice_vertical_position << 16 |
1859                   next_slice_start_vert_pos << 0);
1860     OUT_BCS_BATCH(batch,
1861                   (macroblock_offset & 0x7));
1862     ADVANCE_BCS_BATCH(batch);
1863 }
1864
1865 static void
1866 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1867                             struct decode_state *decode_state,
1868                             struct gen7_mfd_context *gen7_mfd_context)
1869 {
1870     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1871     VAPictureParameterBufferVC1 *pic_param;
1872     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1873     dri_bo *slice_data_bo;
1874     int i, j;
1875
1876     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1877     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1878
1879     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1880     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1881     intel_batchbuffer_emit_mi_flush(batch);
1882     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1883     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1884     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1885     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1886     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1887     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1888     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1889
1890     for (j = 0; j < decode_state->num_slice_params; j++) {
1891         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1892         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1893         slice_data_bo = decode_state->slice_datas[j]->bo;
1894         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1895
1896         if (j == decode_state->num_slice_params - 1)
1897             next_slice_group_param = NULL;
1898         else
1899             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1900
1901         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1902             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1903
1904             if (i < decode_state->slice_params[j]->num_elements - 1)
1905                 next_slice_param = slice_param + 1;
1906             else
1907                 next_slice_param = next_slice_group_param;
1908
1909             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1910             slice_param++;
1911         }
1912     }
1913
1914     intel_batchbuffer_end_atomic(batch);
1915     intel_batchbuffer_flush(batch);
1916 }
1917
1918 static void
1919 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1920                           struct decode_state *decode_state,
1921                           struct gen7_mfd_context *gen7_mfd_context)
1922 {
1923     struct object_surface *obj_surface;
1924     VAPictureParameterBufferJPEGBaseline *pic_param;
1925     int subsampling = SUBSAMPLE_YUV420;
1926     int fourcc = VA_FOURCC_IMC3;
1927
1928     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1929
1930     if (pic_param->num_components == 1) {
1931         subsampling = SUBSAMPLE_YUV400;
1932         fourcc = VA_FOURCC_Y800;
1933     } else if (pic_param->num_components == 3) {
1934         int h1 = pic_param->components[0].h_sampling_factor;
1935         int h2 = pic_param->components[1].h_sampling_factor;
1936         int h3 = pic_param->components[2].h_sampling_factor;
1937         int v1 = pic_param->components[0].v_sampling_factor;
1938         int v2 = pic_param->components[1].v_sampling_factor;
1939         int v3 = pic_param->components[2].v_sampling_factor;
1940
1941         if (h1 == 2 * h2 && h2 == h3 &&
1942             v1 == 2 * v2 && v2 == v3) {
1943             subsampling = SUBSAMPLE_YUV420;
1944             fourcc = VA_FOURCC_IMC3;
1945         } else if (h1 == 2 * h2  && h2 == h3 &&
1946                    v1 == v2 && v2 == v3) {
1947             subsampling = SUBSAMPLE_YUV422H;
1948             fourcc = VA_FOURCC_422H;
1949         } else if (h1 == h2 && h2 == h3 &&
1950                    v1 == v2  && v2 == v3) {
1951             subsampling = SUBSAMPLE_YUV444;
1952             fourcc = VA_FOURCC_444P;
1953         } else if (h1 == 4 * h2 && h2 ==  h3 &&
1954                    v1 == v2 && v2 == v3) {
1955             subsampling = SUBSAMPLE_YUV411;
1956             fourcc = VA_FOURCC_411P;
1957         } else if (h1 == h2 && h2 == h3 &&
1958                    v1 == 2 * v2 && v2 == v3) {
1959             subsampling = SUBSAMPLE_YUV422V;
1960             fourcc = VA_FOURCC_422V;
1961         } else
1962             assert(0);
1963     } else {
1964         assert(0);
1965     }
1966
1967     /* Current decoded picture */
1968     obj_surface = decode_state->render_object;
1969     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1970
1971     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1972     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1973     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1974     gen7_mfd_context->pre_deblocking_output.valid = 1;
1975
1976     gen7_mfd_context->post_deblocking_output.bo = NULL;
1977     gen7_mfd_context->post_deblocking_output.valid = 0;
1978
1979     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1980     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1981
1982     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1983     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1984
1985     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1986     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1987
1988     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1989     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1990
1991     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1992     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1993 }
1994
/* Map VA rotation indices (0/90/180/270 degrees) onto the rotation
 * field encoding used by MFX_JPEG_PIC_STATE. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
2001
2002 static void
2003 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
2004                         struct decode_state *decode_state,
2005                         struct gen7_mfd_context *gen7_mfd_context)
2006 {
2007     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2008     VAPictureParameterBufferJPEGBaseline *pic_param;
2009     int chroma_type = GEN7_YUV420;
2010     int frame_width_in_blks;
2011     int frame_height_in_blks;
2012
2013     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2014     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2015
2016     if (pic_param->num_components == 1)
2017         chroma_type = GEN7_YUV400;
2018     else if (pic_param->num_components == 3) {
2019         int h1 = pic_param->components[0].h_sampling_factor;
2020         int h2 = pic_param->components[1].h_sampling_factor;
2021         int h3 = pic_param->components[2].h_sampling_factor;
2022         int v1 = pic_param->components[0].v_sampling_factor;
2023         int v2 = pic_param->components[1].v_sampling_factor;
2024         int v3 = pic_param->components[2].v_sampling_factor;
2025
2026         if (h1 == 2 * h2 && h2 == h3 &&
2027             v1 == 2 * v2 && v2 == v3)
2028             chroma_type = GEN7_YUV420;
2029         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2030                  v1 == 1 && v2 == 1 && v3 == 1)
2031             chroma_type = GEN7_YUV422H_2Y;
2032         else if (h1 == h2 && h2 == h3 &&
2033                  v1 == v2 && v2 == v3)
2034             chroma_type = GEN7_YUV444;
2035         else if (h1 == 4 * h2 && h2 == h3 &&
2036                  v1 == v2 && v2 == v3)
2037             chroma_type = GEN7_YUV411;
2038         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2039                  v1 == 2 && v2 == 1 && v3 == 1)
2040             chroma_type = GEN7_YUV422V_2Y;
2041         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2042                  v1 == 2 && v2 == 2 && v3 == 2)
2043             chroma_type = GEN7_YUV422H_4Y;
2044         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2045                  v1 == 2 && v2 == 1 && v3 == 1)
2046             chroma_type = GEN7_YUV422V_4Y;
2047         else
2048             assert(0);
2049     }
2050
2051     if (chroma_type == GEN7_YUV400 ||
2052         chroma_type == GEN7_YUV444 ||
2053         chroma_type == GEN7_YUV422V_2Y) {
2054         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2055         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2056     } else if (chroma_type == GEN7_YUV411) {
2057         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2058         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2059     } else {
2060         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2061         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2062     }
2063
2064     BEGIN_BCS_BATCH(batch, 3);
2065     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2066     OUT_BCS_BATCH(batch,
2067                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2068                   (chroma_type << 0));
2069     OUT_BCS_BATCH(batch,
2070                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2071                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2072     ADVANCE_BCS_BATCH(batch);
2073 }
2074
/* Huffman table slot per table index: slot 0 loads the luma (Y) table,
 * slot 1 the table shared by both chroma components. */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
2079
/*
 * Load the JPEG Huffman tables via MFX_JPEG_HUFF_TABLE_STATE.
 *
 * num_tables is the number of table slots to consider (at most 2: index
 * 0 maps to the Y table, index 1 to the shared U/V table — see
 * va_to_gen7_jpeg_hufftable).  Slots whose load flag is clear in the VA
 * buffer are skipped and keep their previous hardware contents.
 */
static void
gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct gen7_mfd_context *gen7_mfd_context,
                               int num_tables)
{
    VAHuffmanTableBufferJPEGBaseline *huffman_table;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int index;

    /* Nothing to do when the application supplied no Huffman tables. */
    if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
        return;

    huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;

    for (index = 0; index < num_tables; index++) {
        int id = va_to_gen7_jpeg_hufftable[index];
        if (!huffman_table->load_huffman_table[index])
            continue;
        /* 53 dwords: 1 header + 1 table id + (12+12+16+164) bytes. */
        BEGIN_BCS_BATCH(batch, 53);
        OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
        OUT_BCS_BATCH(batch, id);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
        /* 164 = 162 ac_values rounded up to a dword boundary; the extra
         * 2 bytes presumably come from the pad field that follows
         * ac_values in the VA struct — verify against va.h. */
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
        ADVANCE_BCS_BATCH(batch);
    }
}
2109
/* Quantization-matrix target indexed by the 1-based component id
 * (component ids are rebased so the first component becomes 1); entry 0
 * is an unused placeholder. */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
2117
2118 static void
2119 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2120                        struct decode_state *decode_state,
2121                        struct gen7_mfd_context *gen7_mfd_context)
2122 {
2123     VAPictureParameterBufferJPEGBaseline *pic_param;
2124     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2125     int index;
2126
2127     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2128         return;
2129
2130     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2131     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2132
2133     assert(pic_param->num_components <= 3);
2134
2135     for (index = 0; index < pic_param->num_components; index++) {
2136         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2137         int qm_type;
2138         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2139         unsigned char raster_qm[64];
2140         int j;
2141
2142         if (id > 4 || id < 1)
2143             continue;
2144
2145         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2146             continue;
2147
2148         qm_type = va_to_gen7_jpeg_qm[id];
2149
2150         for (j = 0; j < 64; j++)
2151             raster_qm[zigzag_direct[j]] = qm[j];
2152
2153         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2154     }
2155 }
2156
2157 static void
2158 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2159                          VAPictureParameterBufferJPEGBaseline *pic_param,
2160                          VASliceParameterBufferJPEGBaseline *slice_param,
2161                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2162                          dri_bo *slice_data_bo,
2163                          struct gen7_mfd_context *gen7_mfd_context)
2164 {
2165     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2166     int scan_component_mask = 0;
2167     int i;
2168
2169     assert(slice_param->num_components > 0);
2170     assert(slice_param->num_components < 4);
2171     assert(slice_param->num_components <= pic_param->num_components);
2172
2173     for (i = 0; i < slice_param->num_components; i++) {
2174         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2175         case 1:
2176             scan_component_mask |= (1 << 0);
2177             break;
2178         case 2:
2179             scan_component_mask |= (1 << 1);
2180             break;
2181         case 3:
2182             scan_component_mask |= (1 << 2);
2183             break;
2184         default:
2185             assert(0);
2186             break;
2187         }
2188     }
2189
2190     BEGIN_BCS_BATCH(batch, 6);
2191     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2192     OUT_BCS_BATCH(batch,
2193                   slice_param->slice_data_size);
2194     OUT_BCS_BATCH(batch,
2195                   slice_param->slice_data_offset);
2196     OUT_BCS_BATCH(batch,
2197                   slice_param->slice_horizontal_position << 16 |
2198                   slice_param->slice_vertical_position << 0);
2199     OUT_BCS_BATCH(batch,
2200                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2201                   (scan_component_mask << 27) |                 /* scan components */
2202                   (0 << 26) |   /* disable interrupt allowed */
2203                   (slice_param->num_mcus << 0));                /* MCU count */
2204     OUT_BCS_BATCH(batch,
2205                   (slice_param->restart_interval << 0));    /* RestartInterval */
2206     ADVANCE_BCS_BATCH(batch);
2207 }
2208
2209 /* Workaround for JPEG decoding on Ivybridge */
2210 #ifdef JPEG_WA
2211
/* Canned 16x16 clip decoded through the AVC pipeline before every JPEG
 * picture as part of the hardware workaround (see gen8_mfd_jpeg_wa()). */
static struct {
    int width;              /* clip width in pixels */
    int height;             /* clip height in pixels */
    unsigned char data[32]; /* compressed slice payload (14 bytes used) */
    int data_size;          /* number of valid bytes in data[] */
    int data_bit_offset;    /* bit offset of the slice data within data[] */
    int qp;                 /* slice quantization parameter */
} gen7_jpeg_wa_clip = {
    16,
    16,
    {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    14,
    40,
    28,
};
2230
2231 static void
2232 gen8_jpeg_wa_init(VADriverContextP ctx,
2233                   struct gen7_mfd_context *gen7_mfd_context)
2234 {
2235     struct i965_driver_data *i965 = i965_driver_data(ctx);
2236     VAStatus status;
2237     struct object_surface *obj_surface;
2238
2239     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2240         i965_DestroySurfaces(ctx,
2241                              &gen7_mfd_context->jpeg_wa_surface_id,
2242                              1);
2243
2244     status = i965_CreateSurfaces(ctx,
2245                                  gen7_jpeg_wa_clip.width,
2246                                  gen7_jpeg_wa_clip.height,
2247                                  VA_RT_FORMAT_YUV420,
2248                                  1,
2249                                  &gen7_mfd_context->jpeg_wa_surface_id);
2250     assert(status == VA_STATUS_SUCCESS);
2251
2252     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2253     assert(obj_surface);
2254     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2255     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2256
2257     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2258         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2259                                                                "JPEG WA data",
2260                                                                0x1000,
2261                                                                0x1000);
2262         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2263                        0,
2264                        gen7_jpeg_wa_clip.data_size,
2265                        gen7_jpeg_wa_clip.data);
2266     }
2267 }
2268
/* Program MFX_PIPE_MODE_SELECT for the workaround pass: the MFX engine is
 * switched to AVC VLD decoding, since the canned WA clip is an AVC stream. */
static void
gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9)  | /* Post Deblocking Output */
                  (1 << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
2296
/* Program MFX_SURFACE_STATE to describe the small NV12 scratch surface the
 * workaround clip is decoded into (allocated in gen8_jpeg_wa_init()). */
static void
gen8_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
2326
/* Program MFX_PIPE_BUF_ADDR_STATE for the workaround decode: only the
 * pre-deblocking output (the WA scratch surface) and a temporary intra row
 * store buffer are real; all reference/status addresses are zeroed. */
static void
gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *intra_bo;
    int i;

    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
                            "intra row store",
                            128 * 64,
                            0x1000);

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    OUT_BCS_RELOC64(batch,
                    obj_surface->bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);


    OUT_BCS_BATCH(batch, 0); /* post deblocking */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 13-15 is for intra row store scratch */
    OUT_BCS_RELOC64(batch,
                    intra_bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW 16-18 is for deblocking filter */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 19..50 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, 0);

    /* the DW52-54 is for mb status address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* the DW56-60 is for ILDB & second ILDB address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* NOTE(review): only the local reference is dropped here; presumably the
     * relocation keeps the BO alive until the batch completes — confirm. */
    dri_bo_unreference(intra_bo);
}
2399
/* Program MFX_BSP_BUF_BASE_ADDR_STATE with freshly allocated BSD/MPC and
 * MPR row store buffers sized for the workaround clip. */
static void
gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1. 0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    OUT_BCS_RELOC64(batch,
                    bsd_mpc_bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    OUT_BCS_RELOC64(batch,
                    mpr_bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* Local references dropped after the relocations are recorded. */
    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
2443
/* Intentionally empty: no quantizer matrices need to be programmed for the
 * workaround clip. Kept so the WA sequence in gen8_mfd_jpeg_wa() mirrors a
 * regular AVC decode pass. */
static void
gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{

}
2450
/* Program MFX_AVC_IMG_STATE for the workaround clip: a single-macroblock
 * (1x1 MB) progressive CABAC frame. */
static void
gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct = 0;          /* frame picture */
    int mbaff_frame_flag = 0;    /* no MBAFF */
    unsigned int width_in_mbs = 1, height_in_mbs = 1;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  width_in_mbs * height_in_mbs); /* total MB count */
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 24) |
                  (0 << 16) |
                  (0 << 14) |
                  (0 << 13) |
                  (0 << 12) | /* differ from GEN6 */
                  (0 << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (0 << 6) |
                  (0 << 5) |
                  (0 << 4) |
                  (0 << 3) |
                  (1 << 2) |
                  (mbaff_frame_flag << 1) |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2498
/* Program MFX_AVC_DIRECTMODE_STATE with all reference surfaces and POC
 * entries zeroed — the workaround clip is a single intra frame and uses no
 * direct-mode prediction. */
static void
gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
                                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0); /* top */
        OUT_BCS_BATCH(batch, 0); /* bottom */
    }

    OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field */
    OUT_BCS_BATCH(batch, 0); /* top */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC List */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
2533
2534 static void
2535 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2536                                      struct gen7_mfd_context *gen7_mfd_context)
2537 {
2538     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2539
2540     BEGIN_BCS_BATCH(batch, 11);
2541     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2542     OUT_BCS_RELOC64(batch,
2543                     gen7_mfd_context->jpeg_wa_slice_data_bo,
2544                     I915_GEM_DOMAIN_INSTRUCTION, 0,
2545                     0);
2546     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2547     OUT_BCS_BATCH(batch, 0);
2548     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2549     OUT_BCS_BATCH(batch, 0);
2550     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2551     OUT_BCS_BATCH(batch, 0);
2552     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2553     OUT_BCS_BATCH(batch, 0);
2554     ADVANCE_BCS_BATCH(batch);
2555 }
2556
/* Emit the MFD_AVC_BSD_OBJECT that actually decodes the canned workaround
 * clip, using the size/bit-offset recorded in gen7_jpeg_wa_clip. */
static void
gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) | /* byte part of the offset */
                  (0 << 5)  |
                  (0 << 4)  |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7)); /* remaining bit part */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2583
/* Program MFX_AVC_SLICE_STATE for the workaround clip: a single I slice
 * covering the whole 1x1-MB frame, deblocking disabled. */
static void
gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0; /* intra slice: no references */
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2623
/* Run the full JPEG decoding workaround: decode the canned AVC clip into a
 * scratch surface before the real JPEG picture. The state commands are
 * emitted in the fixed order the MFX pipeline expects. */
static void
gen8_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen8_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
2643
2644 #endif
2645
/*
 * Decode one JPEG baseline picture. Emits the workaround clip (if enabled),
 * the picture-level state, then walks the slice parameters twice: a first
 * pass to find the largest Huffman table selector in use (so the right
 * number of tables is programmed), and a second pass to emit one BSD object
 * per scan.
 */
void
gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferJPEGBaseline *pic_param;
    VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j, max_selector = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    /* Currently only support Baseline DCT */
    gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
#ifdef JPEG_WA
    gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
#endif
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);

    /* First pass: scan all slices to find the highest DC/AC table selector. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            int component;

            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            for (component = 0; component < slice_param->num_components; component++) {
                if (max_selector < slice_param->components[component].dc_table_selector)
                    max_selector = slice_param->components[component].dc_table_selector;

                if (max_selector < slice_param->components[component].ac_table_selector)
                    max_selector = slice_param->components[component].ac_table_selector;
            }

            slice_param++;
        }
    }

    assert(max_selector < 2);
    gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);

    /* Second pass: emit one BSD object per scan. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
2736
/* VP8 DC quantizer lookup table, indexed by the clamped quantization index
 * (see vp8_clip_quantization_index()). */
static const int vp8_dc_qlookup[128] = {
    4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
    18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
    29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
    44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
    59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
    75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
    91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
};
2747
/* VP8 AC quantizer lookup table, indexed by the clamped quantization index
 * (see vp8_clip_quantization_index()). */
static const int vp8_ac_qlookup[128] = {
    4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
    20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
    36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
    52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
    78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
};
2758
/* Clamp a VP8 quantization index into the valid lookup-table range [0, 127]. */
static inline unsigned int vp8_clip_quantization_index(int index)
{
    const int clamped = index < 0 ? 0 : (index > 127 ? 127 : index);

    return (unsigned int)clamped;
}
2768
/*
 * Per-frame setup for VP8 decoding: update the reference frame store,
 * bind the render target as pre/post-deblocking output (depending on
 * whether the loop filter is enabled), and (re)allocate the row-store
 * scratch buffers sized by the frame width in macroblocks.
 */
static void
gen8_mfd_vp8_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;
    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    int width_in_mbs = (pic_param->frame_width + 15) / 16;
    int height_in_mbs = (pic_param->frame_height + 15) / 16;

    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    intel_update_vp8_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* Exactly one of post/pre deblocking output is valid, selected by the
     * loop_filter_disable flag; both point at the render target BO. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;

    intel_ensure_vp8_segmentation_buffer(ctx,
                                         &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);

    /* The same as AVC */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* VP8 has no bitplane buffer. */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
2845
2846 static void
2847 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2848                        struct decode_state *decode_state,
2849                        struct gen7_mfd_context *gen7_mfd_context)
2850 {
2851     struct i965_driver_data *i965 = i965_driver_data(ctx);
2852     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2853     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2854     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2855     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2856     dri_bo *probs_bo = decode_state->probability_data->bo;
2857     int i, j, log2num;
2858     unsigned int quantization_value[4][6];
2859
2860     /* There is no safe way to error out if the segmentation buffer
2861        could not be allocated. So, instead of aborting, simply decode
2862        something even if the result may look totally inacurate */
2863     const unsigned int enable_segmentation =
2864         pic_param->pic_fields.bits.segmentation_enabled &&
2865         gen7_mfd_context->segmentation_buffer.valid;
2866
2867     log2num = (int)log2(slice_param->num_of_partitions - 1);
2868
2869     BEGIN_BCS_BATCH(batch, 38);
2870     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2871     OUT_BCS_BATCH(batch,
2872                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2873                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2874     OUT_BCS_BATCH(batch,
2875                   log2num << 24 |
2876                   pic_param->pic_fields.bits.sharpness_level << 16 |
2877                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2878                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2879                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2880                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2881                   (enable_segmentation &&
2882                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
2883                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2884                   (enable_segmentation &&
2885                    !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2886                   (enable_segmentation &&
2887                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2888                   (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2889                   pic_param->pic_fields.bits.filter_type << 4 |
2890                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2891                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2892
2893     OUT_BCS_BATCH(batch,
2894                   pic_param->loop_filter_level[3] << 24 |
2895                   pic_param->loop_filter_level[2] << 16 |
2896                   pic_param->loop_filter_level[1] <<  8 |
2897                   pic_param->loop_filter_level[0] <<  0);
2898
2899     /* Quantizer Value for 4 segments, DW4-DW15 */
2900     for (i = 0; i < 4; i++) {
2901         quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2902         quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2903         quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /*y2dc*/
2904         /* 101581>>16 is equivalent to 155/100 */
2905         quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /*y2ac*/
2906         quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2907         quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2908
2909         quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2910         quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2911
2912         OUT_BCS_BATCH(batch,
2913                       quantization_value[i][0] << 16 | /* Y1AC */
2914                       quantization_value[i][1] <<  0); /* Y1DC */
2915         OUT_BCS_BATCH(batch,
2916                       quantization_value[i][5] << 16 | /* UVAC */
2917                       quantization_value[i][4] <<  0); /* UVDC */
2918         OUT_BCS_BATCH(batch,
2919                       quantization_value[i][3] << 16 | /* Y2AC */
2920                       quantization_value[i][2] <<  0); /* Y2DC */
2921     }
2922
2923     /* CoeffProbability table for non-key frame, DW16-DW18 */
2924     if (probs_bo) {
2925         OUT_BCS_RELOC64(batch, probs_bo,
2926                         0, I915_GEM_DOMAIN_INSTRUCTION,
2927                         0);
2928         OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2929     } else {
2930         OUT_BCS_BATCH(batch, 0);
2931         OUT_BCS_BATCH(batch, 0);
2932         OUT_BCS_BATCH(batch, 0);
2933     }
2934
2935     OUT_BCS_BATCH(batch,
2936                   pic_param->mb_segment_tree_probs[2] << 16 |
2937                   pic_param->mb_segment_tree_probs[1] <<  8 |
2938                   pic_param->mb_segment_tree_probs[0] <<  0);
2939
2940     OUT_BCS_BATCH(batch,
2941                   pic_param->prob_skip_false << 24 |
2942                   pic_param->prob_intra      << 16 |
2943                   pic_param->prob_last       <<  8 |
2944                   pic_param->prob_gf         <<  0);
2945
2946     OUT_BCS_BATCH(batch,
2947                   pic_param->y_mode_probs[3] << 24 |
2948                   pic_param->y_mode_probs[2] << 16 |
2949                   pic_param->y_mode_probs[1] <<  8 |
2950                   pic_param->y_mode_probs[0] <<  0);
2951
2952     OUT_BCS_BATCH(batch,
2953                   pic_param->uv_mode_probs[2] << 16 |
2954                   pic_param->uv_mode_probs[1] <<  8 |
2955                   pic_param->uv_mode_probs[0] <<  0);
2956
2957     /* MV update value, DW23-DW32 */
2958     for (i = 0; i < 2; i++) {
2959         for (j = 0; j < 20; j += 4) {
2960             OUT_BCS_BATCH(batch,
2961                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2962                           pic_param->mv_probs[i][j + 2] << 16 |
2963                           pic_param->mv_probs[i][j + 1] <<  8 |
2964                           pic_param->mv_probs[i][j + 0] <<  0);
2965         }
2966     }
2967
2968     OUT_BCS_BATCH(batch,
2969                   (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2970                   (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2971                   (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
2972                   (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);
2973
2974     OUT_BCS_BATCH(batch,
2975                   (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2976                   (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2977                   (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
2978                   (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);
2979
2980     /* segmentation id stream base address, DW35-DW37 */
2981     if (enable_segmentation) {
2982         OUT_BCS_RELOC64(batch, gen7_mfd_context->segmentation_buffer.bo,
2983                         0, I915_GEM_DOMAIN_INSTRUCTION,
2984                         0);
2985         OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2986     } else {
2987         OUT_BCS_BATCH(batch, 0);
2988         OUT_BCS_BATCH(batch, 0);
2989         OUT_BCS_BATCH(batch, 0);
2990     }
2991     ADVANCE_BCS_BATCH(batch);
2992 }
2993
2994 static void
2995 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2996                         VAPictureParameterBufferVP8 *pic_param,
2997                         VASliceParameterBufferVP8 *slice_param,
2998                         dri_bo *slice_data_bo,
2999                         struct gen7_mfd_context *gen7_mfd_context)
3000 {
3001     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3002     int i, log2num;
3003     unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
3004     unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
3005     unsigned int partition_size_0 = slice_param->partition_size[0];
3006
3007     assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
3008     if (used_bits == 8) {
3009         used_bits = 0;
3010         offset += 1;
3011         partition_size_0 -= 1;
3012     }
3013
3014     assert(slice_param->num_of_partitions >= 2);
3015     assert(slice_param->num_of_partitions <= 9);
3016
3017     log2num = (int)log2(slice_param->num_of_partitions - 1);
3018
3019     BEGIN_BCS_BATCH(batch, 22);
3020     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
3021     OUT_BCS_BATCH(batch,
3022                   used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
3023                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
3024                   log2num << 4 |
3025                   (slice_param->macroblock_offset & 0x7));
3026     OUT_BCS_BATCH(batch,
3027                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
3028                   0);
3029
3030     OUT_BCS_BATCH(batch, partition_size_0 + 1);
3031     OUT_BCS_BATCH(batch, offset);
3032     //partion sizes in bytes are present after the above first partition when there are more than one token partition
3033     offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
3034     for (i = 1; i < 9; i++) {
3035         if (i < slice_param->num_of_partitions) {
3036             OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
3037             OUT_BCS_BATCH(batch, offset);
3038         } else {
3039             OUT_BCS_BATCH(batch, 0);
3040             OUT_BCS_BATCH(batch, 0);
3041         }
3042
3043         offset += slice_param->partition_size[i];
3044     }
3045
3046     OUT_BCS_BATCH(batch, 0); /* concealment method */
3047
3048     ADVANCE_BCS_BATCH(batch);
3049 }
3050
3051 void
3052 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3053                             struct decode_state *decode_state,
3054                             struct gen7_mfd_context *gen7_mfd_context)
3055 {
3056     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3057     VAPictureParameterBufferVP8 *pic_param;
3058     VASliceParameterBufferVP8 *slice_param;
3059     dri_bo *slice_data_bo;
3060
3061     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3062     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3063
3064     /* one slice per frame */
3065     if (decode_state->num_slice_params != 1 ||
3066         (!decode_state->slice_params ||
3067          !decode_state->slice_params[0] ||
3068          (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3069         (!decode_state->slice_datas ||
3070          !decode_state->slice_datas[0] ||
3071          !decode_state->slice_datas[0]->bo) ||
3072         !decode_state->probability_data) {
3073         WARN_ONCE("Wrong parameters for VP8 decoding\n");
3074
3075         return;
3076     }
3077
3078     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3079     slice_data_bo = decode_state->slice_datas[0]->bo;
3080
3081     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3082     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3083     intel_batchbuffer_emit_mi_flush(batch);
3084     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3085     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3086     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3087     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3088     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3089     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3090     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3091     intel_batchbuffer_end_atomic(batch);
3092     intel_batchbuffer_flush(batch);
3093 }
3094
3095 static VAStatus
3096 gen8_mfd_decode_picture(VADriverContextP ctx,
3097                         VAProfile profile,
3098                         union codec_state *codec_state,
3099                         struct hw_context *hw_context)
3100
3101 {
3102     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3103     struct decode_state *decode_state = &codec_state->decode;
3104     VAStatus vaStatus;
3105
3106     assert(gen7_mfd_context);
3107
3108     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3109
3110     if (vaStatus != VA_STATUS_SUCCESS)
3111         goto out;
3112
3113     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3114
3115     switch (profile) {
3116     case VAProfileMPEG2Simple:
3117     case VAProfileMPEG2Main:
3118         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3119         break;
3120
3121     case VAProfileH264ConstrainedBaseline:
3122     case VAProfileH264Main:
3123     case VAProfileH264High:
3124     case VAProfileH264StereoHigh:
3125     case VAProfileH264MultiviewHigh:
3126         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3127         break;
3128
3129     case VAProfileVC1Simple:
3130     case VAProfileVC1Main:
3131     case VAProfileVC1Advanced:
3132         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3133         break;
3134
3135     case VAProfileJPEGBaseline:
3136         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3137         break;
3138
3139     case VAProfileVP8Version0_3:
3140         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3141         break;
3142
3143     default:
3144         assert(0);
3145         break;
3146     }
3147
3148     vaStatus = VA_STATUS_SUCCESS;
3149
3150 out:
3151     return vaStatus;
3152 }
3153
3154 static void
3155 gen8_mfd_context_destroy(void *hw_context)
3156 {
3157     VADriverContextP ctx;
3158     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3159
3160     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3161
3162     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3163     gen7_mfd_context->post_deblocking_output.bo = NULL;
3164
3165     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3166     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3167
3168     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3169     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3170
3171     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3172     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3173
3174     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3175     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3176
3177     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3178     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3179
3180     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3181     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3182
3183     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3184     gen7_mfd_context->segmentation_buffer.bo = NULL;
3185
3186     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3187
3188     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3189         i965_DestroySurfaces(ctx,
3190                              &gen7_mfd_context->jpeg_wa_surface_id,
3191                              1);
3192         gen7_mfd_context->jpeg_wa_surface_object = NULL;
3193     }
3194
3195     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3196     free(gen7_mfd_context);
3197 }
3198
3199 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3200                                         struct gen7_mfd_context *gen7_mfd_context)
3201 {
3202     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3203     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3204     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3205     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3206 }
3207
3208 struct hw_context *
3209 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3210 {
3211     struct intel_driver_data *intel = intel_driver_data(ctx);
3212     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3213     int i;
3214
3215     if (!gen7_mfd_context)
3216         return NULL;
3217
3218     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3219     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3220     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3221
3222     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3223         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3224         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3225     }
3226
3227     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3228     gen7_mfd_context->segmentation_buffer.valid = 0;
3229
3230     switch (obj_config->profile) {
3231     case VAProfileMPEG2Simple:
3232     case VAProfileMPEG2Main:
3233         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3234         break;
3235
3236     case VAProfileH264ConstrainedBaseline:
3237     case VAProfileH264Main:
3238     case VAProfileH264High:
3239     case VAProfileH264StereoHigh:
3240     case VAProfileH264MultiviewHigh:
3241         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3242         break;
3243     default:
3244         break;
3245     }
3246
3247     gen7_mfd_context->driver_context = ctx;
3248     return (struct hw_context *)gen7_mfd_context;
3249 }