OSDN Git Service

genX_mfd: fix intensity compensation for frame-interlaced content
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
/* First hardware stepping revision that counts as "B+" silicon. */
#define B0_STEP_REV     2
#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)

/* Classic 8x8 zigzag scan order: entry i is the raster position (row * 8 +
 * column) of the i-th coefficient in scan order.  Used to reorder
 * quantization matrices supplied in raster order into the scan order the
 * hardware expects. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
61
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77
78         if (!gen7_avc_surface)
79             return;
80
81         gen7_avc_surface->base.frame_store_id = -1;
82         assert((obj_surface->size & 0x3f) == 0);
83         obj_surface->private_data = gen7_avc_surface;
84     }
85
86     /* DMV buffers now relate to the whole frame, irrespective of
87        field coding modes */
88     if (gen7_avc_surface->dmv_top == NULL) {
89         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
90                                                  "direct mv w/r buffer",
91                                                  width_in_mbs * height_in_mbs * 128,
92                                                  0x1000);
93         assert(gen7_avc_surface->dmv_top);
94     }
95 }
96
/* Emit MFX_PIPE_MODE_SELECT: put the MFX engine into VLD decode mode for
 * the selected codec standard, and enable the pre- or post-deblocking
 * output path according to which one the caller marked valid in
 * gen7_mfd_context.  Error-handling/termination bits and stream-out are
 * left disabled. */
static void
gen8_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* Only these codec formats are supported by this decoder backend. */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1 ||
           standard_select == MFX_FORMAT_JPEG ||
           standard_select == MFX_FORMAT_VP8);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
132
/* Emit MFX_SURFACE_STATE for the destination (render) surface: overall
 * dimensions, pitch/tiling, surface format (monochrome for Y800,
 * otherwise planar 4:2:0), and the Y offsets of the chroma planes.
 * For JPEG the chroma planes are not interleaved, so a separate Cr
 * offset is programmed; for all other codecs it must be zero. */
static void
gen8_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface = decode_state->render_object;
    unsigned int y_cb_offset;
    unsigned int y_cr_offset;
    unsigned int surface_format;

    assert(obj_surface);

    y_cb_offset = obj_surface->y_cb_offset;
    y_cr_offset = obj_surface->y_cr_offset;

    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
                     MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 18) |
                  ((obj_surface->orig_width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (surface_format << 28) | /* 420 planar YUV surface */
                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  ((standard_select == MFX_FORMAT_JPEG ? y_cr_offset : 0) << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
175
/* Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): pre-/post-deblocking output
 * surfaces, the intra and deblocking-filter row-store scratch buffers,
 * and the 16 reference picture addresses.  Each 64-bit address slot is
 * either a relocation (when the buffer is valid) or two zero dwords, and
 * is followed by a MOCS (cache attribute) dword.  Unused slots (stream
 * out, macroblock status, ILDB) are programmed as zero. */
static void
gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* Pre-deblock 1-3 */
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->pre_deblocking_output.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Post-deblocking 4-6 */
    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->post_deblocking_output.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* intra row-store scratch 13-15 */
    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* deblocking-filter-row-store 16-18 */
    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 19..50: one 64-bit address per reference surface slot */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->bo) {
            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

            OUT_BCS_RELOC64(batch, obj_surface->bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }

    }

    /* reference property 51 */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Macroblock status & ILDB 52-57 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the second Macroblock status 58-60 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
281
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): point the bitstream
 * decoder at the slice data buffer.  Only the indirect bitstream base
 * address is programmed for decode; the MV, IT-COFF, IT-DBLK and PAK-BSE
 * (encoder) sections are all zeroed. */
static void
gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    BEGIN_BCS_BATCH(batch, 26);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* MFX In BS 1-5 */
    OUT_BCS_RELOC64(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* Upper bound 4-5 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX indirect MV 6-10 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX IT_COFF 11-15 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX IT_DBLK 16-20 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX PAK_BSE object for encoder 21-25 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
330
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): BSD/MPC row store, MPR
 * row store, and bitplane read buffers.  Each 64-bit address is emitted
 * via a relocation when valid (otherwise two zero dwords) and followed
 * by a MOCS dword. */
static void
gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* BSD/MPC Row Store Scratch buffer 1-3 */
    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* MPR Row Store Scratch buffer 4-6 */
    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Bitplane 7-9 */
    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, 0,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    ADVANCE_BCS_BATCH(batch);
}
377
378 static void
379 gen8_mfd_qm_state(VADriverContextP ctx,
380                   int qm_type,
381                   unsigned char *qm,
382                   int qm_length,
383                   struct gen7_mfd_context *gen7_mfd_context)
384 {
385     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
386     unsigned int qm_buffer[16];
387
388     assert(qm_length <= 16 * 4);
389     memcpy(qm_buffer, qm, qm_length);
390
391     BEGIN_BCS_BATCH(batch, 18);
392     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
393     OUT_BCS_BATCH(batch, qm_type << 0);
394     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
395     ADVANCE_BCS_BATCH(batch);
396 }
397
/* Emit MFX_AVC_IMG_STATE from the VA-API H.264 picture parameters:
 * frame geometry in macroblocks, chroma QP index offsets, picture
 * structure (frame/top field/bottom field) and the main sequence/picture
 * coding flags.  DW 5..16 are not used for decoding and left zero. */
static void
gen8_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    /* Hardware picture structure encoding: 0 = frame, 1 = top field,
     * 3 = bottom field. */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* A field picture structure must match field_pic_flag and vice versa. */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF: adaptive frame/field coding in a frame picture. */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 17);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
    OUT_BCS_BATCH(batch,
                  (width_in_mbs * height_in_mbs - 1));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    /* DW 5..16: unused for decode, programmed as zero. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
483
/* Upload the H.264 scaling lists: 4x4 intra and inter matrices always,
 * plus the two 8x8 matrices when transform_8x8_mode_flag is set.  Falls
 * back to the matrices cached in gen7_mfd_context when the client did
 * not supply an IQ-matrix buffer. */
static void
gen8_mfd_avc_qm_state(VADriverContextP ctx,
                      struct decode_state *decode_state,
                      struct gen7_mfd_context *gen7_mfd_context)
{
    VAIQMatrixBufferH264 *iq_matrix;
    VAPictureParameterBufferH264 *pic_param;

    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
        iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
    else
        iq_matrix = &gen7_mfd_context->iq_matrix.h264;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    /* ScalingList4x4[0..2] are intra Y/Cb/Cr, [3..5] are inter Y/Cb/Cr. */
    gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
    gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
        gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
        gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
    }
}
508
/* Thin wrapper: emit the AVC PICID state for the current reference
 * surface set via the shared Gen7.5+ helper. */
static inline void
gen8_mfd_avc_picid_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
                               gen7_mfd_context->reference_surface);
}
517
518 static void
519 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
520                               struct decode_state *decode_state,
521                               VAPictureParameterBufferH264 *pic_param,
522                               VASliceParameterBufferH264 *slice_param,
523                               struct gen7_mfd_context *gen7_mfd_context)
524 {
525     struct i965_driver_data *i965 = i965_driver_data(ctx);
526     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
527     struct object_surface *obj_surface;
528     GenAvcSurface *gen7_avc_surface;
529     VAPictureH264 *va_pic;
530     int i;
531
532     BEGIN_BCS_BATCH(batch, 71);
533     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
534
535     /* reference surfaces 0..15 */
536     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
537         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
538             gen7_mfd_context->reference_surface[i].obj_surface &&
539             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
540
541             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
542             gen7_avc_surface = obj_surface->private_data;
543
544             OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
545                             I915_GEM_DOMAIN_INSTRUCTION, 0,
546                             0);
547         } else {
548             OUT_BCS_BATCH(batch, 0);
549             OUT_BCS_BATCH(batch, 0);
550         }
551     }
552
553     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
554
555     /* the current decoding frame/field */
556     va_pic = &pic_param->CurrPic;
557     obj_surface = decode_state->render_object;
558     assert(obj_surface->bo && obj_surface->private_data);
559     gen7_avc_surface = obj_surface->private_data;
560
561     OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
562                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
563                     0);
564
565     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
566
567     /* POC List */
568     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
569         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
570
571         if (obj_surface) {
572             const VAPictureH264 * const va_pic = avc_find_picture(
573                                                      obj_surface->base.id, pic_param->ReferenceFrames,
574                                                      ARRAY_ELEMS(pic_param->ReferenceFrames));
575
576             assert(va_pic != NULL);
577             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
578             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
579         } else {
580             OUT_BCS_BATCH(batch, 0);
581             OUT_BCS_BATCH(batch, 0);
582         }
583     }
584
585     va_pic = &pic_param->CurrPic;
586     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
587     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
588
589     ADVANCE_BCS_BATCH(batch);
590 }
591
/* Thin wrapper: emit a phantom (dummy) slice before the first real slice
 * via the shared Gen6 helper, using this context's batch buffer. */
static void
gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
                                 VAPictureParameterBufferH264 *pic_param,
                                 VASliceParameterBufferH264 *next_slice_param,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
}
600
/* Emit MFX_AVC_SLICE_STATE for one slice: normalized slice type, active
 * reference counts, weight denominators, QP/deblocking parameters and the
 * macroblock start/end positions of this slice and the next one (or the
 * picture end when this is the last slice). */
static void
gen8_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* Fold SI into I and SP into P: the hardware only distinguishes
     * I/P/B slice types. */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    /* Active reference counts: I uses none, P uses list 0 only,
     * B uses both lists. */
    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    first_mb_in_slice = slice_param->first_mb_in_slice;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    /* In MBAFF pictures first_mb_in_slice counts MB pairs, so the
     * vertical position is doubled. */
    if (mbaff_picture)
        slice_ver_pos = slice_ver_pos << 1;
    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;

        if (mbaff_picture)
            next_slice_ver_pos = next_slice_ver_pos << 1;
    } else {
        /* Last slice: ends at the bottom of the picture (half height
         * for a field picture). */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
691
/* Thin wrapper: emit the AVC reference index state (REF_IDX lists) for
 * this slice via the shared Gen6 helper. */
static inline void
gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           VASliceParameterBufferH264 *slice_param,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    gen6_send_avc_ref_idx_state(
        gen7_mfd_context->base.batch,
        slice_param,
        gen7_mfd_context->reference_surface
    );
}
704
/* Emit MFX_AVC_WEIGHTOFFSET_STATE when explicit weighted prediction is
 * in use: one table (L0) for weighted P/SP slices, two tables (L0 and L1)
 * for B slices with weighted_bipred_idc == 1.  Each table packs, per
 * reference index, the luma weight/offset and both chroma weight/offset
 * pairs (6 shorts per entry, 32 entries). */
static void
gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
                                VAPictureParameterBufferH264 *pic_param,
                                VASliceParameterBufferH264 *slice_param,
                                struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i, j, num_weight_offset_table = 0;
    short weightoffsets[32 * 6];

    if ((slice_param->slice_type == SLICE_TYPE_P ||
         slice_param->slice_type == SLICE_TYPE_SP) &&
        (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
        num_weight_offset_table = 1;
    }

    if ((slice_param->slice_type == SLICE_TYPE_B) &&
        (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
        num_weight_offset_table = 2;
    }

    for (i = 0; i < num_weight_offset_table; i++) {
        BEGIN_BCS_BATCH(batch, 98);
        OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
        OUT_BCS_BATCH(batch, i); /* 0 = list L0, 1 = list L1 */

        if (i == 0) {
            /* Table 0: reference list L0 weights/offsets. */
            for (j = 0; j < 32; j++) {
                weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
                weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
                weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
                weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
                weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
                weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
            }
        } else {
            /* Table 1: reference list L1 weights/offsets (B slices only). */
            for (j = 0; j < 32; j++) {
                weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
                weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
                weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
                weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
                weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
                weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
            }
        }

        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
        ADVANCE_BCS_BATCH(batch);
    }
}
755
/*
 * Emits MFD_AVC_BSD_OBJECT, the command that kicks off bitstream decode of
 * one slice out of the indirect data buffer programmed earlier.
 */
static void
gen8_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* Bit offset of the first macroblock within the slice data; the helper
     * accounts for CABAC vs. CAVLC via entropy_coding_mode_flag. */
    int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
                                                            slice_param,
                                                            pic_param->pic_fields.bits.entropy_coding_mode_flag);

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    /* DW1: slice data size */
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_data_size));
    /* DW2: offset of the slice data within the indirect object buffer */
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    /* DW4: byte part of the first-MB offset in bits 31:16, residual bit
     * part in bits 2:0; bit 3 marks the last slice of the picture. */
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) |
                  (1 << 7)  |
                  (0 << 5)  |
                  (0 << 4)  |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
791
/* One-time AVC context setup: seed the cached H.264 IQ matrix with the
 * flat default scaling lists. */
static inline void
gen8_mfd_avc_context_init(
    VADriverContextP         ctx,
    struct gen7_mfd_context *gen7_mfd_context
)
{
    /* Initialize flat scaling lists */
    avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
}
801
/*
 * Per-picture setup for AVC decode: scans the slice headers to decide
 * whether the in-loop deblocking filter runs, binds the render target as
 * the pipeline output, and (re)allocates the row-store scratch buffers,
 * all sized from the frame width in macroblocks.
 */
static void
gen8_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    unsigned int width_in_mbs, height_in_mbs;

    /* Deblocking is needed as soon as any slice does not fully disable it
     * (disable_deblocking_filter_idc == 1 means "filter off" per H.264). */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
                                       gen7_mfd_context->reference_surface);
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    if (pic_param->pic_fields.bits.reference_pic_flag)
        obj_surface->flags |= SURFACE_REFERENCED;
    else
        obj_surface->flags &= ~SURFACE_REFERENCED;

    avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
    gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* Both outputs point at the render target's BO; exactly one of them is
     * marked valid, depending on whether deblocking runs. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Row-store scratch buffers, one allocation per picture. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* Bitplanes are a VC-1-only concept; unused for AVC. */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
903
/*
 * Top-level AVC decode of one picture: emits the picture-level MFX state
 * once, then per-slice state plus a BSD object for every slice in every
 * slice-parameter buffer.
 */
static void
gen8_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);

    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);

        /* Look ahead to the first slice of the next parameter buffer so the
         * last slice of this buffer knows its successor (NULL at picture end). */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        /* If the very first slice does not start at macroblock 0, emit a
         * phantom slice to cover the leading gap. */
        if (j == 0 && slice_param->first_mb_in_slice)
            gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
968
969 static void
970 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
971                            struct decode_state *decode_state,
972                            struct gen7_mfd_context *gen7_mfd_context)
973 {
974     VAPictureParameterBufferMPEG2 *pic_param;
975     struct i965_driver_data *i965 = i965_driver_data(ctx);
976     struct object_surface *obj_surface;
977     dri_bo *bo;
978     unsigned int width_in_mbs;
979
980     assert(decode_state->pic_param && decode_state->pic_param->buffer);
981     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
982     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
983
984     mpeg2_set_reference_surfaces(
985         ctx,
986         gen7_mfd_context->reference_surface,
987         decode_state,
988         pic_param
989     );
990
991     /* Current decoded picture */
992     obj_surface = decode_state->render_object;
993     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
994
995     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
996     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
997     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
998     gen7_mfd_context->pre_deblocking_output.valid = 1;
999
1000     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1001     bo = dri_bo_alloc(i965->intel.bufmgr,
1002                       "bsd mpc row store",
1003                       width_in_mbs * 96,
1004                       0x1000);
1005     assert(bo);
1006     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1007     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1008
1009     gen7_mfd_context->post_deblocking_output.valid = 0;
1010     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1011     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1012     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1013     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1014 }
1015
/*
 * Emits MFX_MPEG2_PIC_STATE (13 dwords) from the VA picture parameters:
 * the four f_code nibbles, the picture-coding-extension flags, the coding
 * type and the frame dimensions in macroblocks.
 */
static void
gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int slice_concealment_disable_bit = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    /* Slice concealment is unconditionally disabled on this path. */
    slice_concealment_disable_bit = 1;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    /* DW1: f_code packed as one nibble per [s][t], plus the
     * picture-coding-extension flag bits. */
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    /* DW2: picture coding type (I/P/B) */
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    /* DW3: frame size in macroblocks, minus one in each dimension */
    OUT_BCS_BATCH(batch,
                  (slice_concealment_disable_bit << 31) |
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
1062
1063 static void
1064 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1065                         struct decode_state *decode_state,
1066                         struct gen7_mfd_context *gen7_mfd_context)
1067 {
1068     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1069     int i, j;
1070
1071     /* Update internal QM state */
1072     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1073         VAIQMatrixBufferMPEG2 * const iq_matrix =
1074             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1075
1076         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1077             iq_matrix->load_intra_quantiser_matrix) {
1078             gen_iq_matrix->load_intra_quantiser_matrix =
1079                 iq_matrix->load_intra_quantiser_matrix;
1080             if (iq_matrix->load_intra_quantiser_matrix) {
1081                 for (j = 0; j < 64; j++)
1082                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1083                         iq_matrix->intra_quantiser_matrix[j];
1084             }
1085         }
1086
1087         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1088             iq_matrix->load_non_intra_quantiser_matrix) {
1089             gen_iq_matrix->load_non_intra_quantiser_matrix =
1090                 iq_matrix->load_non_intra_quantiser_matrix;
1091             if (iq_matrix->load_non_intra_quantiser_matrix) {
1092                 for (j = 0; j < 64; j++)
1093                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1094                         iq_matrix->non_intra_quantiser_matrix[j];
1095             }
1096         }
1097     }
1098
1099     /* Commit QM state to HW */
1100     for (i = 0; i < 2; i++) {
1101         unsigned char *qm = NULL;
1102         int qm_type;
1103
1104         if (i == 0) {
1105             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1106                 qm = gen_iq_matrix->intra_quantiser_matrix;
1107                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1108             }
1109         } else {
1110             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1111                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1112                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1113             }
1114         }
1115
1116         if (!qm)
1117             continue;
1118
1119         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1120     }
1121 }
1122
/*
 * Emits MFD_MPEG2_BSD_OBJECT for one slice.  The macroblock count is
 * derived from the distance between this slice's start position and the
 * next slice's start (or the bottom of the picture for the last slice).
 */
static void
gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    /* NOTE(review): when the workaround flag is positive, slice vertical
     * positions for field pictures are assumed to be in frame units and are
     * halved below -- see mpeg2_wa_slice_vertical_position(); confirm there. */
    is_field_pic_wa = is_field_pic &&
                      gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    /* End position: the next slice's start, or the picture bottom (field
     * height for field pictures) when this is the last slice. */
    if (next_slice_param == NULL) {
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    /* Macroblocks covered by this slice, in raster order. */
    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    /* Data size/offset skip the whole bytes of the slice header up to
     * macroblock_offset; the residual bits go into DW3 bits 2:0. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (vpos1 << 8 | hpos1));
    ADVANCE_BCS_BATCH(batch);
}
1171
/*
 * Top-level MPEG-2 decode of one picture: emits the picture-level MFX
 * state once, then one BSD object per slice.
 */
static void
gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Lazily decide whether this stream needs the slice vertical-position
     * workaround (a negative cached value means "not decided yet"). */
    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);

        /* First slice of the next parameter buffer, so the last slice here
         * knows its successor (NULL at picture end). */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1227
/* VA-API VC-1 MV-mode value -> hardware motion-vector mode encoding. */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};

/* VC-1 B-picture scale factors.
 * NOTE(review): presumably indexed by the BFRACTION code -- confirm against
 * the pic-state code that consumes this table. */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160,
    224,
};

/* VA-API conditional-overlap value -> hardware encoding. */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};

/* VA-API VC-1 profile index -> GEN7_VC1_*_PROFILE constant. */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
};

/* Per-field picture types for field-interlaced VC-1 pictures, indexed by
 * the FPTYPE syntax element and the field's decode order (0 = first decoded
 * field, 1 = second) -- see the [fptype][!is_first_field] lookups below. */
static const int fptype_to_picture_type[8][2] = {
    {GEN7_VC1_I_PICTURE, GEN7_VC1_I_PICTURE},
    {GEN7_VC1_I_PICTURE, GEN7_VC1_P_PICTURE},
    {GEN7_VC1_P_PICTURE, GEN7_VC1_I_PICTURE},
    {GEN7_VC1_P_PICTURE, GEN7_VC1_P_PICTURE},
    {GEN7_VC1_B_PICTURE, GEN7_VC1_B_PICTURE},
    {GEN7_VC1_B_PICTURE, GEN7_VC1_BI_PICTURE},
    {GEN7_VC1_BI_PICTURE, GEN7_VC1_B_PICTURE},
    {GEN7_VC1_BI_PICTURE, GEN7_VC1_BI_PICTURE}
};
1266
1267 static void
1268 gen8_mfd_free_vc1_surface(void **data)
1269 {
1270     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1271
1272     if (!gen7_vc1_surface)
1273         return;
1274
1275     dri_bo_unreference(gen7_vc1_surface->dmv_top);
1276     dri_bo_unreference(gen7_vc1_surface->dmv_bottom);
1277     free(gen7_vc1_surface);
1278     *data = NULL;
1279 }
1280
/*
 * Lazily creates and updates the per-surface VC-1 bookkeeping attached to
 * the render target: per-field picture types, intensity-compensation state
 * and the direct-MV write buffers.
 */
static void
gen8_mfd_init_vc1_surface(VADriverContextP ctx,
                          VAPictureParameterBufferVC1 *pic_param,
                          struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
    int height_in_mbs;
    int picture_type;
    int is_first_field = 1;

    if (!pic_param->sequence_fields.bits.interlace ||
        (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
        picture_type = pic_param->picture_fields.bits.picture_type;
    } else {/* Field-Interlace */
        /* For field pairs, picture_type carries the FPTYPE value; resolve
         * the current field's type from the decode order. */
        is_first_field = pic_param->picture_fields.bits.is_first_field;
        picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
    }

    obj_surface->free_private_data = gen8_mfd_free_vc1_surface;

    /* Allocate the private struct on first use of this surface. */
    if (!gen7_vc1_surface) {
        gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);

        if (!gen7_vc1_surface)
            return;

        assert((obj_surface->size & 0x3f) == 0);
        obj_surface->private_data = gen7_vc1_surface;
    }

    /* New frame, or the first field of a field pair: reset the per-field
     * state so values from a previous use of this surface cannot leak in. */
    if (!pic_param->sequence_fields.bits.interlace ||
        pic_param->picture_fields.bits.frame_coding_mode < 2 || /* Progressive or Frame-Interlace */
        is_first_field) {
        gen7_vc1_surface->picture_type_top = 0;
        gen7_vc1_surface->picture_type_bottom = 0;
        gen7_vc1_surface->intensity_compensation_top = 0;
        gen7_vc1_surface->intensity_compensation_bottom = 0;
        gen7_vc1_surface->luma_scale_top[0] = 0;
        gen7_vc1_surface->luma_scale_top[1] = 0;
        gen7_vc1_surface->luma_scale_bottom[0] = 0;
        gen7_vc1_surface->luma_scale_bottom[1] = 0;
        gen7_vc1_surface->luma_shift_top[0] = 0;
        gen7_vc1_surface->luma_shift_top[1] = 0;
        gen7_vc1_surface->luma_shift_bottom[0] = 0;
        gen7_vc1_surface->luma_shift_bottom[1] = 0;
    }

    /* Record the picture type: both fields for frame pictures, otherwise
     * only the field this picture supplies (top_field_first combined with
     * decode order tells top from bottom). */
    if (!pic_param->sequence_fields.bits.interlace ||
        pic_param->picture_fields.bits.frame_coding_mode < 2) { /* Progressive or Frame-Interlace */
        gen7_vc1_surface->picture_type_top = picture_type;
        gen7_vc1_surface->picture_type_bottom = picture_type;
    } else if (pic_param->picture_fields.bits.top_field_first ^ is_first_field)
        gen7_vc1_surface->picture_type_bottom = picture_type;
    else
        gen7_vc1_surface->picture_type_top = picture_type;

    /* Direct-MV write buffer, sized from the full frame height in MBs. */
    if (gen7_vc1_surface->dmv_top == NULL) {
        height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        gen7_vc1_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
                                             "direct mv w/r buffer",
                                             128 * height_in_mbs * 64,
                                             0x1000);
    }

    /* Second buffer for interlaced content; sized for one field's height
     * in MBs (32-pixel units of the frame height). */
    if (pic_param->sequence_fields.bits.interlace &&
        gen7_vc1_surface->dmv_bottom == NULL) {
        height_in_mbs = ALIGN(pic_param->coded_height, 32) / 32;
        gen7_vc1_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
                                             "direct mv w/r buffer",
                                             128 * height_in_mbs * 64,
                                             0x1000);
    }
}
1355
1356 static void
1357 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1358                          struct decode_state *decode_state,
1359                          struct gen7_mfd_context *gen7_mfd_context)
1360 {
1361     VAPictureParameterBufferVC1 *pic_param;
1362     struct i965_driver_data *i965 = i965_driver_data(ctx);
1363     struct object_surface *obj_surface;
1364     struct gen7_vc1_surface *gen7_vc1_current_surface;
1365     struct gen7_vc1_surface *gen7_vc1_forward_surface;
1366     dri_bo *bo;
1367     int width_in_mbs;
1368     int picture_type;
1369     int is_first_field = 1;
1370     int i;
1371
1372     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1373     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1374     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1375
1376     if (!pic_param->sequence_fields.bits.interlace ||
1377         (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
1378         picture_type = pic_param->picture_fields.bits.picture_type;
1379     } else {/* Field-Interlace */
1380         is_first_field = pic_param->picture_fields.bits.is_first_field;
1381         picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
1382     }
1383
1384     /* Current decoded picture */
1385     obj_surface = decode_state->render_object;
1386     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1387     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1388
1389     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1390     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1391     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1392
1393     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1394     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1395     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1396
1397     if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1398         gen7_mfd_context->post_deblocking_output.valid = 0;
1399         gen7_mfd_context->pre_deblocking_output.valid = 1;
1400     } else {
1401         gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1402         gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1403     }
1404
1405     intel_update_vc1_frame_store_index(ctx,
1406                                        decode_state,
1407                                        pic_param,
1408                                        gen7_mfd_context->reference_surface);
1409
1410     if (picture_type == GEN7_VC1_P_PICTURE) {
1411         obj_surface = decode_state->reference_objects[0];
1412         gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);
1413         if (pic_param->forward_reference_picture != VA_INVALID_ID &&
1414             obj_surface)
1415             gen7_vc1_forward_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
1416         else
1417             gen7_vc1_forward_surface = NULL;
1418
1419         if (!pic_param->sequence_fields.bits.interlace ||
1420             pic_param->picture_fields.bits.frame_coding_mode == 0) { /* Progressive */
1421             if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1422                 if (gen7_vc1_forward_surface) {
1423                     gen7_vc1_forward_surface->intensity_compensation_top = 1;
1424                     gen7_vc1_forward_surface->intensity_compensation_bottom = 1;
1425                     gen7_vc1_forward_surface->luma_scale_top[0] = pic_param->luma_scale;
1426                     gen7_vc1_forward_surface->luma_scale_bottom[0] = pic_param->luma_scale;
1427                     gen7_vc1_forward_surface->luma_shift_top[0] = pic_param->luma_shift;
1428                     gen7_vc1_forward_surface->luma_shift_bottom[0] = pic_param->luma_shift;
1429                 }
1430             }
1431         } else if (pic_param->sequence_fields.bits.interlace &&
1432             pic_param->picture_fields.bits.frame_coding_mode == 1) { /* Frame-Interlace */
1433             if (pic_param->picture_fields.bits.intensity_compensation) {
1434                 if (gen7_vc1_forward_surface) {
1435                     gen7_vc1_forward_surface->intensity_compensation_top = 1;
1436                     gen7_vc1_forward_surface->intensity_compensation_bottom = 1;
1437                     gen7_vc1_forward_surface->luma_scale_top[0] = pic_param->luma_scale;
1438                     gen7_vc1_forward_surface->luma_scale_bottom[0] = pic_param->luma_scale;
1439                     gen7_vc1_forward_surface->luma_shift_top[0] = pic_param->luma_shift;
1440                     gen7_vc1_forward_surface->luma_shift_bottom[0] = pic_param->luma_shift;
1441                 }
1442             }
1443         } else if (pic_param->sequence_fields.bits.interlace &&
1444                    pic_param->picture_fields.bits.frame_coding_mode == 2) { /* Field-Interlace */
1445             if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1446                 if (pic_param->intensity_compensation_field == 1 || /* Top field */
1447                     pic_param->intensity_compensation_field == 0) { /* Both fields */
1448                     if (is_first_field) {
1449                         if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1450                              (pic_param->reference_fields.bits.reference_field_pic_indicator ==
1451                              pic_param->picture_fields.bits.top_field_first)) ||
1452                             pic_param->reference_fields.bits.num_reference_pictures) {
1453                             if (gen7_vc1_forward_surface) {
1454                                 i = gen7_vc1_forward_surface->intensity_compensation_top++;
1455                                 gen7_vc1_forward_surface->luma_scale_top[i] = pic_param->luma_scale;
1456                                 gen7_vc1_forward_surface->luma_shift_top[i] = pic_param->luma_shift;
1457                             }
1458                         }
1459                     } else { /* Second field */
1460                         if (pic_param->picture_fields.bits.top_field_first) {
1461                             if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1462                                  !pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1463                                 pic_param->reference_fields.bits.num_reference_pictures) {
1464                                 i = gen7_vc1_current_surface->intensity_compensation_top++;
1465                                 gen7_vc1_current_surface->luma_scale_top[i] = pic_param->luma_scale;
1466                                 gen7_vc1_current_surface->luma_shift_top[i] = pic_param->luma_shift;
1467                             }
1468                         } else {
1469                             if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1470                                  pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1471                                 pic_param->reference_fields.bits.num_reference_pictures) {
1472                                 if (gen7_vc1_forward_surface) {
1473                                     i = gen7_vc1_forward_surface->intensity_compensation_top++;
1474                                     gen7_vc1_forward_surface->luma_scale_top[i] = pic_param->luma_scale;
1475                                     gen7_vc1_forward_surface->luma_shift_top[i] = pic_param->luma_shift;
1476                                 }
1477                             }
1478                         }
1479                     }
1480                 }
1481                 if (pic_param->intensity_compensation_field == 2 || /* Bottom field */
1482                     pic_param->intensity_compensation_field == 0) { /* Both fields */
1483                     if (is_first_field) {
1484                         if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1485                              (pic_param->reference_fields.bits.reference_field_pic_indicator ^
1486                               pic_param->picture_fields.bits.top_field_first)) ||
1487                             pic_param->reference_fields.bits.num_reference_pictures) {
1488                             if (gen7_vc1_forward_surface) {
1489                                 i = gen7_vc1_forward_surface->intensity_compensation_bottom++;
1490                                 if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
1491                                     gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale;
1492                                     gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift;
1493                                 } else { /* Both fields */
1494                                     gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
1495                                     gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
1496                                 }
1497                             }
1498                         }
1499                     } else { /* Second field */
1500                         if (pic_param->picture_fields.bits.top_field_first) {
1501                             if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1502                                  pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1503                                 pic_param->reference_fields.bits.num_reference_pictures) {
1504                                 if (gen7_vc1_forward_surface) {
1505                                     i = gen7_vc1_forward_surface->intensity_compensation_bottom++;
1506                                     if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
1507                                         gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale;
1508                                         gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift;
1509                                     } else { /* Both fields */
1510                                         gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
1511                                         gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
1512                                     }
1513                                 }
1514                             }
1515                         } else {
1516                            if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1517                                  !pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1518                                 pic_param->reference_fields.bits.num_reference_pictures) {
1519                                 i = gen7_vc1_current_surface->intensity_compensation_bottom++;
1520                                if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
1521                                    gen7_vc1_current_surface->luma_scale_bottom[i] = pic_param->luma_scale;
1522                                    gen7_vc1_current_surface->luma_shift_bottom[i] = pic_param->luma_shift;
1523                                 } else { /* Both fields */
1524                                     gen7_vc1_current_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
1525                                     gen7_vc1_current_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
1526                                 }
1527                             }
1528                         }
1529                     }
1530                 }
1531             }
1532         }
1533     }
1534
1535     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1536     bo = dri_bo_alloc(i965->intel.bufmgr,
1537                       "intra row store",
1538                       width_in_mbs * 64,
1539                       0x1000);
1540     assert(bo);
1541     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1542     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1543
1544     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1545     bo = dri_bo_alloc(i965->intel.bufmgr,
1546                       "deblocking filter row store",
1547                       width_in_mbs * 7 * 64,
1548                       0x1000);
1549     assert(bo);
1550     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1551     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1552
1553     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1554     bo = dri_bo_alloc(i965->intel.bufmgr,
1555                       "bsd mpc row store",
1556                       width_in_mbs * 96,
1557                       0x1000);
1558     assert(bo);
1559     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1560     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1561
1562     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1563
1564     if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
1565         gen7_mfd_context->bitplane_read_buffer.valid = 1;
1566     else
1567         gen7_mfd_context->bitplane_read_buffer.valid = !!(pic_param->bitplane_present.value & 0x7f);
1568     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1569
1570     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1571         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1572         int height_in_mbs;
1573         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1574         int src_w, src_h;
1575         uint8_t *src = NULL, *dst = NULL;
1576
1577         if (!pic_param->sequence_fields.bits.interlace ||
1578             (pic_param->picture_fields.bits.frame_coding_mode < 2)) /* Progressive or Frame-Interlace */
1579             height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1580         else /* Field-Interlace */
1581             height_in_mbs = ALIGN(pic_param->coded_height, 32) / 32;
1582
1583         bo = dri_bo_alloc(i965->intel.bufmgr,
1584                           "VC-1 Bitplane",
1585                           bitplane_width * height_in_mbs,
1586                           0x1000);
1587         assert(bo);
1588         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1589
1590         dri_bo_map(bo, True);
1591         assert(bo->virtual);
1592         dst = bo->virtual;
1593
1594         if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1595             for (src_h = 0; src_h < height_in_mbs; src_h++) {
1596                 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1597                     int dst_index;
1598                     uint8_t src_value = 0x2;
1599
1600                     dst_index = src_w / 2;
1601                     dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1602                 }
1603
1604                 if (src_w & 1)
1605                     dst[src_w / 2] >>= 4;
1606
1607                 dst += bitplane_width;
1608             }
1609         } else {
1610             assert(decode_state->bit_plane->buffer);
1611             src = decode_state->bit_plane->buffer;
1612
1613             for (src_h = 0; src_h < height_in_mbs; src_h++) {
1614                 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1615                     int src_index, dst_index;
1616                     int src_shift;
1617                     uint8_t src_value;
1618
1619                     src_index = (src_h * width_in_mbs + src_w) / 2;
1620                     src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1621                     src_value = ((src[src_index] >> src_shift) & 0xf);
1622
1623                     dst_index = src_w / 2;
1624                     dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1625                 }
1626
1627                 if (src_w & 1)
1628                     dst[src_w / 2] >>= 4;
1629
1630                 dst += bitplane_width;
1631             }
1632         }
1633
1634         dri_bo_unmap(bo);
1635     } else
1636         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1637 }
1638
/*
 * Build and emit the MFD_VC1_LONG_PIC_STATE command for the current VC-1
 * picture into the BCS batch buffer.
 *
 * Derives every field of the 6-dword command from the VA-API picture
 * parameter buffer: picture dimensions in macroblocks, alternate-PQUANT
 * (DQUANT) configuration, unified MV mode, reference field polarity,
 * B-fraction scale factor, bitplane/raw-coding flags, transform coding-set
 * selection, direct-MV surface validity, frame coding mode, forward/backward
 * reference distances, overlap smoothing, and sub-pel interpolation mode.
 *
 * Side effects beyond batch emission:
 *  - May rewrite pic_param->transform_fields in place (see 8.3.6.2.1 note
 *    below) — the caller's parameter buffer is mutated.
 *  - For field-interlaced I/P pictures, caches reference_distance on the
 *    render surface's private gen7_vc1_surface so the second field and later
 *    B fields can read it back.
 */
static void
gen8_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    struct gen7_vc1_surface *gen7_vc1_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode = 0;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int frfd = 0;                   /* forward reference field distance */
    int brfd = 0;                   /* backward reference field distance */
    int fcm = 0;                    /* frame coding mode field of the command */
    int picture_type;               /* per-field picture type (I/P/B/BI/skipped) */
    int ptype;                      /* picture type value programmed into HW */
    int profile;
    int overlap = 0;
    int interpolation_mode = 0;
    int height_in_mbs;
    int is_first_field = 1;
    int loopfilter = 0;
    int bitplane_present;
    int forward_mb = 0, mv_type_mb = 0, skip_mb = 0, direct_mb = 0;
    int overflags = 0, ac_pred = 0, field_tx = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /* For field pictures, picture_type encodes both fields; split it per
     * field via fptype_to_picture_type, and height counts 32-line MB pairs. */
    if (!pic_param->sequence_fields.bits.interlace ||
        (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
        picture_type = pic_param->picture_fields.bits.picture_type;
        height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
    } else {/* Field-Interlace */
        is_first_field = pic_param->picture_fields.bits.is_first_field;
        picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
        height_in_mbs = ALIGN(pic_param->coded_height, 32) / 32;
    }

    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /* Map the VC-1 DQUANT syntax onto the hardware's AltPQuant config and
     * edge mask (which picture edges use the alternate quantizer). */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        /* DQUANT == 2: alternate quantizer on all four edges. */
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3: /* all macroblocks: per-MB (dqbilevel=0) or two-level (dqbilevel=1) */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;

            case 0: /* all four edges */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1: /* double edge: DQDBEDGE selects the edge pair */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2: /* single edge: DQSBEDGE selects the edge */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /* Unified MV mode (1-MV / mixed-MV / half-pel variants) only applies to
     * non-frame-interlaced P/B pictures; under intensity compensation the
     * real MV mode is carried in mv_mode2. */
    if ((!pic_param->sequence_fields.bits.interlace ||
         pic_param->picture_fields.bits.frame_coding_mode != 1) && /* Progressive or Field-Interlace */
        (picture_type == GEN7_VC1_P_PICTURE ||
         picture_type == GEN7_VC1_B_PICTURE)) {
        if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
            assert(pic_param->mv_fields.bits.mv_mode2 < 4);
            unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
        } else {
            assert(pic_param->mv_fields.bits.mv_mode < 4);
            unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
        }
    }

    /* Field P pictures referencing a single field: derive the polarity
     * (top=0/bottom=1 — TODO confirm encoding against MFX_VC1_PIC_STATE) of
     * that reference field from which field this is, field order, and the
     * reference_field_pic_indicator (same/opposite parity). */
    if (pic_param->sequence_fields.bits.interlace &&
        pic_param->picture_fields.bits.frame_coding_mode == 2 && /* Field-Interlace */
        picture_type == GEN7_VC1_P_PICTURE &&
        !pic_param->reference_fields.bits.num_reference_pictures) {
        if (pic_param->reference_fields.bits.reference_field_pic_indicator == 0) {
            ref_field_pic_polarity = is_first_field ?
                                        pic_param->picture_fields.bits.top_field_first :
                                        !pic_param->picture_fields.bits.top_field_first;
        } else {
            ref_field_pic_polarity = is_first_field ?
                                        !pic_param->picture_fields.bits.top_field_first :
                                        pic_param->picture_fields.bits.top_field_first;
        }
    }

    /* BFRACTION → scale factor for direct-mode MV scaling; the table has 21
     * entries, out-of-range fractions leave scale_factor at 0. */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    /* A skipped picture is decoded as a P picture with an all-"skipped"
     * bitplane (built by the init path); no raw-coding flags apply. */
    if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
        ptype = GEN7_VC1_P_PICTURE;
        bitplane_present = 1;
    } else {
        ptype = pic_param->picture_fields.bits.picture_type;
        bitplane_present = !!(pic_param->bitplane_present.value & 0x7f);
        forward_mb = pic_param->raw_coding.flags.forward_mb;
        mv_type_mb = pic_param->raw_coding.flags.mv_type_mb;
        skip_mb = pic_param->raw_coding.flags.skip_mb;
        direct_mb = pic_param->raw_coding.flags.direct_mb;
        overflags = pic_param->raw_coding.flags.overflags;
        ac_pred = pic_param->raw_coding.flags.ac_pred;
        field_tx = pic_param->raw_coding.flags.field_tx;
        loopfilter = pic_param->entrypoint_fields.bits.loopfilter;
    }

    /* Luma AC coding set: index2 for intra pictures, index1 otherwise. */
    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;

        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        /* NOTE: this mutates the caller-supplied pic_param buffer in place. */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /* Direct-mode MV surface is valid only when the backward (anchor)
     * reference of a B picture was a P picture; for field pictures pick the
     * top or bottom anchor field based on field order and which field this is. */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        obj_surface = decode_state->reference_objects[1];

        if (pic_param->backward_reference_picture != VA_INVALID_ID &&
            obj_surface)
            gen7_vc1_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
        else
            gen7_vc1_surface = NULL;

        if (gen7_vc1_surface) {
            if (pic_param->sequence_fields.bits.interlace &&
                pic_param->picture_fields.bits.frame_coding_mode == 2 && /* Field-Interlace */
                pic_param->picture_fields.bits.top_field_first ^ is_first_field) {
                if (gen7_vc1_surface->picture_type_bottom == GEN7_VC1_P_PICTURE)
                    dmv_surface_valid = 1;
            } else if (gen7_vc1_surface->picture_type_top == GEN7_VC1_P_PICTURE)
                dmv_surface_valid = 1;
        }
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* NOTE(review): for any interlaced sequence with top_field_first == 0
     * fcm is forced to 3 regardless of frame_coding_mode (even for
     * progressive/frame-interlace pictures in that sequence). Presumably 3
     * encodes "field picture, bottom field first" in the HW FCM field —
     * confirm against the MFX_VC1_LONG_PIC_STATE FCM encoding in the PRM. */
    if (pic_param->sequence_fields.bits.interlace) {
        if (!pic_param->picture_fields.bits.top_field_first)
            fcm = 3;
        else
            fcm = pic_param->picture_fields.bits.frame_coding_mode;
    }

    /* Reference field distances (FRFD/BRFD) for field-interlaced pictures:
     * I/P fields cache REFDIST on the current surface (written only on the
     * first field, reused by the second); B fields derive FRFD from the
     * anchor's distance scaled by the BFRACTION factor, BRFD from the
     * remainder, clamped at 0. */
    if (pic_param->sequence_fields.bits.interlace &&
        pic_param->picture_fields.bits.frame_coding_mode == 2) { /* Field-Interlace */
        if (picture_type == GEN7_VC1_I_PICTURE ||
             picture_type == GEN7_VC1_P_PICTURE) {
            gen7_vc1_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);

            if (is_first_field)
                gen7_vc1_surface->reference_distance = pic_param->reference_fields.bits.reference_distance;

            frfd = gen7_vc1_surface->reference_distance;
        } else if (picture_type == GEN7_VC1_B_PICTURE) {
            obj_surface = decode_state->reference_objects[1];

            if (pic_param->backward_reference_picture != VA_INVALID_ID &&
                obj_surface)
                gen7_vc1_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
            else
                gen7_vc1_surface = NULL;

            if (gen7_vc1_surface) {
                frfd = (scale_factor * gen7_vc1_surface->reference_distance) >> 8;

                brfd = gen7_vc1_surface->reference_distance - frfd - 1;
                if (brfd < 0)
                    brfd = 0;
            }
        }
    }

    /* Overlap smoothing: in advanced profile it depends on picture type,
     * PQUANT >= 9 and (for intra pictures) CONDOVER; in simple/main profile
     * it is on for non-B pictures with PQUANT >= 9. */
    if (pic_param->sequence_fields.bits.overlap) {
        if (profile == GEN7_VC1_ADVANCED_PROFILE) {
            if (picture_type == GEN7_VC1_P_PICTURE &&
                pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
                overlap = 1;
            }
            if (picture_type == GEN7_VC1_I_PICTURE ||
                picture_type == GEN7_VC1_BI_PICTURE) {
                if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
                    overlap = 1;
                } else if (pic_param->conditional_overlap_flag == 1 || /* all block boundaries */
                           pic_param->conditional_overlap_flag == 2) { /* coded by OVERFLAGSMB bitplane */
                    overlap = 1;
                }
            }
        } else {
            if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
                picture_type != GEN7_VC1_B_PICTURE) {
                overlap = 1;
            }
        }
    }

    /* Interpolation mode: bit 3 selects half-pel bilinear (directly or via
     * the mode behind intensity compensation), bit 0 is FASTUVMC. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 8 | pic_param->fast_uvmc_flag;
    else
        interpolation_mode = 0 | pic_param->fast_uvmc_flag;

    /* Emit the 6-dword MFD_VC1_LONG_PIC_STATE command. */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  loopfilter << 5 |
                  overlap << 4 |
                  !is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  ptype << 26 |
                  fcm << 24 |
                  alt_pq << 16 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
                  scale_factor << 0);
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  brfd << 20 |
                  frfd << 16 |
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    OUT_BCS_BATCH(batch,
                  bitplane_present << 31 |
                  forward_mb << 30 |
                  mv_type_mb << 29 |
                  skip_mb << 28 |
                  direct_mb << 27 |
                  overflags << 26 |
                  ac_pred << 25 |
                  field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
}
1954
1955 static void
1956 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1957                              struct decode_state *decode_state,
1958                              struct gen7_mfd_context *gen7_mfd_context)
1959 {
1960     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1961     VAPictureParameterBufferVC1 *pic_param;
1962     struct gen7_vc1_surface *gen7_vc1_top_surface;
1963     struct gen7_vc1_surface *gen7_vc1_bottom_surface;
1964     int picture_type;
1965     int is_first_field = 1;
1966     int intensitycomp_single_fwd = 0;
1967     int intensitycomp_single_bwd = 0;
1968     int intensitycomp_double_fwd = 0;
1969     int lumscale1_single_fwd = 0;
1970     int lumscale2_single_fwd = 0;
1971     int lumshift1_single_fwd = 0;
1972     int lumshift2_single_fwd = 0;
1973     int lumscale1_single_bwd = 0;
1974     int lumscale2_single_bwd = 0;
1975     int lumshift1_single_bwd = 0;
1976     int lumshift2_single_bwd = 0;
1977     int lumscale1_double_fwd = 0;
1978     int lumscale2_double_fwd = 0;
1979     int lumshift1_double_fwd = 0;
1980     int lumshift2_double_fwd = 0;
1981     int replication_mode = 0;
1982
1983     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1984     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1985
1986     if (!pic_param->sequence_fields.bits.interlace ||
1987         (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
1988         picture_type = pic_param->picture_fields.bits.picture_type;
1989     } else {/* Field-Interlace */
1990         is_first_field = pic_param->picture_fields.bits.is_first_field;
1991         picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
1992     }
1993
1994     if (picture_type == GEN7_VC1_P_PICTURE ||
1995         picture_type == GEN7_VC1_B_PICTURE) {
1996         if (gen7_mfd_context->reference_surface[0].surface_id != VA_INVALID_ID)
1997             gen7_vc1_top_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[0].obj_surface->private_data);
1998         else
1999             gen7_vc1_top_surface = NULL;
2000
2001         if (gen7_vc1_top_surface) {
2002             intensitycomp_single_fwd = !!gen7_vc1_top_surface->intensity_compensation_top;
2003             lumscale1_single_fwd = gen7_vc1_top_surface->luma_scale_top[0];
2004             lumshift1_single_fwd = gen7_vc1_top_surface->luma_shift_top[0];
2005             if (gen7_vc1_top_surface->intensity_compensation_top == 2) {
2006                 intensitycomp_double_fwd = 1;
2007                 lumscale1_double_fwd = gen7_vc1_top_surface->luma_scale_top[1];
2008                 lumshift1_double_fwd = gen7_vc1_top_surface->luma_shift_top[1];
2009             }
2010         }
2011
2012         if (pic_param->sequence_fields.bits.interlace &&
2013             pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
2014             if (gen7_mfd_context->reference_surface[2].surface_id != VA_INVALID_ID)
2015                 gen7_vc1_bottom_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[2].obj_surface->private_data);
2016             else
2017                 gen7_vc1_bottom_surface = NULL;
2018
2019             if (gen7_vc1_bottom_surface) {
2020                 intensitycomp_single_fwd |= !!gen7_vc1_bottom_surface->intensity_compensation_bottom << 1;
2021                 lumscale2_single_fwd = gen7_vc1_bottom_surface->luma_scale_bottom[0];
2022                 lumshift2_single_fwd = gen7_vc1_bottom_surface->luma_shift_bottom[0];
2023                 if (gen7_vc1_bottom_surface->intensity_compensation_bottom == 2) {
2024                     intensitycomp_double_fwd |= 2;
2025                     lumscale2_double_fwd = gen7_vc1_bottom_surface->luma_scale_bottom[1];
2026                     lumshift2_double_fwd = gen7_vc1_bottom_surface->luma_shift_bottom[1];
2027                 }
2028             }
2029         }
2030     }
2031
2032     if (picture_type == GEN7_VC1_B_PICTURE) {
2033         if (gen7_mfd_context->reference_surface[1].surface_id != VA_INVALID_ID)
2034             gen7_vc1_top_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[1].obj_surface->private_data);
2035         else
2036             gen7_vc1_top_surface = NULL;
2037
2038         if (gen7_vc1_top_surface) {
2039             intensitycomp_single_bwd = !!gen7_vc1_top_surface->intensity_compensation_top;
2040             lumscale1_single_bwd = gen7_vc1_top_surface->luma_scale_top[0];
2041             lumshift1_single_bwd = gen7_vc1_top_surface->luma_shift_top[0];
2042         }
2043
2044         if (pic_param->sequence_fields.bits.interlace &&
2045             pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
2046             if (gen7_mfd_context->reference_surface[3].surface_id != VA_INVALID_ID)
2047                 gen7_vc1_bottom_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[3].obj_surface->private_data);
2048             else
2049                 gen7_vc1_bottom_surface = NULL;
2050
2051             if (gen7_vc1_bottom_surface) {
2052                 intensitycomp_single_bwd |= !!gen7_vc1_bottom_surface->intensity_compensation_bottom << 1;
2053                 lumscale2_single_bwd = gen7_vc1_bottom_surface->luma_scale_bottom[0];
2054                 lumshift2_single_bwd = gen7_vc1_bottom_surface->luma_shift_bottom[0];
2055             }
2056         }
2057     }
2058
2059     if (pic_param->sequence_fields.bits.interlace &&
2060         pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
2061         if (picture_type == GEN7_VC1_P_PICTURE)
2062             replication_mode = 0x5;
2063         else if (picture_type == GEN7_VC1_B_PICTURE)
2064             replication_mode = 0xf;
2065     }
2066
2067     BEGIN_BCS_BATCH(batch, 6);
2068     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
2069     OUT_BCS_BATCH(batch,
2070                   intensitycomp_double_fwd << 14 |
2071                   0 << 12 |
2072                   intensitycomp_single_fwd << 10 |
2073                   intensitycomp_single_bwd << 8 |
2074                   replication_mode << 4 |
2075                   0);
2076     OUT_BCS_BATCH(batch,
2077                   lumshift2_single_fwd << 24 |
2078                   lumshift1_single_fwd << 16 |
2079                   lumscale2_single_fwd << 8 |
2080                   lumscale1_single_fwd << 0);
2081     OUT_BCS_BATCH(batch,
2082                   lumshift2_double_fwd << 24 |
2083                   lumshift1_double_fwd << 16 |
2084                   lumscale2_double_fwd << 8 |
2085                   lumscale1_double_fwd << 0);
2086     OUT_BCS_BATCH(batch,
2087                   lumshift2_single_bwd << 24 |
2088                   lumshift1_single_bwd << 16 |
2089                   lumscale2_single_bwd << 8 |
2090                   lumscale1_single_bwd << 0);
2091     OUT_BCS_BATCH(batch,
2092                   0 << 24 |
2093                   0 << 16 |
2094                   0 << 8 |
2095                   0 << 0);
2096     ADVANCE_BCS_BATCH(batch);
2097 }
2098
/*
 * Emit MFX_VC1_DIRECTMODE_STATE: program the direct-mode motion-vector
 * (DMV) buffers.  P and skipped pictures write their MVs out so a later
 * B picture can use them; a B picture reads the MVs saved by its
 * backward (temporally next) anchor picture.
 */
static void
gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAPictureParameterBufferVC1 *pic_param;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
    int picture_type;
    int is_first_field = 1;

    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /* For field-interlaced pictures the VA picture_type encodes both
     * fields; translate it to the type of the field being decoded now. */
    if (!pic_param->sequence_fields.bits.interlace ||
        (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
        picture_type = pic_param->picture_fields.bits.picture_type;
    } else {/* Field-Interlace */
        is_first_field = pic_param->picture_fields.bits.is_first_field;
        picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
    }

    if (picture_type == GEN7_VC1_P_PICTURE ||
        picture_type == GEN7_VC1_SKIPPED_PICTURE) {
        obj_surface = decode_state->render_object;

        /* (top_field_first ^ is_first_field) is true when the field being
         * decoded is the bottom field; frame pictures always use dmv_top. */
        if (pic_param->sequence_fields.bits.interlace &&
            (pic_param->picture_fields.bits.frame_coding_mode == 2) && /* Field-Interlace */
            (pic_param->picture_fields.bits.top_field_first ^ is_first_field))
            dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_bottom;
        else
            dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_top;
    }

    if (picture_type == GEN7_VC1_B_PICTURE) {
        /* Read side: MVs stored with the backward reference picture. */
        obj_surface = decode_state->reference_objects[1];
        if (pic_param->backward_reference_picture != VA_INVALID_ID &&
            obj_surface &&
            obj_surface->private_data) {

            if (pic_param->sequence_fields.bits.interlace &&
                (pic_param->picture_fields.bits.frame_coding_mode == 2) && /* Field-Interlace */
                (pic_param->picture_fields.bits.top_field_first ^ is_first_field))
                dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_bottom;
            else
                dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_top;
        }
    }

    BEGIN_BCS_BATCH(batch, 7);
    OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));

    /* DW1-2: DMV write address (64-bit); zeros when not a P/skipped pic */
    if (dmv_write_buffer)
        OUT_BCS_RELOC64(batch, dmv_write_buffer,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    /* DW3: memory attributes (MOCS) for the write buffer */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW4-5: DMV read address (64-bit); zeros when not a B pic */
    if (dmv_read_buffer)
        OUT_BCS_RELOC64(batch, dmv_read_buffer,
                        I915_GEM_DOMAIN_INSTRUCTION, 0,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    /* DW6: memory attributes (MOCS) for the read buffer */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    ADVANCE_BCS_BATCH(batch);
}
2176
/*
 * Translate the VA-supplied macroblock bit offset into the offset the
 * hardware expects.  Only the VC-1 advanced profile (3) inserts
 * 0x00 0x00 0x03 emulation-prevention bytes into the bitstream, so the
 * offset must be re-counted over the raw (escaped) bytes; other profiles
 * pass the offset through unchanged.  Note: when an escape sequence ends
 * exactly at the header boundary, the buffer is patched in place.
 */
static int
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_size = in_slice_data_bit_offset / 8;
    int parsed, raw;

    if (profile != 3)
        return in_slice_data_bit_offset;

    /* 'parsed' counts de-escaped header bytes, 'raw' indexes buf[]. */
    for (parsed = 0, raw = 0; parsed < header_size; parsed++, raw++) {
        if (buf[raw] == 0 && buf[raw + 1] == 0 && buf[raw + 2] == 3 && buf[raw + 3] < 4) {
            if (parsed < header_size - 1) {
                /* Skip over the 0x03 emulation-prevention byte. */
                parsed++;
                raw += 2;
            } else {
                /* Escape straddles the header end: drop the 0x03 in place. */
                buf[raw + 2] = buf[raw + 1];
                raw++;
            }
        }
    }

    return 8 * raw + in_slice_data_bit_offset % 8;
}
2203
/*
 * Emit one MFD_VC1_BSD_OBJECT command for a slice.  The slice data bo is
 * mapped briefly so the macroblock bit offset can be corrected for
 * advanced-profile emulation-prevention bytes before programming the
 * hardware byte/bit offsets.
 */
static void
gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVC1 *pic_param,
                        VASliceParameterBufferVC1 *slice_param,
                        VASliceParameterBufferVC1 *next_slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int next_slice_start_vert_pos;
    int macroblock_offset;
    uint8_t *slice_data = NULL;

    dri_bo_map(slice_data_bo, True);
    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
    macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
                                                               slice_param->macroblock_offset,
                                                               pic_param->sequence_fields.bits.profile);
    dri_bo_unmap(slice_data_bo);

    /* Vertical end of this slice: start of the next slice, or the bottom
     * of the picture in MB rows (field pictures are half height, hence
     * the 32-pixel alignment divisor). */
    if (next_slice_param)
        next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
    else if (!pic_param->sequence_fields.bits.interlace ||
             pic_param->picture_fields.bits.frame_coding_mode < 2) /* Progressive or Frame-Interlace */
        next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
    else /* Field-Interlace */
        next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 32) / 32;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
    /* DW1: remaining slice data length in bytes after the header */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (macroblock_offset >> 3));
    /* DW2: byte offset of the first macroblock within the bo */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (macroblock_offset >> 3));
    /* DW3: first MB row of this slice / first MB row of the next */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_vertical_position << 16 |
                  next_slice_start_vert_pos << 0);
    /* DW4: sub-byte bit offset of the first macroblock */
    OUT_BCS_BATCH(batch,
                  (macroblock_offset & 0x7));
    ADVANCE_BCS_BATCH(batch);
}
2245
2246 static void
2247 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
2248                             struct decode_state *decode_state,
2249                             struct gen7_mfd_context *gen7_mfd_context)
2250 {
2251     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2252     VAPictureParameterBufferVC1 *pic_param;
2253     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
2254     dri_bo *slice_data_bo;
2255     int i, j;
2256
2257     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2258     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2259
2260     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
2261     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2262     intel_batchbuffer_emit_mi_flush(batch);
2263     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2264     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2265     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2266     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2267     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
2268     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
2269     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
2270
2271     for (j = 0; j < decode_state->num_slice_params; j++) {
2272         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2273         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
2274         slice_data_bo = decode_state->slice_datas[j]->bo;
2275         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
2276
2277         if (j == decode_state->num_slice_params - 1)
2278             next_slice_group_param = NULL;
2279         else
2280             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
2281
2282         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2283             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2284
2285             if (i < decode_state->slice_params[j]->num_elements - 1)
2286                 next_slice_param = slice_param + 1;
2287             else
2288                 next_slice_param = next_slice_group_param;
2289
2290             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2291             slice_param++;
2292         }
2293     }
2294
2295     intel_batchbuffer_end_atomic(batch);
2296     intel_batchbuffer_flush(batch);
2297 }
2298
2299 static void
2300 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
2301                           struct decode_state *decode_state,
2302                           struct gen7_mfd_context *gen7_mfd_context)
2303 {
2304     struct object_surface *obj_surface;
2305     VAPictureParameterBufferJPEGBaseline *pic_param;
2306     int subsampling = SUBSAMPLE_YUV420;
2307     int fourcc = VA_FOURCC_IMC3;
2308
2309     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2310
2311     if (pic_param->num_components == 1) {
2312         subsampling = SUBSAMPLE_YUV400;
2313         fourcc = VA_FOURCC_Y800;
2314     } else if (pic_param->num_components == 3) {
2315         int h1 = pic_param->components[0].h_sampling_factor;
2316         int h2 = pic_param->components[1].h_sampling_factor;
2317         int h3 = pic_param->components[2].h_sampling_factor;
2318         int v1 = pic_param->components[0].v_sampling_factor;
2319         int v2 = pic_param->components[1].v_sampling_factor;
2320         int v3 = pic_param->components[2].v_sampling_factor;
2321
2322         if (h1 == 2 * h2 && h2 == h3 &&
2323             v1 == 2 * v2 && v2 == v3) {
2324             subsampling = SUBSAMPLE_YUV420;
2325             fourcc = VA_FOURCC_IMC3;
2326         } else if (h1 == 2 * h2  && h2 == h3 &&
2327                    v1 == v2 && v2 == v3) {
2328             subsampling = SUBSAMPLE_YUV422H;
2329             fourcc = VA_FOURCC_422H;
2330         } else if (h1 == h2 && h2 == h3 &&
2331                    v1 == v2  && v2 == v3) {
2332             subsampling = SUBSAMPLE_YUV444;
2333             fourcc = VA_FOURCC_444P;
2334         } else if (h1 == 4 * h2 && h2 ==  h3 &&
2335                    v1 == v2 && v2 == v3) {
2336             subsampling = SUBSAMPLE_YUV411;
2337             fourcc = VA_FOURCC_411P;
2338         } else if (h1 == h2 && h2 == h3 &&
2339                    v1 == 2 * v2 && v2 == v3) {
2340             subsampling = SUBSAMPLE_YUV422V;
2341             fourcc = VA_FOURCC_422V;
2342         } else
2343             assert(0);
2344     } else {
2345         assert(0);
2346     }
2347
2348     /* Current decoded picture */
2349     obj_surface = decode_state->render_object;
2350     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
2351
2352     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2353     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2354     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2355     gen7_mfd_context->pre_deblocking_output.valid = 1;
2356
2357     gen7_mfd_context->post_deblocking_output.bo = NULL;
2358     gen7_mfd_context->post_deblocking_output.valid = 0;
2359
2360     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2361     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
2362
2363     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2364     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
2365
2366     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2367     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
2368
2369     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2370     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
2371
2372     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2373     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2374 }
2375
/* Map VA rotation indices (0/90/180/270 degrees) to the hardware's
 * JPEG rotation encoding. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
2382
2383 static void
2384 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
2385                         struct decode_state *decode_state,
2386                         struct gen7_mfd_context *gen7_mfd_context)
2387 {
2388     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2389     VAPictureParameterBufferJPEGBaseline *pic_param;
2390     int chroma_type = GEN7_YUV420;
2391     int frame_width_in_blks;
2392     int frame_height_in_blks;
2393
2394     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2395     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2396
2397     if (pic_param->num_components == 1)
2398         chroma_type = GEN7_YUV400;
2399     else if (pic_param->num_components == 3) {
2400         int h1 = pic_param->components[0].h_sampling_factor;
2401         int h2 = pic_param->components[1].h_sampling_factor;
2402         int h3 = pic_param->components[2].h_sampling_factor;
2403         int v1 = pic_param->components[0].v_sampling_factor;
2404         int v2 = pic_param->components[1].v_sampling_factor;
2405         int v3 = pic_param->components[2].v_sampling_factor;
2406
2407         if (h1 == 2 * h2 && h2 == h3 &&
2408             v1 == 2 * v2 && v2 == v3)
2409             chroma_type = GEN7_YUV420;
2410         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2411                  v1 == 1 && v2 == 1 && v3 == 1)
2412             chroma_type = GEN7_YUV422H_2Y;
2413         else if (h1 == h2 && h2 == h3 &&
2414                  v1 == v2 && v2 == v3)
2415             chroma_type = GEN7_YUV444;
2416         else if (h1 == 4 * h2 && h2 == h3 &&
2417                  v1 == v2 && v2 == v3)
2418             chroma_type = GEN7_YUV411;
2419         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2420                  v1 == 2 && v2 == 1 && v3 == 1)
2421             chroma_type = GEN7_YUV422V_2Y;
2422         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2423                  v1 == 2 && v2 == 2 && v3 == 2)
2424             chroma_type = GEN7_YUV422H_4Y;
2425         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2426                  v1 == 2 && v2 == 1 && v3 == 1)
2427             chroma_type = GEN7_YUV422V_4Y;
2428         else
2429             assert(0);
2430     }
2431
2432     if (chroma_type == GEN7_YUV400 ||
2433         chroma_type == GEN7_YUV444 ||
2434         chroma_type == GEN7_YUV422V_2Y) {
2435         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2436         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2437     } else if (chroma_type == GEN7_YUV411) {
2438         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2439         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2440     } else {
2441         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2442         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2443     }
2444
2445     BEGIN_BCS_BATCH(batch, 3);
2446     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2447     OUT_BCS_BATCH(batch,
2448                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2449                   (chroma_type << 0));
2450     OUT_BCS_BATCH(batch,
2451                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2452                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2453     ADVANCE_BCS_BATCH(batch);
2454 }
2455
/* Huffman table slot per VA table index: 0 -> luma (Y), 1 -> chroma. */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
2460
/*
 * Load the JPEG Huffman tables (MFX_JPEG_HUFF_TABLE_STATE), one 53-DW
 * command per table that the app flagged for loading.  Payload layout:
 * 12 DC code-length counts, 12 DC values, 16 AC code-length counts, then
 * 164 bytes of AC values.  NOTE(review): only 162 AC values are
 * meaningful; the extra 2 bytes round the payload to a DW boundary and
 * read the trailing pad of VAHuffmanTableBufferJPEGBaseline — confirm
 * against the libva struct definition.
 */
static void
gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct gen7_mfd_context *gen7_mfd_context,
                               int num_tables)
{
    VAHuffmanTableBufferJPEGBaseline *huffman_table;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int index;

    /* Nothing to do if the app supplied no Huffman table buffer. */
    if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
        return;

    huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;

    for (index = 0; index < num_tables; index++) {
        int id = va_to_gen7_jpeg_hufftable[index];
        if (!huffman_table->load_huffman_table[index])
            continue;
        BEGIN_BCS_BATCH(batch, 53);
        OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
        OUT_BCS_BATCH(batch, id);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
        ADVANCE_BCS_BATCH(batch);
    }
}
2490
/* Quantizer-matrix slot per 1-based component id (index 0 unused). */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
2498
2499 static void
2500 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2501                        struct decode_state *decode_state,
2502                        struct gen7_mfd_context *gen7_mfd_context)
2503 {
2504     VAPictureParameterBufferJPEGBaseline *pic_param;
2505     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2506     int index;
2507
2508     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2509         return;
2510
2511     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2512     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2513
2514     assert(pic_param->num_components <= 3);
2515
2516     for (index = 0; index < pic_param->num_components; index++) {
2517         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2518         int qm_type;
2519         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2520         unsigned char raster_qm[64];
2521         int j;
2522
2523         if (id > 4 || id < 1)
2524             continue;
2525
2526         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2527             continue;
2528
2529         qm_type = va_to_gen7_jpeg_qm[id];
2530
2531         for (j = 0; j < 64; j++)
2532             raster_qm[zigzag_direct[j]] = qm[j];
2533
2534         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2535     }
2536 }
2537
2538 static void
2539 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2540                          VAPictureParameterBufferJPEGBaseline *pic_param,
2541                          VASliceParameterBufferJPEGBaseline *slice_param,
2542                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2543                          dri_bo *slice_data_bo,
2544                          struct gen7_mfd_context *gen7_mfd_context)
2545 {
2546     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2547     int scan_component_mask = 0;
2548     int i;
2549
2550     assert(slice_param->num_components > 0);
2551     assert(slice_param->num_components < 4);
2552     assert(slice_param->num_components <= pic_param->num_components);
2553
2554     for (i = 0; i < slice_param->num_components; i++) {
2555         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2556         case 1:
2557             scan_component_mask |= (1 << 0);
2558             break;
2559         case 2:
2560             scan_component_mask |= (1 << 1);
2561             break;
2562         case 3:
2563             scan_component_mask |= (1 << 2);
2564             break;
2565         default:
2566             assert(0);
2567             break;
2568         }
2569     }
2570
2571     BEGIN_BCS_BATCH(batch, 6);
2572     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2573     OUT_BCS_BATCH(batch,
2574                   slice_param->slice_data_size);
2575     OUT_BCS_BATCH(batch,
2576                   slice_param->slice_data_offset);
2577     OUT_BCS_BATCH(batch,
2578                   slice_param->slice_horizontal_position << 16 |
2579                   slice_param->slice_vertical_position << 0);
2580     OUT_BCS_BATCH(batch,
2581                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2582                   (scan_component_mask << 27) |                 /* scan components */
2583                   (0 << 26) |   /* disable interrupt allowed */
2584                   (slice_param->num_mcus << 0));                /* MCU count */
2585     OUT_BCS_BATCH(batch,
2586                   (slice_param->restart_interval << 0));    /* RestartInterval */
2587     ADVANCE_BCS_BATCH(batch);
2588 }
2589
2590 /* Workaround for JPEG decoding on Ivybridge */
2591 #ifdef JPEG_WA
2592
/* Tiny pre-encoded 16x16 clip decoded through the AVC path before each
 * real JPEG decode (Ivybridge JPEG workaround).  NOTE(review): data[]
 * appears to be a single AVC slice (0x65 = IDR slice NAL header byte) —
 * confirm against the WA documentation. */
static struct {
    int width;                  /* clip width in pixels */
    int height;                 /* clip height in pixels */
    unsigned char data[32];     /* encoded bitstream bytes */
    int data_size;              /* bytes of data[] actually used */
    int data_bit_offset;        /* bit offset of first MB in data[] */
    int qp;                     /* slice quantization parameter */
} gen7_jpeg_wa_clip = {
    16,
    16,
    {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    14,
    40,
    28,
};
2611
/*
 * Allocate (or re-create) the scratch surface and slice-data bo used by
 * the JPEG workaround decode.  The previous WA surface, if any, is
 * destroyed first; the slice-data bo is created once and reused.
 */
static void
gen8_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAStatus status;
    struct object_surface *obj_surface;

    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
                             1);

    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 1,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);

    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    /* The WA clip decodes as NV12 4:2:0; make sure the bo matches. */
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen7_mfd_context->jpeg_wa_surface_object = obj_surface;

    /* Upload the canned WA bitstream once; the bo persists across calls. */
    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
                                                               "JPEG WA data",
                                                               0x1000,
                                                               0x1000);
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       0,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
    }
}
2649
/*
 * Emit MFX_PIPE_MODE_SELECT for the JPEG workaround pass: the WA clip is
 * decoded through the AVC VLD path with pre-deblocking output only.
 */
static void
gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9)  | /* Post Deblocking Output */
                  (1 << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
2677
/*
 * Emit MFX_SURFACE_STATE for the JPEG workaround surface (NV12 tiled,
 * dimensions taken from the WA scratch surface object).
 */
static void
gen8_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
2707
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 DWs) for the JPEG workaround pass.
 * Only the pre-deblocking output (the WA surface) and a transient intra
 * row-store scratch bo are programmed; every other address is zero.  The
 * intra bo is unreferenced right after emission — the kernel keeps it
 * alive until the batch completes.
 */
static void
gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *intra_bo;
    int i;

    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
                            "intra row store",
                            128 * 64,
                            0x1000);

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* DW 1-3: pre-deblocking output address + MOCS */
    OUT_BCS_RELOC64(batch,
                    obj_surface->bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);


    OUT_BCS_BATCH(batch, 0); /* post deblocking */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 13-15 is for intra row store scratch */
    OUT_BCS_RELOC64(batch,
                    intra_bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW 16-18 is for deblocking filter */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 19-50: 16 reference picture address pairs, DW 51: extra zero */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, 0);

    /* DW 52-54: mb status address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* DW 55-60: ILDB & second ILDB address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    dri_bo_unreference(intra_bo);
}
2780
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 DWs) for the JPEG workaround,
 * pointing the BSD/MPC and MPR row stores at throw-away scratch BOs
 * that are released as soon as the relocations are recorded.
 */
static void
gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1.0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* DW 1-3: BSD/MPC row store base address */
    OUT_BCS_RELOC64(batch,
                    bsd_mpc_bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 4-6: MPR row store base address */
    OUT_BCS_RELOC64(batch,
                    mpr_bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* Relocations hold references until the batch retires. */
    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
2824
/*
 * Intentionally a no-op: no QM state is emitted for the workaround clip.
 * NOTE(review): presumably the hardware defaults suffice for this canned
 * AVC slice; kept as a placeholder in the WA command sequence — confirm.
 */
static void
gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{

}
2831
/*
 * Emit MFX_AVC_IMG_STATE (16 DWs) describing the workaround clip as a
 * 1x1-macroblock frame picture (img_struct == 0, no MBAFF).
 */
static void
gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct = 0;              /* 0: frame picture */
    int mbaff_frame_flag = 0;
    unsigned int width_in_mbs = 1, height_in_mbs = 1;  /* single macroblock */

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  width_in_mbs * height_in_mbs);  /* total MB count */
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 24) |
                  (0 << 16) |
                  (0 << 14) |
                  (0 << 13) |
                  (0 << 12) | /* differ from GEN6 */
                  (0 << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (0 << 6) |
                  (0 << 5) |
                  (0 << 4) |
                  (0 << 3) |
                  (1 << 2) |
                  (mbaff_frame_flag << 1) |
                  (0 << 0));
    /* remaining DWs unused for the workaround decode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2879
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 DWs) with every reference surface
 * and POC entry zeroed — the workaround clip is decoded standalone and
 * uses no reference pictures.
 */
static void
gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
                                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0); /* top */
        OUT_BCS_BATCH(batch, 0); /* bottom */
    }

    OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field */
    OUT_BCS_BATCH(batch, 0); /* top */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC List */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
2914
2915 static void
2916 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2917                                      struct gen7_mfd_context *gen7_mfd_context)
2918 {
2919     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2920
2921     BEGIN_BCS_BATCH(batch, 11);
2922     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2923     OUT_BCS_RELOC64(batch,
2924                     gen7_mfd_context->jpeg_wa_slice_data_bo,
2925                     I915_GEM_DOMAIN_INSTRUCTION, 0,
2926                     0);
2927     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2928     OUT_BCS_BATCH(batch, 0);
2929     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2930     OUT_BCS_BATCH(batch, 0);
2931     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2932     OUT_BCS_BATCH(batch, 0);
2933     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2934     OUT_BCS_BATCH(batch, 0);
2935     ADVANCE_BCS_BATCH(batch);
2936 }
2937
/*
 * Emit MFD_AVC_BSD_OBJECT (6 DWs) to kick off decoding of the canned
 * workaround slice (size and bit offset come from gen7_jpeg_wa_clip).
 */
static void
gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    /* byte offset in the upper half, residual bit offset in the low 3 bits */
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
                  (0 << 5)  |
                  (0 << 4)  |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2964
/*
 * Emit MFX_AVC_SLICE_STATE (11 DWs) for the workaround clip: a single
 * I slice covering the whole (1x1 MB) picture, deblocking disabled,
 * QP taken from the canned clip description.
 */
static void
gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;  /* I slice: no reference lists */
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    /* current slice start position */
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    /* next slice start position (one MB row below: end of picture) */
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
3004
/*
 * Run the full JPEG hardware workaround: decode a tiny canned AVC clip
 * before the real JPEG picture. The emission order below mirrors the
 * normal AVC decode programming sequence and must be preserved.
 */
static void
gen8_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen8_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
3024
3025 #endif
3026
/*
 * Decode one JPEG (Baseline DCT) picture.
 *
 * Two passes over the slice parameters:
 *   1. scan every scan component to find the highest Huffman table
 *      selector in use, so the right number of tables is uploaded;
 *   2. emit one MFD_JPEG_BSD_OBJECT per slice element.
 */
void
gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferJPEGBaseline *pic_param;
    VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j, max_selector = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    /* Currently only support Baseline DCT */
    gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
#ifdef JPEG_WA
    /* Hardware workaround: decode a canned AVC clip first. */
    gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
#endif
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Pass 1: find the largest DC/AC Huffman table selector referenced. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            int component;

            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            /* NOTE(review): next_slice_param is computed here but not used
             * in this scanning pass (it matters only in pass 2 below). */
            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            for (component = 0; component < slice_param->num_components; component++) {
                if (max_selector < slice_param->components[component].dc_table_selector)
                    max_selector = slice_param->components[component].dc_table_selector;

                if (max_selector < slice_param->components[component].ac_table_selector)
                    max_selector = slice_param->components[component].ac_table_selector;
            }

            slice_param++;
        }
    }

    /* Baseline JPEG allows at most two Huffman tables per class. */
    assert(max_selector < 2);
    gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);

    /* Pass 2: emit the BSD objects that actually decode each slice. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
3117
/* VP8 DC dequantization lookup table, indexed by the clipped
 * quantization index (0..127). */
static const int vp8_dc_qlookup[128] = {
    4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
    18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
    29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
    44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
    59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
    75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
    91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
};
3128
/* VP8 AC dequantization lookup table, indexed by the clipped
 * quantization index (0..127). */
static const int vp8_ac_qlookup[128] = {
    4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
    20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
    36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
    52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
    78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
};
3139
/* Clamp a VP8 quantization index into the valid lookup-table range [0, 127]. */
static inline unsigned int vp8_clip_quantization_index(int index)
{
    if (index < 0)
        return 0;

    return (index > 127) ? 127 : (unsigned int)index;
}
3149
3150 static void
3151 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
3152                          struct decode_state *decode_state,
3153                          struct gen7_mfd_context *gen7_mfd_context)
3154 {
3155     struct object_surface *obj_surface;
3156     struct i965_driver_data *i965 = i965_driver_data(ctx);
3157     dri_bo *bo;
3158     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3159     int width_in_mbs = (pic_param->frame_width + 15) / 16;
3160     int height_in_mbs = (pic_param->frame_height + 15) / 16;
3161
3162     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
3163     assert(height_in_mbs > 0 && height_in_mbs <= 256);
3164
3165     intel_update_vp8_frame_store_index(ctx,
3166                                        decode_state,
3167                                        pic_param,
3168                                        gen7_mfd_context->reference_surface);
3169
3170     /* Current decoded picture */
3171     obj_surface = decode_state->render_object;
3172     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3173
3174     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3175     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
3176     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
3177     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
3178
3179     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3180     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
3181     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
3182     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
3183
3184     intel_ensure_vp8_segmentation_buffer(ctx,
3185                                          &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
3186
3187     /* The same as AVC */
3188     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3189     bo = dri_bo_alloc(i965->intel.bufmgr,
3190                       "intra row store",
3191                       width_in_mbs * 64,
3192                       0x1000);
3193     assert(bo);
3194     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
3195     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
3196
3197     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3198     bo = dri_bo_alloc(i965->intel.bufmgr,
3199                       "deblocking filter row store",
3200                       width_in_mbs * 64 * 4,
3201                       0x1000);
3202     assert(bo);
3203     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
3204     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
3205
3206     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3207     bo = dri_bo_alloc(i965->intel.bufmgr,
3208                       "bsd mpc row store",
3209                       width_in_mbs * 64 * 2,
3210                       0x1000);
3211     assert(bo);
3212     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
3213     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
3214
3215     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3216     bo = dri_bo_alloc(i965->intel.bufmgr,
3217                       "mpr row store",
3218                       width_in_mbs * 64 * 2,
3219                       0x1000);
3220     assert(bo);
3221     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
3222     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
3223
3224     gen7_mfd_context->bitplane_read_buffer.valid = 0;
3225 }
3226
/*
 * Emit MFX_VP8_PIC_STATE (38 DWs): frame geometry, loop-filter and
 * segmentation controls, dequantization values for all four segments,
 * the coefficient-probability buffer, mode/MV probabilities, and the
 * segmentation-id stream address.
 */
static void
gen8_mfd_vp8_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
    VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
    dri_bo *probs_bo = decode_state->probability_data->bo;
    int i, j, log2num;
    unsigned int quantization_value[4][6];

    /* There is no safe way to error out if the segmentation buffer
       could not be allocated. So, instead of aborting, simply decode
       something even if the result may look totally inaccurate */
    const unsigned int enable_segmentation =
        pic_param->pic_fields.bits.segmentation_enabled &&
        gen7_mfd_context->segmentation_buffer.valid;

    /* log2 of the number of token partitions (num_of_partitions also
     * counts the first partition, hence the -1). */
    log2num = (int)log2(slice_param->num_of_partitions - 1);

    BEGIN_BCS_BATCH(batch, 38);
    OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
    /* DW1: frame size in macroblocks, minus one */
    OUT_BCS_BATCH(batch,
                  (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
                  (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
    /* DW2: frame-level control flags */
    OUT_BCS_BATCH(batch,
                  log2num << 24 |
                  pic_param->pic_fields.bits.sharpness_level << 16 |
                  pic_param->pic_fields.bits.sign_bias_alternate << 13 |
                  pic_param->pic_fields.bits.sign_bias_golden << 12 |
                  pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
                  pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
                  (enable_segmentation &&
                   pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
                  pic_param->pic_fields.bits.segmentation_enabled << 8 |
                  (enable_segmentation &&
                   !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
                  (enable_segmentation &&
                   pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
                  (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
                  pic_param->pic_fields.bits.filter_type << 4 |
                  (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
                  !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */

    /* DW3: per-level loop filter strength */
    OUT_BCS_BATCH(batch,
                  pic_param->loop_filter_level[3] << 24 |
                  pic_param->loop_filter_level[2] << 16 |
                  pic_param->loop_filter_level[1] <<  8 |
                  pic_param->loop_filter_level[0] <<  0);

    /* Quantizer Value for 4 segments, DW4-DW15 */
    for (i = 0; i < 4; i++) {
        quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
        quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
        quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /*y2dc*/
        /* 101581>>16 is equivalent to 155/100 */
        quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /*y2ac*/
        quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
        quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/

        /* clamp: y2ac >= 8, uvdc <= 132 */
        quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
        quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);

        OUT_BCS_BATCH(batch,
                      quantization_value[i][0] << 16 | /* Y1AC */
                      quantization_value[i][1] <<  0); /* Y1DC */
        OUT_BCS_BATCH(batch,
                      quantization_value[i][5] << 16 | /* UVAC */
                      quantization_value[i][4] <<  0); /* UVDC */
        OUT_BCS_BATCH(batch,
                      quantization_value[i][3] << 16 | /* Y2AC */
                      quantization_value[i][2] <<  0); /* Y2DC */
    }

    /* CoeffProbability table for non-key frame, DW16-DW18 */
    if (probs_bo) {
        OUT_BCS_RELOC64(batch, probs_bo,
                        0, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
        OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    } else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    /* DW19: macroblock segment tree probabilities */
    OUT_BCS_BATCH(batch,
                  pic_param->mb_segment_tree_probs[2] << 16 |
                  pic_param->mb_segment_tree_probs[1] <<  8 |
                  pic_param->mb_segment_tree_probs[0] <<  0);

    /* DW20: frame-level mode probabilities */
    OUT_BCS_BATCH(batch,
                  pic_param->prob_skip_false << 24 |
                  pic_param->prob_intra      << 16 |
                  pic_param->prob_last       <<  8 |
                  pic_param->prob_gf         <<  0);

    /* DW21-DW22: intra mode probabilities */
    OUT_BCS_BATCH(batch,
                  pic_param->y_mode_probs[3] << 24 |
                  pic_param->y_mode_probs[2] << 16 |
                  pic_param->y_mode_probs[1] <<  8 |
                  pic_param->y_mode_probs[0] <<  0);

    OUT_BCS_BATCH(batch,
                  pic_param->uv_mode_probs[2] << 16 |
                  pic_param->uv_mode_probs[1] <<  8 |
                  pic_param->uv_mode_probs[0] <<  0);

    /* MV update value, DW23-DW32 */
    for (i = 0; i < 2; i++) {
        for (j = 0; j < 20; j += 4) {
            OUT_BCS_BATCH(batch,
                          (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
                          pic_param->mv_probs[i][j + 2] << 16 |
                          pic_param->mv_probs[i][j + 1] <<  8 |
                          pic_param->mv_probs[i][j + 0] <<  0);
        }
    }

    /* DW33-DW34: loop-filter deltas (7-bit sign/magnitude) */
    OUT_BCS_BATCH(batch,
                  (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
                  (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
                  (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
                  (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);

    OUT_BCS_BATCH(batch,
                  (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
                  (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
                  (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
                  (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);

    /* segmentation id stream base address, DW35-DW37 */
    if (enable_segmentation) {
        OUT_BCS_RELOC64(batch, gen7_mfd_context->segmentation_buffer.bo,
                        0, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
        OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    } else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    ADVANCE_BCS_BATCH(batch);
}
3374
/*
 * Emit MFD_VP8_BSD_OBJECT (22 DWs): boolean-coder entropy state for the
 * first partition, then size/offset pairs for up to 8 token partitions.
 */
static void
gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVP8 *pic_param,
                        VASliceParameterBufferVP8 *slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i, log2num;
    /* byte offset of the first partition: slice offset plus the
     * (rounded-up) uncompressed-header size in bytes */
    unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
    unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
    unsigned int partition_size_0 = slice_param->partition_size[0];

    assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
    /* a fully consumed lead byte: advance to the next byte instead */
    if (used_bits == 8) {
        used_bits = 0;
        offset += 1;
        partition_size_0 -= 1;
    }

    assert(slice_param->num_of_partitions >= 2);
    assert(slice_param->num_of_partitions <= 9);

    log2num = (int)log2(slice_param->num_of_partitions - 1);

    BEGIN_BCS_BATCH(batch, 22);
    OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
    OUT_BCS_BATCH(batch,
                  used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
                  pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
                  log2num << 4 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
                  0);

    OUT_BCS_BATCH(batch, partition_size_0 + 1);
    OUT_BCS_BATCH(batch, offset);
    /* partition sizes (3 bytes each) are stored after the first partition
     * when there is more than one token partition */
    offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
    for (i = 1; i < 9; i++) {
        if (i < slice_param->num_of_partitions) {
            OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
            OUT_BCS_BATCH(batch, offset);
        } else {
            /* unused partition slots are zeroed */
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }

        offset += slice_param->partition_size[i];
    }

    OUT_BCS_BATCH(batch, 0); /* concealment method */

    ADVANCE_BCS_BATCH(batch);
}
3431
3432 void
3433 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3434                             struct decode_state *decode_state,
3435                             struct gen7_mfd_context *gen7_mfd_context)
3436 {
3437     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3438     VAPictureParameterBufferVP8 *pic_param;
3439     VASliceParameterBufferVP8 *slice_param;
3440     dri_bo *slice_data_bo;
3441
3442     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3443     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3444
3445     /* one slice per frame */
3446     if (decode_state->num_slice_params != 1 ||
3447         (!decode_state->slice_params ||
3448          !decode_state->slice_params[0] ||
3449          (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3450         (!decode_state->slice_datas ||
3451          !decode_state->slice_datas[0] ||
3452          !decode_state->slice_datas[0]->bo) ||
3453         !decode_state->probability_data) {
3454         WARN_ONCE("Wrong parameters for VP8 decoding\n");
3455
3456         return;
3457     }
3458
3459     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3460     slice_data_bo = decode_state->slice_datas[0]->bo;
3461
3462     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3463     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3464     intel_batchbuffer_emit_mi_flush(batch);
3465     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3466     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3467     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3468     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3469     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3470     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3471     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3472     intel_batchbuffer_end_atomic(batch);
3473     intel_batchbuffer_flush(batch);
3474 }
3475
3476 static VAStatus
3477 gen8_mfd_decode_picture(VADriverContextP ctx,
3478                         VAProfile profile,
3479                         union codec_state *codec_state,
3480                         struct hw_context *hw_context)
3481
3482 {
3483     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3484     struct decode_state *decode_state = &codec_state->decode;
3485     VAStatus vaStatus;
3486
3487     assert(gen7_mfd_context);
3488
3489     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3490
3491     if (vaStatus != VA_STATUS_SUCCESS)
3492         goto out;
3493
3494     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3495
3496     switch (profile) {
3497     case VAProfileMPEG2Simple:
3498     case VAProfileMPEG2Main:
3499         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3500         break;
3501
3502     case VAProfileH264ConstrainedBaseline:
3503     case VAProfileH264Main:
3504     case VAProfileH264High:
3505     case VAProfileH264StereoHigh:
3506     case VAProfileH264MultiviewHigh:
3507         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3508         break;
3509
3510     case VAProfileVC1Simple:
3511     case VAProfileVC1Main:
3512     case VAProfileVC1Advanced:
3513         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3514         break;
3515
3516     case VAProfileJPEGBaseline:
3517         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3518         break;
3519
3520     case VAProfileVP8Version0_3:
3521         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3522         break;
3523
3524     default:
3525         assert(0);
3526         break;
3527     }
3528
3529     vaStatus = VA_STATUS_SUCCESS;
3530
3531 out:
3532     return vaStatus;
3533 }
3534
3535 static void
3536 gen8_mfd_context_destroy(void *hw_context)
3537 {
3538     VADriverContextP ctx;
3539     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3540
3541     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3542
3543     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3544     gen7_mfd_context->post_deblocking_output.bo = NULL;
3545
3546     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3547     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3548
3549     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3550     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3551
3552     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3553     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3554
3555     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3556     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3557
3558     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3559     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3560
3561     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3562     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3563
3564     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3565     gen7_mfd_context->segmentation_buffer.bo = NULL;
3566
3567     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3568
3569     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3570         i965_DestroySurfaces(ctx,
3571                              &gen7_mfd_context->jpeg_wa_surface_id,
3572                              1);
3573         gen7_mfd_context->jpeg_wa_surface_object = NULL;
3574     }
3575
3576     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3577     free(gen7_mfd_context);
3578 }
3579
3580 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3581                                         struct gen7_mfd_context *gen7_mfd_context)
3582 {
3583     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3584     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3585     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3586     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3587 }
3588
3589 struct hw_context *
3590 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3591 {
3592     struct intel_driver_data *intel = intel_driver_data(ctx);
3593     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3594     int i;
3595
3596     if (!gen7_mfd_context)
3597         return NULL;
3598
3599     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3600     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3601     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3602
3603     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3604         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3605         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3606     }
3607
3608     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3609     gen7_mfd_context->segmentation_buffer.valid = 0;
3610
3611     switch (obj_config->profile) {
3612     case VAProfileMPEG2Simple:
3613     case VAProfileMPEG2Main:
3614         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3615         break;
3616
3617     case VAProfileH264ConstrainedBaseline:
3618     case VAProfileH264Main:
3619     case VAProfileH264High:
3620     case VAProfileH264StereoHigh:
3621     case VAProfileH264MultiviewHigh:
3622         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3623         break;
3624     default:
3625         break;
3626     }
3627
3628     gen7_mfd_context->driver_context = ctx;
3629     return (struct hw_context *)gen7_mfd_context;
3630 }