OSDN Git Service

genX_mfd: add comment on dmv buffer scaling
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV     2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
/* Zig-zag scan order: maps raster-scan coefficient position to zig-zag
 * transmission order.  Used to reorder 8x8 quantization matrices that
 * arrive in raster order into the scan order the hardware expects. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
61
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77
78         if (!gen7_avc_surface)
79             return;
80
81         gen7_avc_surface->base.frame_store_id = -1;
82         assert((obj_surface->size & 0x3f) == 0);
83         obj_surface->private_data = gen7_avc_surface;
84     }
85
86     /* DMV buffers now relate to the whole frame, irrespective of
87        field coding modes */
88     if (gen7_avc_surface->dmv_top == NULL) {
89         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
90                                                  "direct mv w/r buffer",
91                                                  width_in_mbs * height_in_mbs * 128,
92                                                  0x1000);
93         assert(gen7_avc_surface->dmv_top);
94     }
95 }
96
/* Emit MFX_PIPE_MODE_SELECT (5 dwords): selects the codec standard and
 * configures the MFX pipeline for long-format VLD decode with stream-out
 * disabled.  Pre-/post-deblocking outputs are enabled from the context's
 * valid flags; all error-termination bits are left clear. */
static void
gen8_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* Only the codec standards this decoder implements are accepted. */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1 ||
           standard_select == MFX_FORMAT_JPEG ||
           standard_select == MFX_FORMAT_VP8);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
132
/* Emit MFX_SURFACE_STATE (6 dwords) for the destination render surface:
 * dimensions, pitch, tiling (Y-major) and the Cb/Cr plane offsets.
 * JPEG is special-cased: non-interleaved chroma and a non-zero Cr offset. */
static void
gen8_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface = decode_state->render_object;
    unsigned int y_cb_offset;
    unsigned int y_cr_offset;
    unsigned int surface_format;

    assert(obj_surface);

    y_cb_offset = obj_surface->y_cb_offset;
    y_cr_offset = obj_surface->y_cr_offset;

    /* Y800 (greyscale) surfaces decode as monochrome, otherwise 4:2:0. */
    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
                     MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 18) |
                  ((obj_surface->orig_width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (surface_format << 28) | /* 420 planar YUV surface */
                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  ((standard_select == MFX_FORMAT_JPEG ? y_cr_offset : 0) << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
175
/* Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): programs every buffer address
 * the MFX pipeline reads/writes — pre/post deblocking outputs, row-store
 * scratch buffers, and the 16 reference surfaces.  Each 64-bit address
 * reloc occupies two dwords and is followed by a MOCS dword; invalid
 * buffers are written as zero pairs to keep the fixed dword layout. */
static void
gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* Pre-deblock 1-3 */
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->pre_deblocking_output.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Post-deblocking 4-6 */
    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->post_deblocking_output.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* intra row-store scratch 13-15 */
    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* deblocking-filter-row-store 16-18 */
    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 19..50: one 64-bit address per reference surface slot */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->bo) {
            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

            OUT_BCS_RELOC64(batch, obj_surface->bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }

    }

    /* reference property 51 */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Macroblock status & ILDB 52-57 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the second Macroblock status 58-60 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
281
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): only the indirect
 * bitstream base (slice data buffer) is programmed for decode; the MV,
 * IT coefficient, IT deblock and PAK-BSE (encode) sections are zeroed. */
static void
gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    BEGIN_BCS_BATCH(batch, 26);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* MFX In BS 1-5 */
    OUT_BCS_RELOC64(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* Upper bound 4-5 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX indirect MV 6-10 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX IT_COFF 11-15 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX IT_DBLK 16-20 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX PAK_BSE object for encoder 21-25 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
330
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): BSD/MPC row store,
 * MPR row store, and VC-1 bitplane read buffer addresses.  Each address
 * pair is followed by a MOCS dword; invalid buffers become zero pairs. */
static void
gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* BSD/MPC Row Store Scratch buffer 1-3 */
    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* MPR Row Store Scratch buffer 4-6 */
    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Bitplane 7-9 */
    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, 0,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    ADVANCE_BCS_BATCH(batch);
}
377
378 static void
379 gen8_mfd_qm_state(VADriverContextP ctx,
380                   int qm_type,
381                   unsigned char *qm,
382                   int qm_length,
383                   struct gen7_mfd_context *gen7_mfd_context)
384 {
385     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
386     unsigned int qm_buffer[16];
387
388     assert(qm_length <= 16 * 4);
389     memcpy(qm_buffer, qm, qm_length);
390
391     BEGIN_BCS_BATCH(batch, 18);
392     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
393     OUT_BCS_BATCH(batch, qm_type << 0);
394     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
395     ADVANCE_BCS_BATCH(batch);
396 }
397
/* Emit MFX_AVC_IMG_STATE (17 dwords): per-picture AVC parameters —
 * macroblock counts, picture structure (frame/top/bottom field), QP
 * offsets, prediction flags and coding-mode bits, derived from the VA
 * picture parameter buffer.  Asserts enforce the 4:2:0/monochrome-only
 * restriction and the H.264 field/MBAFF consistency rules. */
static void
gen8_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    /* img_struct encoding: 0 = frame, 1 = top field, 3 = bottom field */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF: adaptive frame/field macroblocks within a frame picture */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 17);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
    OUT_BCS_BATCH(batch,
                  (width_in_mbs * height_in_mbs - 1));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
483
484 static void
485 gen8_mfd_avc_qm_state(VADriverContextP ctx,
486                       struct decode_state *decode_state,
487                       struct gen7_mfd_context *gen7_mfd_context)
488 {
489     VAIQMatrixBufferH264 *iq_matrix;
490     VAPictureParameterBufferH264 *pic_param;
491
492     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
493         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
494     else
495         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
496
497     assert(decode_state->pic_param && decode_state->pic_param->buffer);
498     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
499
500     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
501     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
502
503     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
504         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
505         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
506     }
507 }
508
/* Thin wrapper: emit the AVC picture-ID state for the current reference
 * surface list via the shared gen75 helper. */
static inline void
gen8_mfd_avc_picid_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
                               gen7_mfd_context->reference_surface);
}
517
/* Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): DMV buffer addresses for
 * all 16 reference surfaces plus the current frame, followed by the
 * top/bottom POC list for references and the current picture.  Slots
 * without a usable reference are written as zero address pairs. */
static void
gen8_mfd_avc_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen7_avc_surface;
    VAPictureH264 *va_pic;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->private_data) {

            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
            gen7_avc_surface = obj_surface->private_data;

            OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the current decoding frame/field */
    /* NOTE(review): this va_pic assignment is unused until the POC
     * emission below reassigns it — kept for fidelity. */
    va_pic = &pic_param->CurrPic;
    obj_surface = decode_state->render_object;
    assert(obj_surface->bo && obj_surface->private_data);
    gen7_avc_surface = obj_surface->private_data;

    /* DMV write/read buffer for the picture being decoded */
    OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* POC List */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

        if (obj_surface) {
            const VAPictureH264 * const va_pic = avc_find_picture(
                                                     obj_surface->base.id, pic_param->ReferenceFrames,
                                                     ARRAY_ELEMS(pic_param->ReferenceFrames));

            assert(va_pic != NULL);
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* Current picture's POC pair closes the list */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
591
/* Thin wrapper: emit a phantom (dummy) slice before the first real one
 * when the first slice does not start at macroblock 0, via the shared
 * gen6 helper. */
static void
gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
                                 VAPictureParameterBufferH264 *pic_param,
                                 VASliceParameterBufferH264 *next_slice_param,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
}
600
/* Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice: normalized slice
 * type, active reference counts, weight denominators, QP/deblocking
 * parameters, and the start position of this slice and the next
 * (next_slice_param == NULL marks the last slice of the picture). */
static void
gen8_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* Fold SI into I and SP into P; hardware only knows I/P/B. */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    /* Active reference counts: I uses none, P uses L0, B uses both. */
    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    first_mb_in_slice = slice_param->first_mb_in_slice;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    /* MBAFF addresses MB pairs, so the vertical position doubles. */
    if (mbaff_picture)
        slice_ver_pos = slice_ver_pos << 1;
    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;

        if (mbaff_picture)
            next_slice_ver_pos = next_slice_ver_pos << 1;
    } else {
        /* Last slice: "next" position is the bottom of the picture
         * (halved height for field pictures). */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
691
/* Thin wrapper: emit the AVC reference index remapping state for this
 * slice via the shared gen6 helper. */
static inline void
gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           VASliceParameterBufferH264 *slice_param,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    gen6_send_avc_ref_idx_state(
        gen7_mfd_context->base.batch,
        slice_param,
        gen7_mfd_context->reference_surface
    );
}
704
/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE command(s) for the current slice.
 *
 * One table (reference list 0) is sent for P/SP slices with explicit
 * weighted prediction; two tables (lists 0 and 1) are sent for B slices
 * with weighted_bipred_idc == 1 (explicit).  For all other slice/flag
 * combinations no command is emitted.
 *
 * Each table is 32 entries of 6 int16 values: luma weight, luma offset,
 * Cb weight, Cb offset, Cr weight, Cr offset (384 bytes = 96 dwords,
 * hence the 98-dword command including header and list index).
 */
static void
gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
                                VAPictureParameterBufferH264 *pic_param,
                                VASliceParameterBufferH264 *slice_param,
                                struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i, j, num_weight_offset_table = 0;
    short weightoffsets[32 * 6];

    /* Explicit weighted prediction: P/SP slices need only list 0 */
    if ((slice_param->slice_type == SLICE_TYPE_P ||
         slice_param->slice_type == SLICE_TYPE_SP) &&
        (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
        num_weight_offset_table = 1;
    }

    /* B slices with explicit bi-prediction (idc == 1) need both lists;
     * idc == 2 (implicit) intentionally emits no table here */
    if ((slice_param->slice_type == SLICE_TYPE_B) &&
        (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
        num_weight_offset_table = 2;
    }

    for (i = 0; i < num_weight_offset_table; i++) {
        BEGIN_BCS_BATCH(batch, 98);
        OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
        OUT_BCS_BATCH(batch, i);    /* 0: reference list 0, 1: reference list 1 */

        if (i == 0) {
            /* Pack the list-0 weights/offsets in hardware entry order */
            for (j = 0; j < 32; j++) {
                weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
                weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
                weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
                weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
                weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
                weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
            }
        } else {
            /* Same layout for list 1 */
            for (j = 0; j < 32; j++) {
                weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
                weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
                weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
                weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
                weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
                weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
            }
        }

        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
        ADVANCE_BCS_BATCH(batch);
    }
}
755
/*
 * Emit an MFD_AVC_BSD_OBJECT command that points the bitstream decoder
 * at the compressed data of one slice.
 *
 * The first-macroblock bit offset is recomputed from the slice data
 * itself (CAVLC vs. CABAC aware) rather than trusted from the slice
 * parameter buffer.  The byte part of the offset goes into DW4[31:16],
 * the remaining 0-7 bit part into DW4[2:0].
 */
static void
gen8_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
                                                            slice_param,
                                                            pic_param->pic_fields.bits.entropy_coding_mode_flag);

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_data_size));          /* indirect data length */
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);   /* indirect data start */
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) |    /* byte offset of first MB */
                  (1 << 7)  | /* NOTE(review): fixed control bit, per gen7+ BSD layout — confirm against PRM */
                  (0 << 5)  |
                  (0 << 4)  |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7));           /* sub-byte bit offset */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
791
/*
 * One-time AVC context setup: seed the cached H.264 inverse-quantization
 * matrices with the spec default (flat) scaling lists.  They may later be
 * overwritten from an application-supplied IQ matrix buffer by
 * gen8_mfd_avc_qm_state() during picture decode.
 */
static inline void
gen8_mfd_avc_context_init(
    VADriverContextP         ctx,
    struct gen7_mfd_context *gen7_mfd_context
)
{
    /* Initialize flat scaling lists */
    avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
}
801
/*
 * Per-picture AVC decode setup.
 *
 * Decides whether the in-loop deblocking filter output path is needed,
 * binds the render target as pre-/post-deblocking output, and
 * (re)allocates the fixed-function row-store scratch buffers whose sizes
 * scale with the frame width in macroblocks.
 */
static void
gen8_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    unsigned int width_in_mbs, height_in_mbs;

    /* Deblocking is required unless every slice sets
     * disable_deblocking_filter_idc == 1; stop scanning at the first
     * slice that needs it. */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
                                       gen7_mfd_context->reference_surface);
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    if (pic_param->pic_fields.bits.reference_pic_flag)
        obj_surface->flags |= SURFACE_REFERENCED;
    else
        obj_surface->flags &= ~SURFACE_REFERENCED;

    avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
    gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* The render target serves as both pre- and post-deblocking output;
     * exactly one of the two paths is marked valid depending on whether
     * the in-loop deblocking filter is enabled for this picture. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Row-store scratch buffers; sizes are per MB column (width_in_mbs
     * entries).  NOTE(review): multipliers (64/256/128 bytes per MB)
     * follow the MFX hardware requirements — confirm against the PRM. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* AVC has no bitplane data (VC-1 only) */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
903
/*
 * Decode one complete AVC picture: emit the frame-level MFX state once,
 * then per-slice state + BSD objects for every slice in every slice
 * parameter buffer, and flush the batch.
 */
static void
gen8_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);

    /* Frame-level state, emitted once per picture */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);

        /* First slice of the next parameter buffer, used to detect the
         * end of the picture (NULL means this buffer holds the last slice) */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        /* If the first slice does not start at macroblock 0, emit a
         * phantom slice to cover the leading gap */
        if (j == 0 && slice_param->first_mb_in_slice)
            gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            /* Per-slice state and the BSD object that kicks off decode */
            gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
968
/*
 * Per-picture MPEG-2 decode setup: bind reference surfaces, the render
 * target (pre-deblocking output only — MPEG-2 has no in-loop deblocking
 * path here), and the BSD/MPC row-store scratch buffer.
 */
static void
gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferMPEG2 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    unsigned int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;

    mpeg2_set_reference_surfaces(
        ctx,
        gen7_mfd_context->reference_surface,
        decode_state,
        pic_param
    );

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = 1;

    /* NOTE(review): 96 bytes per MB column for MPEG-2 BSD/MPC row store
     * — confirm against the PRM */
    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* The remaining output/scratch paths are unused for MPEG-2 */
    gen7_mfd_context->post_deblocking_output.valid = 0;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
1015
/*
 * Emit MFX_MPEG2_PIC_STATE.
 *
 * DW1 packs the four f_code nibbles plus the picture coding extension
 * flags; DW2 carries the picture coding type; DW3 carries the frame
 * dimensions in macroblocks (minus one) and the slice concealment
 * disable bit, which this driver always sets (concealment off).
 */
static void
gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int slice_concealment_disable_bit = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    /* Hardware slice concealment is always disabled */
    slice_concealment_disable_bit = 1;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    OUT_BCS_BATCH(batch,
                  (slice_concealment_disable_bit << 31) |
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 | /* frame height in MBs - 1 */
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));     /* frame width in MBs - 1 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
1062
1063 static void
1064 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1065                         struct decode_state *decode_state,
1066                         struct gen7_mfd_context *gen7_mfd_context)
1067 {
1068     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1069     int i, j;
1070
1071     /* Update internal QM state */
1072     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1073         VAIQMatrixBufferMPEG2 * const iq_matrix =
1074             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1075
1076         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1077             iq_matrix->load_intra_quantiser_matrix) {
1078             gen_iq_matrix->load_intra_quantiser_matrix =
1079                 iq_matrix->load_intra_quantiser_matrix;
1080             if (iq_matrix->load_intra_quantiser_matrix) {
1081                 for (j = 0; j < 64; j++)
1082                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1083                         iq_matrix->intra_quantiser_matrix[j];
1084             }
1085         }
1086
1087         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1088             iq_matrix->load_non_intra_quantiser_matrix) {
1089             gen_iq_matrix->load_non_intra_quantiser_matrix =
1090                 iq_matrix->load_non_intra_quantiser_matrix;
1091             if (iq_matrix->load_non_intra_quantiser_matrix) {
1092                 for (j = 0; j < 64; j++)
1093                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1094                         iq_matrix->non_intra_quantiser_matrix[j];
1095             }
1096         }
1097     }
1098
1099     /* Commit QM state to HW */
1100     for (i = 0; i < 2; i++) {
1101         unsigned char *qm = NULL;
1102         int qm_type;
1103
1104         if (i == 0) {
1105             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1106                 qm = gen_iq_matrix->intra_quantiser_matrix;
1107                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1108             }
1109         } else {
1110             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1111                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1112                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1113             }
1114         }
1115
1116         if (!qm)
1117             continue;
1118
1119         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1120     }
1121 }
1122
/*
 * Emit an MFD_MPEG2_BSD_OBJECT for one slice.
 *
 * The macroblock count for the slice is derived from the distance
 * between this slice's start position and the next slice's start
 * (or the bottom of the picture for the last slice).  For field
 * pictures a workaround divides the reported vertical positions by 2
 * when the app passes frame-based values (wa_mpeg2_slice_vertical_position).
 */
static void
gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    is_field_pic_wa = is_field_pic &&
                      gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    /* Start position of this slice (MB units) */
    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    /* Start position of the next slice, or picture bottom for the last one */
    if (next_slice_param == NULL) {
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    /* Macroblocks covered by this slice, in raster-scan order */
    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7)); /* sub-byte bit offset of first MB */
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (vpos1 << 8 | hpos1));
    ADVANCE_BCS_BATCH(batch);
}
1171
/*
 * Decode one complete MPEG-2 picture: emit frame-level MFX state, then
 * one BSD object per slice, and flush the batch.
 */
static void
gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Lazily detect (once per context) whether the app reports slice
     * vertical positions in frame rather than field units */
    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);

        /* First slice of the next buffer; NULL marks the last buffer */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1227
/* Map the VA-API VC-1 MV mode value to the hardware MV mode encoding */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};

/* VC-1 B-picture scale factors (NOTE(review): indexed by BFRACTION code
 * — confirm against SMPTE 421M Table) */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160,
    224,
};

/* Map the VA-API conditional overlap value to the hardware encoding */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};

/* Map VC-1 interlaced-field FPTYPE to per-field picture types;
 * second index: 0 = first decoded field, 1 = second decoded field */
static const int fptype_to_picture_type[8][2] = {
    {GEN7_VC1_I_PICTURE, GEN7_VC1_I_PICTURE},
    {GEN7_VC1_I_PICTURE, GEN7_VC1_P_PICTURE},
    {GEN7_VC1_P_PICTURE, GEN7_VC1_I_PICTURE},
    {GEN7_VC1_P_PICTURE, GEN7_VC1_P_PICTURE},
    {GEN7_VC1_B_PICTURE, GEN7_VC1_B_PICTURE},
    {GEN7_VC1_B_PICTURE, GEN7_VC1_BI_PICTURE},
    {GEN7_VC1_BI_PICTURE, GEN7_VC1_B_PICTURE},
    {GEN7_VC1_BI_PICTURE, GEN7_VC1_BI_PICTURE}
};
1259
1260 static void
1261 gen8_mfd_free_vc1_surface(void **data)
1262 {
1263     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1264
1265     if (!gen7_vc1_surface)
1266         return;
1267
1268     dri_bo_unreference(gen7_vc1_surface->dmv_top);
1269     dri_bo_unreference(gen7_vc1_surface->dmv_bottom);
1270     free(gen7_vc1_surface);
1271     *data = NULL;
1272 }
1273
/*
 * Attach (or refresh) the per-surface VC-1 private data on the current
 * render target: per-field picture types, intensity-compensation state,
 * and the direct-MV write/read buffers.
 *
 * NOTE(review): on calloc failure this silently returns and the surface
 * is left without private data; downstream code handles a NULL
 * private_data pointer — confirm.
 */
static void
gen8_mfd_init_vc1_surface(VADriverContextP ctx,
                          VAPictureParameterBufferVC1 *pic_param,
                          struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
    int height_in_mbs;
    int picture_type;
    int is_first_field = 1;

    /* For field-interlaced pictures the VA picture_type is an FPTYPE
     * covering both fields; pick out the type of the field being decoded */
    if (!pic_param->sequence_fields.bits.interlace ||
        (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
        picture_type = pic_param->picture_fields.bits.picture_type;
    } else {/* Field-Interlace */
        is_first_field = pic_param->picture_fields.bits.is_first_field;
        picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
    }

    obj_surface->free_private_data = gen8_mfd_free_vc1_surface;

    if (!gen7_vc1_surface) {
        gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);

        if (!gen7_vc1_surface)
            return;

        /* surface size must be 64-byte aligned */
        assert((obj_surface->size & 0x3f) == 0);
        obj_surface->private_data = gen7_vc1_surface;
    }

    /* Reset the cached per-field state at the start of a new frame
     * (i.e. always, except for the second field of a field pair) */
    if (!pic_param->sequence_fields.bits.interlace ||
        pic_param->picture_fields.bits.frame_coding_mode < 2 || /* Progressive or Frame-Interlace */
        is_first_field) {
        gen7_vc1_surface->picture_type_top = 0;
        gen7_vc1_surface->picture_type_bottom = 0;
        gen7_vc1_surface->intensity_compensation_top = 0;
        gen7_vc1_surface->intensity_compensation_bottom = 0;
        gen7_vc1_surface->luma_scale_top[0] = 0;
        gen7_vc1_surface->luma_scale_top[1] = 0;
        gen7_vc1_surface->luma_scale_bottom[0] = 0;
        gen7_vc1_surface->luma_scale_bottom[1] = 0;
        gen7_vc1_surface->luma_shift_top[0] = 0;
        gen7_vc1_surface->luma_shift_top[1] = 0;
        gen7_vc1_surface->luma_shift_bottom[0] = 0;
        gen7_vc1_surface->luma_shift_bottom[1] = 0;
    }

    /* Record which field(s) this decode covers */
    if (!pic_param->sequence_fields.bits.interlace ||
        pic_param->picture_fields.bits.frame_coding_mode < 2) { /* Progressive or Frame-Interlace */
        gen7_vc1_surface->picture_type_top = picture_type;
        gen7_vc1_surface->picture_type_bottom = picture_type;
    } else if (pic_param->picture_fields.bits.top_field_first ^ is_first_field)
        gen7_vc1_surface->picture_type_bottom = picture_type;
    else
        gen7_vc1_surface->picture_type_top = picture_type;

    /*
     * The Direct MV buffer is scalable with frame height, but
     * does not scale with frame width as the hardware assumes
     * that frame width is fixed at 128 MBs.
     */

    if (gen7_vc1_surface->dmv_top == NULL) {
        height_in_mbs = ALIGN(obj_surface->orig_height, 16) / 16;
        gen7_vc1_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
                                             "direct mv w/r buffer",
                                             128 * height_in_mbs * 64,
                                             0x1000);
    }

    /* Interlaced content additionally needs a bottom-field DMV buffer,
     * sized for the field height (half the frame, in field MB rows) */
    if (pic_param->sequence_fields.bits.interlace &&
        gen7_vc1_surface->dmv_bottom == NULL) {
        height_in_mbs = ALIGN(obj_surface->orig_height, 32) / 32;
        gen7_vc1_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
                                             "direct mv w/r buffer",
                                             128 * height_in_mbs * 64,
                                             0x1000);
    }
}
1354
1355 static void
1356 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1357                          struct decode_state *decode_state,
1358                          struct gen7_mfd_context *gen7_mfd_context)
1359 {
1360     VAPictureParameterBufferVC1 *pic_param;
1361     struct i965_driver_data *i965 = i965_driver_data(ctx);
1362     struct object_surface *obj_surface;
1363     struct gen7_vc1_surface *gen7_vc1_current_surface;
1364     struct gen7_vc1_surface *gen7_vc1_forward_surface;
1365     dri_bo *bo;
1366     int width_in_mbs;
1367     int picture_type;
1368     int is_first_field = 1;
1369     int i;
1370
1371     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1372     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1373     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1374
1375     if (!pic_param->sequence_fields.bits.interlace ||
1376         (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
1377         picture_type = pic_param->picture_fields.bits.picture_type;
1378     } else {/* Field-Interlace */
1379         is_first_field = pic_param->picture_fields.bits.is_first_field;
1380         picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
1381     }
1382
1383     /* Current decoded picture */
1384     obj_surface = decode_state->render_object;
1385     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1386     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1387
1388     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1389     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1390     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1391
1392     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1393     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1394     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1395
1396     if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1397         gen7_mfd_context->post_deblocking_output.valid = 0;
1398         gen7_mfd_context->pre_deblocking_output.valid = 1;
1399     } else {
1400         gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1401         gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1402     }
1403
1404     intel_update_vc1_frame_store_index(ctx,
1405                                        decode_state,
1406                                        pic_param,
1407                                        gen7_mfd_context->reference_surface);
1408
1409     if (picture_type == GEN7_VC1_P_PICTURE) {
1410         obj_surface = decode_state->reference_objects[0];
1411         gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);
1412         if (pic_param->forward_reference_picture != VA_INVALID_ID &&
1413             obj_surface)
1414             gen7_vc1_forward_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
1415         else
1416             gen7_vc1_forward_surface = NULL;
1417
1418         if (!pic_param->sequence_fields.bits.interlace ||
1419             pic_param->picture_fields.bits.frame_coding_mode == 0) { /* Progressive */
1420             if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1421                 if (gen7_vc1_forward_surface) {
1422                     gen7_vc1_forward_surface->intensity_compensation_top = 1;
1423                     gen7_vc1_forward_surface->intensity_compensation_bottom = 1;
1424                     gen7_vc1_forward_surface->luma_scale_top[0] = pic_param->luma_scale;
1425                     gen7_vc1_forward_surface->luma_scale_bottom[0] = pic_param->luma_scale;
1426                     gen7_vc1_forward_surface->luma_shift_top[0] = pic_param->luma_shift;
1427                     gen7_vc1_forward_surface->luma_shift_bottom[0] = pic_param->luma_shift;
1428                 }
1429             }
1430         } else if (pic_param->sequence_fields.bits.interlace &&
1431             pic_param->picture_fields.bits.frame_coding_mode == 1) { /* Frame-Interlace */
1432             if (pic_param->picture_fields.bits.intensity_compensation) {
1433                 if (gen7_vc1_forward_surface) {
1434                     gen7_vc1_forward_surface->intensity_compensation_top = 1;
1435                     gen7_vc1_forward_surface->intensity_compensation_bottom = 1;
1436                     gen7_vc1_forward_surface->luma_scale_top[0] = pic_param->luma_scale;
1437                     gen7_vc1_forward_surface->luma_scale_bottom[0] = pic_param->luma_scale;
1438                     gen7_vc1_forward_surface->luma_shift_top[0] = pic_param->luma_shift;
1439                     gen7_vc1_forward_surface->luma_shift_bottom[0] = pic_param->luma_shift;
1440                 }
1441             }
1442         } else if (pic_param->sequence_fields.bits.interlace &&
1443                    pic_param->picture_fields.bits.frame_coding_mode == 2) { /* Field-Interlace */
1444             if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1445                 if (pic_param->intensity_compensation_field == 1 || /* Top field */
1446                     pic_param->intensity_compensation_field == 0) { /* Both fields */
1447                     if (is_first_field) {
1448                         if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1449                              (pic_param->reference_fields.bits.reference_field_pic_indicator ==
1450                              pic_param->picture_fields.bits.top_field_first)) ||
1451                             pic_param->reference_fields.bits.num_reference_pictures) {
1452                             if (gen7_vc1_forward_surface) {
1453                                 i = gen7_vc1_forward_surface->intensity_compensation_top++;
1454                                 gen7_vc1_forward_surface->luma_scale_top[i] = pic_param->luma_scale;
1455                                 gen7_vc1_forward_surface->luma_shift_top[i] = pic_param->luma_shift;
1456                             }
1457                         }
1458                     } else { /* Second field */
1459                         if (pic_param->picture_fields.bits.top_field_first) {
1460                             if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1461                                  !pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1462                                 pic_param->reference_fields.bits.num_reference_pictures) {
1463                                 i = gen7_vc1_current_surface->intensity_compensation_top++;
1464                                 gen7_vc1_current_surface->luma_scale_top[i] = pic_param->luma_scale;
1465                                 gen7_vc1_current_surface->luma_shift_top[i] = pic_param->luma_shift;
1466                             }
1467                         } else {
1468                             if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1469                                  pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1470                                 pic_param->reference_fields.bits.num_reference_pictures) {
1471                                 if (gen7_vc1_forward_surface) {
1472                                     i = gen7_vc1_forward_surface->intensity_compensation_top++;
1473                                     gen7_vc1_forward_surface->luma_scale_top[i] = pic_param->luma_scale;
1474                                     gen7_vc1_forward_surface->luma_shift_top[i] = pic_param->luma_shift;
1475                                 }
1476                             }
1477                         }
1478                     }
1479                 }
1480                 if (pic_param->intensity_compensation_field == 2 || /* Bottom field */
1481                     pic_param->intensity_compensation_field == 0) { /* Both fields */
1482                     if (is_first_field) {
1483                         if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1484                              (pic_param->reference_fields.bits.reference_field_pic_indicator ^
1485                               pic_param->picture_fields.bits.top_field_first)) ||
1486                             pic_param->reference_fields.bits.num_reference_pictures) {
1487                             if (gen7_vc1_forward_surface) {
1488                                 i = gen7_vc1_forward_surface->intensity_compensation_bottom++;
1489                                 if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
1490                                     gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale;
1491                                     gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift;
1492                                 } else { /* Both fields */
1493                                     gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
1494                                     gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
1495                                 }
1496                             }
1497                         }
1498                     } else { /* Second field */
1499                         if (pic_param->picture_fields.bits.top_field_first) {
1500                             if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1501                                  pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1502                                 pic_param->reference_fields.bits.num_reference_pictures) {
1503                                 if (gen7_vc1_forward_surface) {
1504                                     i = gen7_vc1_forward_surface->intensity_compensation_bottom++;
1505                                     if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
1506                                         gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale;
1507                                         gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift;
1508                                     } else { /* Both fields */
1509                                         gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
1510                                         gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
1511                                     }
1512                                 }
1513                             }
1514                         } else {
1515                            if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1516                                  !pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1517                                 pic_param->reference_fields.bits.num_reference_pictures) {
1518                                 i = gen7_vc1_current_surface->intensity_compensation_bottom++;
1519                                if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
1520                                    gen7_vc1_current_surface->luma_scale_bottom[i] = pic_param->luma_scale;
1521                                    gen7_vc1_current_surface->luma_shift_bottom[i] = pic_param->luma_shift;
1522                                 } else { /* Both fields */
1523                                     gen7_vc1_current_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
1524                                     gen7_vc1_current_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
1525                                 }
1526                             }
1527                         }
1528                     }
1529                 }
1530             }
1531         }
1532     }
1533
1534     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1535     bo = dri_bo_alloc(i965->intel.bufmgr,
1536                       "intra row store",
1537                       width_in_mbs * 64,
1538                       0x1000);
1539     assert(bo);
1540     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1541     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1542
1543     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1544     bo = dri_bo_alloc(i965->intel.bufmgr,
1545                       "deblocking filter row store",
1546                       width_in_mbs * 7 * 64,
1547                       0x1000);
1548     assert(bo);
1549     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1550     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1551
1552     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1553     bo = dri_bo_alloc(i965->intel.bufmgr,
1554                       "bsd mpc row store",
1555                       width_in_mbs * 96,
1556                       0x1000);
1557     assert(bo);
1558     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1559     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1560
1561     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1562
1563     if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
1564         gen7_mfd_context->bitplane_read_buffer.valid = 1;
1565     else
1566         gen7_mfd_context->bitplane_read_buffer.valid = !!(pic_param->bitplane_present.value & 0x7f);
1567     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1568
1569     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1570         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1571         int height_in_mbs;
1572         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1573         int src_w, src_h;
1574         uint8_t *src = NULL, *dst = NULL;
1575
1576         if (!pic_param->sequence_fields.bits.interlace ||
1577             (pic_param->picture_fields.bits.frame_coding_mode < 2)) /* Progressive or Frame-Interlace */
1578             height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1579         else /* Field-Interlace */
1580             height_in_mbs = ALIGN(pic_param->coded_height, 32) / 32;
1581
1582         bo = dri_bo_alloc(i965->intel.bufmgr,
1583                           "VC-1 Bitplane",
1584                           bitplane_width * height_in_mbs,
1585                           0x1000);
1586         assert(bo);
1587         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1588
1589         dri_bo_map(bo, True);
1590         assert(bo->virtual);
1591         dst = bo->virtual;
1592
1593         if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1594             for (src_h = 0; src_h < height_in_mbs; src_h++) {
1595                 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1596                     int dst_index;
1597                     uint8_t src_value = 0x2;
1598
1599                     dst_index = src_w / 2;
1600                     dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1601                 }
1602
1603                 if (src_w & 1)
1604                     dst[src_w / 2] >>= 4;
1605
1606                 dst += bitplane_width;
1607             }
1608         } else {
1609             assert(decode_state->bit_plane->buffer);
1610             src = decode_state->bit_plane->buffer;
1611
1612             for (src_h = 0; src_h < height_in_mbs; src_h++) {
1613                 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1614                     int src_index, dst_index;
1615                     int src_shift;
1616                     uint8_t src_value;
1617
1618                     src_index = (src_h * width_in_mbs + src_w) / 2;
1619                     src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1620                     src_value = ((src[src_index] >> src_shift) & 0xf);
1621
1622                     dst_index = src_w / 2;
1623                     dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1624                 }
1625
1626                 if (src_w & 1)
1627                     dst[src_w / 2] >>= 4;
1628
1629                 dst += bitplane_width;
1630             }
1631         }
1632
1633         dri_bo_unmap(bo);
1634     } else
1635         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1636 }
1637
/*
 * Emit the MFD_VC1_LONG_PIC_STATE command that programs the gen8 MFX
 * engine for decoding one VC-1 picture (or one field of a field-interlaced
 * picture).
 *
 * All inputs come from the VAPictureParameterBufferVC1 attached to
 * decode_state.  The function derives the hardware-facing fields
 * (alternate-PQUANT configuration, unified MV mode, reference-field
 * polarity, B-fraction scale factor, FCM, overlap/loop-filter flags,
 * range-reduction state, raw-bitplane flags) and packs them into a
 * 6-dword command appended to the context's BCS batch buffer.
 */
static void
gen8_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    struct gen7_vc1_surface *gen7_vc1_current_surface;
    struct gen7_vc1_surface *gen7_vc1_reference_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode = 0;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;          /* B-fraction scale factor (table lookup) */
    int trans_ac_y = 0;            /* AC coding set index for luma */
    int dmv_surface_valid = 0;     /* direct-MV buffer usable for B direct mode */
    int frfd = 0;                  /* forward reference field distance */
    int brfd = 0;                  /* backward reference field distance */
    int fcm = 0;                   /* frame coding mode as programmed to hardware */
    int picture_type;
    int ptype;                     /* picture type as programmed to hardware */
    int overlap = 0;
    int interpolation_mode = 0;
    int height_in_mbs;
    int is_first_field = 1;
    int loopfilter = 0;
    int bitplane_present;
    int range_reduction = 0;
    int range_reduction_scale = 0;
    int forward_mb = 0, mv_type_mb = 0, skip_mb = 0, direct_mb = 0;
    int overflags = 0, ac_pred = 0, field_tx = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /*
     * Field-interlaced pictures carry a compound FPTYPE (one code for both
     * fields) and are decoded one field at a time at half frame height;
     * everything else uses the picture type and full frame height directly.
     */
    if (!pic_param->sequence_fields.bits.interlace ||
        (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
        picture_type = pic_param->picture_fields.bits.picture_type;
        height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
    } else {/* Field-Interlace */
        is_first_field = pic_param->picture_fields.bits.is_first_field;
        picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
        height_in_mbs = ALIGN(pic_param->coded_height, 32) / 32;
    }

    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /*
     * Map the VC-1 VOPDQUANT syntax elements (DQUANT/DQUANTFRM/DQPROFILE/...)
     * onto the hardware's AltPQuant config and edge-mask fields.
     */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        /* DQUANT == 2: all edge macroblocks use ALTPQUANT unconditionally. */
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3: /* all macroblocks: per-MB quantizer selection */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;

            case 0: /* all four edges */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1: /* double edge: two adjacent edges selected by DQDBEDGE */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2: /* single edge selected by DQSBEDGE */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /*
     * Main-profile range reduction (RANGERED): the hardware must rescale the
     * forward reference when its RANGEREDFRM state differs from the current
     * frame's; range_reduction_scale selects the direction of the rescale.
     * A skipped picture inherits the reference's RANGEREDFRM state.
     */
    if (pic_param->sequence_fields.bits.profile == 1 && /* Main Profile */
        pic_param->sequence_fields.bits.rangered) {
        obj_surface = decode_state->reference_objects[0];

        gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);

        if (pic_param->forward_reference_picture != VA_INVALID_ID &&
            obj_surface)
            gen7_vc1_reference_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
        else
            gen7_vc1_reference_surface = NULL;

        if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
            if (gen7_vc1_reference_surface)
                gen7_vc1_current_surface->range_reduction_frame = gen7_vc1_reference_surface->range_reduction_frame;
            else
                gen7_vc1_current_surface->range_reduction_frame = 0;
        else
            gen7_vc1_current_surface->range_reduction_frame = pic_param->range_reduction_frame;

        if (gen7_vc1_reference_surface) {
            if (gen7_vc1_current_surface->range_reduction_frame &&
                !gen7_vc1_reference_surface->range_reduction_frame) {
                range_reduction = 1;
                range_reduction_scale = 0;
            } else if (!gen7_vc1_current_surface->range_reduction_frame &&
                       gen7_vc1_reference_surface->range_reduction_frame) {
                range_reduction = 1;
                range_reduction_scale = 1;
            }
        }
    }

    /*
     * Unified MV mode for P/B pictures (not frame-interlace): when intensity
     * compensation is signalled, the effective motion mode is MVMODE2.
     */
    if ((!pic_param->sequence_fields.bits.interlace ||
         pic_param->picture_fields.bits.frame_coding_mode != 1) && /* Progressive or Field-Interlace */
        (picture_type == GEN7_VC1_P_PICTURE ||
         picture_type == GEN7_VC1_B_PICTURE)) {
        if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
            assert(pic_param->mv_fields.bits.mv_mode2 < 4);
            unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
        } else {
            assert(pic_param->mv_fields.bits.mv_mode < 4);
            unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
        }
    }

    /*
     * Field P picture with a single reference field (NUMREF == 0): derive the
     * polarity (top/bottom) of the field to reference from REFFIELD and the
     * current field's position (TFF and first/second field).
     */
    if (pic_param->sequence_fields.bits.interlace &&
        pic_param->picture_fields.bits.frame_coding_mode == 2 && /* Field-Interlace */
        picture_type == GEN7_VC1_P_PICTURE &&
        !pic_param->reference_fields.bits.num_reference_pictures) {
        if (pic_param->reference_fields.bits.reference_field_pic_indicator == 0) {
            ref_field_pic_polarity = is_first_field ?
                                        pic_param->picture_fields.bits.top_field_first :
                                        !pic_param->picture_fields.bits.top_field_first;
        } else {
            ref_field_pic_polarity = is_first_field ?
                                        !pic_param->picture_fields.bits.top_field_first :
                                        pic_param->picture_fields.bits.top_field_first;
        }
    }

    /* BFRACTION-derived scale factor; values >= 21 fall outside the table. */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    /*
     * A skipped picture is decoded as a P picture with all bitplanes
     * present (the caller synthesizes a "skip every MB" bitplane); otherwise
     * pass through the raw-coding flags for the bitplanes actually coded.
     */
    if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
        ptype = GEN7_VC1_P_PICTURE;
        bitplane_present = 1;
    } else {
        ptype = pic_param->picture_fields.bits.picture_type;
        bitplane_present = !!(pic_param->bitplane_present.value & 0x7f);
        forward_mb = pic_param->raw_coding.flags.forward_mb;
        mv_type_mb = pic_param->raw_coding.flags.mv_type_mb;
        skip_mb = pic_param->raw_coding.flags.skip_mb;
        direct_mb = pic_param->raw_coding.flags.direct_mb;
        overflags = pic_param->raw_coding.flags.overflags;
        ac_pred = pic_param->raw_coding.flags.ac_pred;
        field_tx = pic_param->raw_coding.flags.field_tx;
        loopfilter = pic_param->entrypoint_fields.bits.loopfilter;
    }

    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;

        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /*
     * B direct mode needs the direct-MV buffer written while the backward
     * anchor was decoded; it is only valid if that anchor (the relevant
     * field of it, for field-interlace) was a P picture.
     */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        obj_surface = decode_state->reference_objects[1];

        if (pic_param->backward_reference_picture != VA_INVALID_ID &&
            obj_surface)
            gen7_vc1_reference_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
        else
            gen7_vc1_reference_surface = NULL;

        if (gen7_vc1_reference_surface) {
            if (pic_param->sequence_fields.bits.interlace &&
                pic_param->picture_fields.bits.frame_coding_mode == 2 && /* Field-Interlace */
                pic_param->picture_fields.bits.top_field_first ^ is_first_field) {
                if (gen7_vc1_reference_surface->picture_type_bottom == GEN7_VC1_P_PICTURE)
                    dmv_surface_valid = 1;
            } else if (gen7_vc1_reference_surface->picture_type_top == GEN7_VC1_P_PICTURE)
                dmv_surface_valid = 1;
        }
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /*
     * Frame coding mode field for the hardware.
     * NOTE(review): gating the fcm = 3 case on !top_field_first (rather than
     * on frame_coding_mode) looks surprising — verify this mapping against
     * the MFD_VC1_LONG_PIC_STATE FCM field encoding in the PRM.
     */
    if (pic_param->sequence_fields.bits.interlace) {
        if (!pic_param->picture_fields.bits.top_field_first)
            fcm = 3;
        else
            fcm = pic_param->picture_fields.bits.frame_coding_mode;
    }

    /*
     * Field-interlace reference distances: I/P fields latch REFDIST from the
     * first field; a B field derives FRFD from the anchor's reference
     * distance scaled by the B-fraction factor (scale_factor is presumably
     * in 1/256 units, hence the >> 8 — see b_picture_scale_factor), and BRFD
     * as the remainder, clamped at zero.
     */
    if (pic_param->sequence_fields.bits.interlace &&
        pic_param->picture_fields.bits.frame_coding_mode == 2) { /* Field-Interlace */
        if (picture_type == GEN7_VC1_I_PICTURE ||
             picture_type == GEN7_VC1_P_PICTURE) {
            gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);

            if (is_first_field)
                gen7_vc1_current_surface->reference_distance = pic_param->reference_fields.bits.reference_distance;

            frfd = gen7_vc1_current_surface->reference_distance;
        } else if (picture_type == GEN7_VC1_B_PICTURE) {
            obj_surface = decode_state->reference_objects[1];

            if (pic_param->backward_reference_picture != VA_INVALID_ID &&
                obj_surface)
                gen7_vc1_reference_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
            else
                gen7_vc1_reference_surface = NULL;

            if (gen7_vc1_reference_surface) {
                frfd = (scale_factor * gen7_vc1_reference_surface->reference_distance) >> 8;

                brfd = gen7_vc1_reference_surface->reference_distance - frfd - 1;
                if (brfd < 0)
                    brfd = 0;
            }
        }
    }

    /*
     * Overlap smoothing: enabled for P pictures at PQUANT >= 9, and for
     * I/BI pictures either at PQUANT >= 9 or (advanced profile) when
     * CONDOVER requests it.
     */
    if (pic_param->sequence_fields.bits.overlap) {
        if (pic_param->sequence_fields.bits.profile == 3) { /* Advanced Profile */
            if (picture_type == GEN7_VC1_P_PICTURE &&
                pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
                overlap = 1;
            }
            if (picture_type == GEN7_VC1_I_PICTURE ||
                picture_type == GEN7_VC1_BI_PICTURE) {
                if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
                    overlap = 1;
                } else if (pic_param->conditional_overlap_flag == 1 || /* all block boundaries */
                           pic_param->conditional_overlap_flag == 2) { /* coded by OVERFLAGSMB bitplane */
                    overlap = 1;
                }
            }
        } else {
            if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
                picture_type != GEN7_VC1_B_PICTURE) {
                overlap = 1;
            }
        }
    }

    /* Bit 3 selects bilinear (half-pel) interpolation; bit 0 is FASTUVMC. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 8 | pic_param->fast_uvmc_flag;
    else
        interpolation_mode = 0 | pic_param->fast_uvmc_flag;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    /* DW1: picture size in macroblocks (minus one). */
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    /* DW2: deblock/MV control flags and range-reduction state. */
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  range_reduction_scale << 7 |
                  range_reduction << 6 |
                  loopfilter << 5 |
                  overlap << 4 |
                  !is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0); /* Advanced Profile */
    /* DW3: picture type, FCM, quantizer scale, B-fraction scale factor. */
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  ptype << 26 |
                  fcm << 24 |
                  alt_pq << 16 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
                  scale_factor << 0);
    /* DW4: motion-vector mode, reference fields, alt-PQUANT configuration. */
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  brfd << 20 |
                  frfd << 16 |
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    /* DW5: bitplane/raw-coding flags and VLC table selections. */
    OUT_BCS_BATCH(batch,
                  bitplane_present << 31 |
                  forward_mb << 30 |
                  mv_type_mb << 29 |
                  skip_mb << 28 |
                  direct_mb << 27 |
                  overflags << 26 |
                  ac_pred << 25 |
                  field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
}
1987
/*
 * Program MFX_VC1_PRED_PIPE_STATE: intensity-compensation controls for the
 * VC-1 prediction pipe.
 *
 * Forward references live in reference_surface[0] (top) / [2] (bottom),
 * backward references in reference_surface[1] (top) / [3] (bottom).  The
 * luma scale/shift values that were recorded on those surfaces when they
 * were decoded (struct gen7_vc1_surface) are packed into the command.
 * "single" refers to the first scale/shift pair, "double" to the second
 * pair used when a field applies two intensity compensations.
 */
static void
gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct gen7_vc1_surface *gen7_vc1_top_surface;
    struct gen7_vc1_surface *gen7_vc1_bottom_surface;
    int picture_type;
    int is_first_field = 1;
    int intensitycomp_single_fwd = 0;
    int intensitycomp_single_bwd = 0;
    int intensitycomp_double_fwd = 0;
    int lumscale1_single_fwd = 0;
    int lumscale2_single_fwd = 0;
    int lumshift1_single_fwd = 0;
    int lumshift2_single_fwd = 0;
    int lumscale1_single_bwd = 0;
    int lumscale2_single_bwd = 0;
    int lumshift1_single_bwd = 0;
    int lumshift2_single_bwd = 0;
    int lumscale1_double_fwd = 0;
    int lumscale2_double_fwd = 0;
    int lumshift1_double_fwd = 0;
    int lumshift2_double_fwd = 0;
    int replication_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /* Derive the effective picture type; for field-interlace pictures the
     * FPTYPE syntax element encodes both fields, so look up the type of
     * the field currently being decoded. */
    if (!pic_param->sequence_fields.bits.interlace ||
        (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
        picture_type = pic_param->picture_fields.bits.picture_type;
    } else {/* Field-Interlace */
        is_first_field = pic_param->picture_fields.bits.is_first_field;
        picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
    }

    /* Forward-reference intensity compensation (P and B pictures). */
    if (picture_type == GEN7_VC1_P_PICTURE ||
        picture_type == GEN7_VC1_B_PICTURE) {
        if (gen7_mfd_context->reference_surface[0].surface_id != VA_INVALID_ID)
            gen7_vc1_top_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[0].obj_surface->private_data);
        else
            gen7_vc1_top_surface = NULL;

        if (gen7_vc1_top_surface) {
            intensitycomp_single_fwd = !!gen7_vc1_top_surface->intensity_compensation_top;
            lumscale1_single_fwd = gen7_vc1_top_surface->luma_scale_top[0];
            lumshift1_single_fwd = gen7_vc1_top_surface->luma_shift_top[0];
            /* A value of 2 means the field was intensity-compensated twice;
             * the second scale/shift pair goes into the "double" fields. */
            if (gen7_vc1_top_surface->intensity_compensation_top == 2) {
                intensitycomp_double_fwd = 1;
                lumscale1_double_fwd = gen7_vc1_top_surface->luma_scale_top[1];
                lumshift1_double_fwd = gen7_vc1_top_surface->luma_shift_top[1];
            }
        }

        if (pic_param->sequence_fields.bits.interlace &&
            pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
            if (gen7_mfd_context->reference_surface[2].surface_id != VA_INVALID_ID)
                gen7_vc1_bottom_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[2].obj_surface->private_data);
            else
                gen7_vc1_bottom_surface = NULL;

            if (gen7_vc1_bottom_surface) {
                /* Bottom field occupies bit 1 of the 2-bit enable fields. */
                intensitycomp_single_fwd |= !!gen7_vc1_bottom_surface->intensity_compensation_bottom << 1;
                lumscale2_single_fwd = gen7_vc1_bottom_surface->luma_scale_bottom[0];
                lumshift2_single_fwd = gen7_vc1_bottom_surface->luma_shift_bottom[0];
                if (gen7_vc1_bottom_surface->intensity_compensation_bottom == 2) {
                    intensitycomp_double_fwd |= 2;
                    lumscale2_double_fwd = gen7_vc1_bottom_surface->luma_scale_bottom[1];
                    lumshift2_double_fwd = gen7_vc1_bottom_surface->luma_shift_bottom[1];
                }
            }
        }
    }

    /* Backward-reference intensity compensation (B pictures only). */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        if (gen7_mfd_context->reference_surface[1].surface_id != VA_INVALID_ID)
            gen7_vc1_top_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[1].obj_surface->private_data);
        else
            gen7_vc1_top_surface = NULL;

        if (gen7_vc1_top_surface) {
            intensitycomp_single_bwd = !!gen7_vc1_top_surface->intensity_compensation_top;
            lumscale1_single_bwd = gen7_vc1_top_surface->luma_scale_top[0];
            lumshift1_single_bwd = gen7_vc1_top_surface->luma_shift_top[0];
        }

        if (pic_param->sequence_fields.bits.interlace &&
            pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
            if (gen7_mfd_context->reference_surface[3].surface_id != VA_INVALID_ID)
                gen7_vc1_bottom_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[3].obj_surface->private_data);
            else
                gen7_vc1_bottom_surface = NULL;

            if (gen7_vc1_bottom_surface) {
                intensitycomp_single_bwd |= !!gen7_vc1_bottom_surface->intensity_compensation_bottom << 1;
                lumscale2_single_bwd = gen7_vc1_bottom_surface->luma_scale_bottom[0];
                lumshift2_single_bwd = gen7_vc1_bottom_surface->luma_shift_bottom[0];
            }
        }
    }

    /* Reference-frame replication mode for interlaced content; values
     * 0x5/0xf select per-field replication for P/B pictures respectively
     * (hardware-defined encoding — see the MFX PRM). */
    if (pic_param->sequence_fields.bits.interlace &&
        pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
        if (picture_type == GEN7_VC1_P_PICTURE)
            replication_mode = 0x5;
        else if (picture_type == GEN7_VC1_B_PICTURE)
            replication_mode = 0xf;
    }

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  intensitycomp_double_fwd << 14 |
                  0 << 12 |
                  intensitycomp_single_fwd << 10 |
                  intensitycomp_single_bwd << 8 |
                  replication_mode << 4 |
                  0);
    OUT_BCS_BATCH(batch,
                  lumshift2_single_fwd << 24 |
                  lumshift1_single_fwd << 16 |
                  lumscale2_single_fwd << 8 |
                  lumscale1_single_fwd << 0);
    OUT_BCS_BATCH(batch,
                  lumshift2_double_fwd << 24 |
                  lumshift1_double_fwd << 16 |
                  lumscale2_double_fwd << 8 |
                  lumscale1_double_fwd << 0);
    OUT_BCS_BATCH(batch,
                  lumshift2_single_bwd << 24 |
                  lumshift1_single_bwd << 16 |
                  lumscale2_single_bwd << 8 |
                  lumscale1_single_bwd << 0);
    /* Backward "double" scale/shift: never set by this driver, always 0. */
    OUT_BCS_BATCH(batch,
                  0 << 24 |
                  0 << 16 |
                  0 << 8 |
                  0 << 0);
    ADVANCE_BCS_BATCH(batch);
}
2131
/*
 * Program MFX_VC1_DIRECTMODE_STATE: the direct-mode motion-vector (DMV)
 * buffers.  P and skipped pictures write their motion vectors into the
 * current surface's dmv buffer (for use by a future B picture); B pictures
 * read the motion vectors saved by their backward (future) reference.
 * For field-interlace pictures the top/bottom dmv buffer is selected by
 * top_field_first XOR is_first_field, i.e. the field parity currently
 * being decoded.
 */
static void
gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAPictureParameterBufferVC1 *pic_param;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
    int picture_type;
    int is_first_field = 1;

    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /* Same picture-type derivation as the other VC-1 state emitters. */
    if (!pic_param->sequence_fields.bits.interlace ||
        (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
        picture_type = pic_param->picture_fields.bits.picture_type;
    } else {/* Field-Interlace */
        is_first_field = pic_param->picture_fields.bits.is_first_field;
        picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
    }

    /* DMV write: current picture saves its MVs. */
    if (picture_type == GEN7_VC1_P_PICTURE ||
        picture_type == GEN7_VC1_SKIPPED_PICTURE) {
        obj_surface = decode_state->render_object;

        if (pic_param->sequence_fields.bits.interlace &&
            (pic_param->picture_fields.bits.frame_coding_mode == 2) && /* Field-Interlace */
            (pic_param->picture_fields.bits.top_field_first ^ is_first_field))
            dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_bottom;
        else
            dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_top;
    }

    /* DMV read: B picture consumes the MVs of its backward reference. */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        obj_surface = decode_state->reference_objects[1];
        if (pic_param->backward_reference_picture != VA_INVALID_ID &&
            obj_surface &&
            obj_surface->private_data) {

            if (pic_param->sequence_fields.bits.interlace &&
                (pic_param->picture_fields.bits.frame_coding_mode == 2) && /* Field-Interlace */
                (pic_param->picture_fields.bits.top_field_first ^ is_first_field))
                dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_bottom;
            else
                dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_top;
        }
    }

    /* 7 dwords: header, write address (64-bit reloc), write MOCS,
     * read address (64-bit reloc), read MOCS. */
    BEGIN_BCS_BATCH(batch, 7);
    OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));

    if (dmv_write_buffer)
        OUT_BCS_RELOC64(batch, dmv_write_buffer,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        /* No reloc: still emit two dwords to keep the command length. */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    if (dmv_read_buffer)
        OUT_BCS_RELOC64(batch, dmv_read_buffer,
                        I915_GEM_DOMAIN_INSTRUCTION, 0,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    ADVANCE_BCS_BATCH(batch);
}
2209
/*
 * Translate the slice-header bit offset reported by the application into
 * the offset the hardware expects within the raw slice data.
 *
 * For VC-1 Advanced Profile (profile == 3) the slice header may contain
 * emulation-prevention byte sequences (00 00 03 xx, xx < 4); the hardware
 * wants an offset into the unmodified stream, so each such 0x03 byte adds
 * one byte to the offset.  If the escape sequence straddles the end of the
 * header, the 0x03 is cleared in place (note: this mutates buf).  Simple
 * and Main Profile offsets are returned unchanged.
 */
static int
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_bytes = in_slice_data_bit_offset / 8;
    int bit_remainder = in_slice_data_bit_offset % 8;
    int logical;    /* header bytes accounted for so far            */
    int stream;     /* bytes consumed from the raw (escaped) stream */

    if (profile != 3 || header_bytes == 0) /* Simple or Main Profile */
        return in_slice_data_bit_offset;

    logical = 0;
    stream = 0;

    while (logical < header_bytes - 1) {
        if (buf[stream] == 0 && buf[stream + 1] == 0 &&
            buf[stream + 2] == 3 && buf[stream + 3] < 4) {
            /* Skip over the emulation-prevention 0x03 byte. */
            logical++;
            stream += 2;
        }

        logical++;
        stream++;
    }

    if (logical == header_bytes - 1) {
        /* Escape sequence straddling the header boundary: neutralize the
         * 0x03 byte in the buffer and step past it. */
        if (buf[stream] == 0 && buf[stream + 1] == 0 &&
            buf[stream + 2] == 3 && buf[stream + 3] < 4) {
            buf[stream + 2] = 0;
            stream++;
        }

        stream++;
    }

    return 8 * stream + bit_remainder;
}
2237
/*
 * Emit one MFD_VC1_BSD_OBJECT command for a slice.
 *
 * The macroblock data bit offset is recomputed on the CPU (the slice data
 * bo is mapped read/write because Advanced Profile emulation-prevention
 * handling may patch a byte in place).  The whole-byte part of the offset
 * is folded into the slice data start address/size, and only the residual
 * bit offset (0..7) is programmed in the last dword.
 */
static void
gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVC1 *pic_param,
                        VASliceParameterBufferVC1 *slice_param,
                        VASliceParameterBufferVC1 *next_slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int next_slice_start_vert_pos;
    int macroblock_offset;
    uint8_t *slice_data = NULL;

    dri_bo_map(slice_data_bo, True);
    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
    macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
                                                               slice_param->macroblock_offset,
                                                               pic_param->sequence_fields.bits.profile);
    dri_bo_unmap(slice_data_bo);

    /* Last slice: the next slice would start at the bottom of the picture.
     * Field pictures are half-height, hence the 32-pixel (2-MB) rounding. */
    if (next_slice_param)
        next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
    else if (!pic_param->sequence_fields.bits.interlace ||
             pic_param->picture_fields.bits.frame_coding_mode < 2) /* Progressive or Frame-Interlace */
        next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
    else /* Field-Interlace */
        next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 32) / 32;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_vertical_position << 16 |
                  next_slice_start_vert_pos << 0);
    OUT_BCS_BATCH(batch,
                  (macroblock_offset & 0x7));
    ADVANCE_BCS_BATCH(batch);
}
2279
/*
 * Decode one VC-1 picture: program the full MFX state pipeline once, then
 * emit one BSD object per slice across all slice parameter buffers.  The
 * state commands must precede the BSD objects and their relative order
 * (pipe mode select, surface, buffer addresses, BSP base, pic state,
 * pred pipe, direct mode) follows the hardware programming sequence.
 */
static void
gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);

    /* Outer loop: slice parameter buffers; inner loop: slices within one
     * buffer.  Each BSD object needs to know where the next slice starts,
     * so look ahead one element (or one buffer). */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
2332
/*
 * Prepare the decoder context for a JPEG baseline picture: derive the
 * chroma subsampling and surface fourcc from the per-component sampling
 * factors, make sure the render surface's bo matches that layout, and
 * route the decoder output (pre-deblocking) to it.  JPEG decode uses none
 * of the row-store scratch buffers, so they are all invalidated here.
 */
static void
gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface;
    VAPictureParameterBufferJPEGBaseline *pic_param;
    int subsampling = SUBSAMPLE_YUV420;
    int fourcc = VA_FOURCC_IMC3;

    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    if (pic_param->num_components == 1) {
        /* Grayscale. */
        subsampling = SUBSAMPLE_YUV400;
        fourcc = VA_FOURCC_Y800;
    } else if (pic_param->num_components == 3) {
        /* Classify by the ratios of the three components' sampling
         * factors (JPEG allows arbitrary absolute values). */
        int h1 = pic_param->components[0].h_sampling_factor;
        int h2 = pic_param->components[1].h_sampling_factor;
        int h3 = pic_param->components[2].h_sampling_factor;
        int v1 = pic_param->components[0].v_sampling_factor;
        int v2 = pic_param->components[1].v_sampling_factor;
        int v3 = pic_param->components[2].v_sampling_factor;

        if (h1 == 2 * h2 && h2 == h3 &&
            v1 == 2 * v2 && v2 == v3) {
            subsampling = SUBSAMPLE_YUV420;
            fourcc = VA_FOURCC_IMC3;
        } else if (h1 == 2 * h2  && h2 == h3 &&
                   v1 == v2 && v2 == v3) {
            subsampling = SUBSAMPLE_YUV422H;
            fourcc = VA_FOURCC_422H;
        } else if (h1 == h2 && h2 == h3 &&
                   v1 == v2  && v2 == v3) {
            subsampling = SUBSAMPLE_YUV444;
            fourcc = VA_FOURCC_444P;
        } else if (h1 == 4 * h2 && h2 ==  h3 &&
                   v1 == v2 && v2 == v3) {
            subsampling = SUBSAMPLE_YUV411;
            fourcc = VA_FOURCC_411P;
        } else if (h1 == h2 && h2 == h3 &&
                   v1 == 2 * v2 && v2 == v3) {
            subsampling = SUBSAMPLE_YUV422V;
            fourcc = VA_FOURCC_422V;
        } else
            assert(0); /* unsupported sampling-factor combination */
    } else {
        assert(0); /* only 1 or 3 components supported */
    }

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);

    /* JPEG output goes through the pre-deblocking path only. */
    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = 1;

    gen7_mfd_context->post_deblocking_output.bo = NULL;
    gen7_mfd_context->post_deblocking_output.valid = 0;

    gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.bo = NULL;
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
2409
/* Map a VA rotation index (0/90/180/270) to the GEN7 JPEG rotation
 * encoding used in MFX_JPEG_PIC_STATE. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
2416
2417 static void
2418 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
2419                         struct decode_state *decode_state,
2420                         struct gen7_mfd_context *gen7_mfd_context)
2421 {
2422     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2423     VAPictureParameterBufferJPEGBaseline *pic_param;
2424     int chroma_type = GEN7_YUV420;
2425     int frame_width_in_blks;
2426     int frame_height_in_blks;
2427
2428     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2429     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2430
2431     if (pic_param->num_components == 1)
2432         chroma_type = GEN7_YUV400;
2433     else if (pic_param->num_components == 3) {
2434         int h1 = pic_param->components[0].h_sampling_factor;
2435         int h2 = pic_param->components[1].h_sampling_factor;
2436         int h3 = pic_param->components[2].h_sampling_factor;
2437         int v1 = pic_param->components[0].v_sampling_factor;
2438         int v2 = pic_param->components[1].v_sampling_factor;
2439         int v3 = pic_param->components[2].v_sampling_factor;
2440
2441         if (h1 == 2 * h2 && h2 == h3 &&
2442             v1 == 2 * v2 && v2 == v3)
2443             chroma_type = GEN7_YUV420;
2444         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2445                  v1 == 1 && v2 == 1 && v3 == 1)
2446             chroma_type = GEN7_YUV422H_2Y;
2447         else if (h1 == h2 && h2 == h3 &&
2448                  v1 == v2 && v2 == v3)
2449             chroma_type = GEN7_YUV444;
2450         else if (h1 == 4 * h2 && h2 == h3 &&
2451                  v1 == v2 && v2 == v3)
2452             chroma_type = GEN7_YUV411;
2453         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2454                  v1 == 2 && v2 == 1 && v3 == 1)
2455             chroma_type = GEN7_YUV422V_2Y;
2456         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2457                  v1 == 2 && v2 == 2 && v3 == 2)
2458             chroma_type = GEN7_YUV422H_4Y;
2459         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2460                  v1 == 2 && v2 == 1 && v3 == 1)
2461             chroma_type = GEN7_YUV422V_4Y;
2462         else
2463             assert(0);
2464     }
2465
2466     if (chroma_type == GEN7_YUV400 ||
2467         chroma_type == GEN7_YUV444 ||
2468         chroma_type == GEN7_YUV422V_2Y) {
2469         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2470         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2471     } else if (chroma_type == GEN7_YUV411) {
2472         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2473         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2474     } else {
2475         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2476         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2477     }
2478
2479     BEGIN_BCS_BATCH(batch, 3);
2480     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2481     OUT_BCS_BATCH(batch,
2482                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2483                   (chroma_type << 0));
2484     OUT_BCS_BATCH(batch,
2485                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2486                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2487     ADVANCE_BCS_BATCH(batch);
2488 }
2489
/* Huffman table slot IDs: index 0 = luma (Y), index 1 = chroma (UV). */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
2494
/*
 * Upload up to num_tables JPEG Huffman tables via MFX_JPEG_HUFF_TABLE_STATE.
 * Each command is a fixed 53 dwords: 2 header dwords plus 12 DC code
 * counts, 12 DC values, 16 AC code counts and the AC values padded to
 * 164 bytes (VA-API supplies 162) to fill the command.  Tables whose
 * load_huffman_table flag is clear are skipped.
 */
static void
gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct gen7_mfd_context *gen7_mfd_context,
                               int num_tables)
{
    VAHuffmanTableBufferJPEGBaseline *huffman_table;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int index;

    if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
        return;

    huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;

    for (index = 0; index < num_tables; index++) {
        int id = va_to_gen7_jpeg_hufftable[index];
        if (!huffman_table->load_huffman_table[index])
            continue;
        BEGIN_BCS_BATCH(batch, 53);
        OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
        OUT_BCS_BATCH(batch, id);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
        ADVANCE_BCS_BATCH(batch);
    }
}
2524
/* Map a 1-based component id (relative to the first component) to the MFX
 * quantizer-matrix slot; index 0 is unused. */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
2532
/*
 * Upload the JPEG quantization matrices for each picture component.
 * VA-API delivers the tables in zigzag scan order; the hardware expects
 * raster order, so each table is reordered through zigzag_direct before
 * being handed to gen8_mfd_qm_state.
 */
static void
gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferJPEGBaseline *pic_param;
    VAIQMatrixBufferJPEGBaseline *iq_matrix;
    int index;

    if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
        return;

    iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    assert(pic_param->num_components <= 3);

    for (index = 0; index < pic_param->num_components; index++) {
        /* Component ids are normalized relative to the first component so
         * that id 1 is always luma (see va_to_gen7_jpeg_qm). */
        int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
        int qm_type;
        unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
        unsigned char raster_qm[64];
        int j;

        if (id > 4 || id < 1)
            continue;

        if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
            continue;

        qm_type = va_to_gen7_jpeg_qm[id];

        /* zigzag scan order -> raster order */
        for (j = 0; j < 64; j++)
            raster_qm[zigzag_direct[j]] = qm[j];

        gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
    }
}
2571
/*
 * Emit one MFD_JPEG_BSD_OBJECT command for a scan.  The scan component
 * mask is built from the scan's component selectors, normalized against
 * the first picture component's id so that bit 0 is always the first
 * component.  A scan with more than one component is flagged as
 * interleaved.
 */
static void
gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
                         VAPictureParameterBufferJPEGBaseline *pic_param,
                         VASliceParameterBufferJPEGBaseline *slice_param,
                         VASliceParameterBufferJPEGBaseline *next_slice_param,
                         dri_bo *slice_data_bo,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int scan_component_mask = 0;
    int i;

    assert(slice_param->num_components > 0);
    assert(slice_param->num_components < 4);
    assert(slice_param->num_components <= pic_param->num_components);

    for (i = 0; i < slice_param->num_components; i++) {
        switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
        case 1:
            scan_component_mask |= (1 << 0);
            break;
        case 2:
            scan_component_mask |= (1 << 1);
            break;
        case 3:
            scan_component_mask |= (1 << 2);
            break;
        default:
            assert(0); /* selector outside the picture's components */
            break;
        }
    }

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size);
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  slice_param->slice_horizontal_position << 16 |
                  slice_param->slice_vertical_position << 0);
    OUT_BCS_BATCH(batch,
                  ((slice_param->num_components != 1) << 30) |  /* interleaved */
                  (scan_component_mask << 27) |                 /* scan components */
                  (0 << 26) |   /* disable interrupt allowed */
                  (slice_param->num_mcus << 0));                /* MCU count */
    OUT_BCS_BATCH(batch,
                  (slice_param->restart_interval << 0));    /* RestartInterval */
    ADVANCE_BCS_BATCH(batch);
}
2623
2624 /* Workaround for JPEG decoding on Ivybridge */
2625 #ifdef JPEG_WA
2626
/* Canned 16x16 clip used by the JPEG decode workaround: the gen8_jpeg_wa_*
 * helpers run a tiny AVC decode (MFX_FORMAT_AVC in the WA pipe-mode-select)
 * against this bitstream before the real JPEG decode.
 * NOTE(review): field meanings inferred from usage in gen8_jpeg_wa_init and
 * the WA emitters below — confirm against the original workaround commit. */
static struct {
    int width;                 /* clip width in pixels */
    int height;                /* clip height in pixels */
    unsigned char data[32];    /* pre-encoded bitstream bytes */
    int data_size;             /* number of valid bytes in data[] */
    int data_bit_offset;       /* bit offset where slice data starts */
    int qp;                    /* quantization parameter for the clip */
} gen7_jpeg_wa_clip = {
    16,
    16,
    {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    14,
    40,
    28,
};
2645
/*
 * Set up the resources for the JPEG decode workaround: a private 16x16
 * NV12 surface to decode the canned clip into, and a 4KB bo holding the
 * clip's bitstream.  The surface is recreated on every call; the slice
 * data bo is allocated and filled only once and then reused.
 */
static void
gen8_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAStatus status;
    struct object_surface *obj_surface;

    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
                             1);

    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 1,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);

    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen7_mfd_context->jpeg_wa_surface_object = obj_surface;

    /* One-time upload of the canned bitstream. */
    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
                                                               "JPEG WA data",
                                                               0x1000,
                                                               0x1000);
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       0,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
    }
}
2683
/*
 * Workaround pipe-mode-select: configure the MFX engine for an AVC VLD
 * decode (long format, pre-deblocking output) used to run the canned WA
 * clip before the real JPEG decode.
 */
static void
gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9)  | /* Post Deblocking Output */
                  (1 << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
2711
/*
 * Workaround surface state: program MFX_SURFACE_STATE for the private
 * NV12 workaround surface created by gen8_jpeg_wa_init.
 */
static void
gen8_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
2741
/* Emit MFX_PIPE_BUF_ADDR_STATE for the JPEG workaround pass.
 * Only the pre-deblocking output (the workaround surface) and a freshly
 * allocated intra-row-store scratch buffer are programmed; all other
 * buffer addresses are zeroed.  The 61-DWord layout is fixed by the
 * command definition. */
static void
gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *intra_bo;
    int i;

    /* Scratch allocation for the intra row store; released below once the
     * relocation has been emitted (the kernel keeps it alive for the GPU). */
    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
                            "intra row store",
                            128 * 64,
                            0x1000);

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* DW1-3: pre-deblocking output -> the workaround surface */
    OUT_BCS_RELOC64(batch,
                    obj_surface->bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);


    OUT_BCS_BATCH(batch, 0); /* post deblocking */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 13-15 is for intra row store scratch */
    OUT_BCS_RELOC64(batch,
                    intra_bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW 16-18 is for deblocking filter */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 19..50 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, 0);

    /* the DW52-54 is for mb status address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* the DW56-60 is for ILDB & second ILDB address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* Drop the CPU reference; the batch relocation keeps the BO alive. */
    dri_bo_unreference(intra_bo);
}
2814
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE for the JPEG workaround pass with two
 * freshly allocated scratch buffers (BSD/MPC row store and MPR row store).
 * Both are unreferenced after emission; the batch relocations keep them
 * alive for the GPU. */
static void
gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1.0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* DW1-3: BSD/MPC row store scratch buffer */
    OUT_BCS_RELOC64(batch,
                    bsd_mpc_bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW4-6: MPR row store scratch buffer */
    OUT_BCS_RELOC64(batch,
                    mpr_bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
2858
/* Intentionally a no-op: no MFX_QM_STATE commands are emitted for the
 * JPEG workaround clip (presumably the hardware defaults suffice for
 * this tiny pre-canned decode — kept as a placeholder in the
 * programming sequence). */
static void
gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{

}
2865
/* Emit MFX_AVC_IMG_STATE for the JPEG workaround: a minimal 1x1-MB,
 * progressive, CABAC, 4:2:0 AVC image description for the pre-canned
 * workaround clip. */
static void
gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct = 0;          /* 0 = frame picture */
    int mbaff_frame_flag = 0;    /* no MBAFF in the workaround clip */
    unsigned int width_in_mbs = 1, height_in_mbs = 1;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    /* DW1: total macroblock count */
    OUT_BCS_BATCH(batch,
                  width_in_mbs * height_in_mbs);
    /* DW2: frame size in macroblocks, minus one */
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 24) |
                  (0 << 16) |
                  (0 << 14) |
                  (0 << 13) |
                  (0 << 12) | /* differ from GEN6 */
                  (0 << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (0 << 6) |
                  (0 << 5) |
                  (0 << 4) |
                  (0 << 3) |
                  (1 << 2) |
                  (mbaff_frame_flag << 1) |
                  (0 << 0));
    /* DW5-15: unused for the workaround; left zero */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2913
/* Emit MFX_AVC_DIRECTMODE_STATE for the JPEG workaround with all
 * reference/DMV addresses and POC entries zeroed — the workaround clip
 * is intra-only, so no direct-mode data is needed. */
static void
gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
                                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0); /* top */
        OUT_BCS_BATCH(batch, 0); /* bottom */
    }

    OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field */
    OUT_BCS_BATCH(batch, 0); /* top */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC List */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
2948
2949 static void
2950 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2951                                      struct gen7_mfd_context *gen7_mfd_context)
2952 {
2953     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2954
2955     BEGIN_BCS_BATCH(batch, 11);
2956     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2957     OUT_BCS_RELOC64(batch,
2958                     gen7_mfd_context->jpeg_wa_slice_data_bo,
2959                     I915_GEM_DOMAIN_INSTRUCTION, 0,
2960                     0);
2961     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2962     OUT_BCS_BATCH(batch, 0);
2963     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2964     OUT_BCS_BATCH(batch, 0);
2965     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2966     OUT_BCS_BATCH(batch, 0);
2967     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2968     OUT_BCS_BATCH(batch, 0);
2969     ADVANCE_BCS_BATCH(batch);
2970 }
2971
/* Emit MFD_AVC_BSD_OBJECT to kick the decode of the pre-canned workaround
 * slice (gen7_jpeg_wa_clip), marked as the last slice of the picture. */
static void
gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitsteam format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    /* DW4: byte offset of the first MB and the remaining bit offset */
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
                  (0 << 5)  |
                  (0 << 4)  |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2998
/* Emit MFX_AVC_SLICE_STATE for the workaround clip: a single intra slice
 * covering the whole 1x1-MB picture, with deblocking disabled and the QP
 * taken from the canned clip description. */
static void
gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* Slice spans MB (0,0); the "next slice" position marks end of picture. */
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0; /* intra: no reference lists */
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
3038
/* Run the JPEG hardware workaround: decode a tiny pre-canned AVC clip
 * before programming the real JPEG decode.  The calls below follow the
 * required MFX state-programming sequence — keep them in this order. */
static void
gen8_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen8_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
3058
3059 #endif
3060
3061 void
3062 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
3063                              struct decode_state *decode_state,
3064                              struct gen7_mfd_context *gen7_mfd_context)
3065 {
3066     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3067     VAPictureParameterBufferJPEGBaseline *pic_param;
3068     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
3069     dri_bo *slice_data_bo;
3070     int i, j, max_selector = 0;
3071
3072     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3073     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
3074
3075     /* Currently only support Baseline DCT */
3076     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
3077     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3078 #ifdef JPEG_WA
3079     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
3080 #endif
3081     intel_batchbuffer_emit_mi_flush(batch);
3082     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3083     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3084     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3085     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
3086     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
3087
3088     for (j = 0; j < decode_state->num_slice_params; j++) {
3089         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3090         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3091         slice_data_bo = decode_state->slice_datas[j]->bo;
3092         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3093
3094         if (j == decode_state->num_slice_params - 1)
3095             next_slice_group_param = NULL;
3096         else
3097             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3098
3099         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3100             int component;
3101
3102             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3103
3104             if (i < decode_state->slice_params[j]->num_elements - 1)
3105                 next_slice_param = slice_param + 1;
3106             else
3107                 next_slice_param = next_slice_group_param;
3108
3109             for (component = 0; component < slice_param->num_components; component++) {
3110                 if (max_selector < slice_param->components[component].dc_table_selector)
3111                     max_selector = slice_param->components[component].dc_table_selector;
3112
3113                 if (max_selector < slice_param->components[component].ac_table_selector)
3114                     max_selector = slice_param->components[component].ac_table_selector;
3115             }
3116
3117             slice_param++;
3118         }
3119     }
3120
3121     assert(max_selector < 2);
3122     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
3123
3124     for (j = 0; j < decode_state->num_slice_params; j++) {
3125         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3126         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3127         slice_data_bo = decode_state->slice_datas[j]->bo;
3128         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3129
3130         if (j == decode_state->num_slice_params - 1)
3131             next_slice_group_param = NULL;
3132         else
3133             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3134
3135         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3136             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3137
3138             if (i < decode_state->slice_params[j]->num_elements - 1)
3139                 next_slice_param = slice_param + 1;
3140             else
3141                 next_slice_param = next_slice_group_param;
3142
3143             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
3144             slice_param++;
3145         }
3146     }
3147
3148     intel_batchbuffer_end_atomic(batch);
3149     intel_batchbuffer_flush(batch);
3150 }
3151
/* VP8 DC dequantization lookup table, indexed by the clipped quantization
 * index (0..127).  Values per the VP8 specification (RFC 6386, §14.1). */
static const int vp8_dc_qlookup[128] = {
    4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
    18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
    29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
    44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
    59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
    75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
    91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
};
3162
/* VP8 AC dequantization lookup table, indexed by the clipped quantization
 * index (0..127).  Values per the VP8 specification (RFC 6386, §14.1). */
static const int vp8_ac_qlookup[128] = {
    4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
    20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
    36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
    52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
    78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
};
3173
/* Clamp a VP8 quantization index into the valid lookup-table range
 * [0, 127] so it can safely index vp8_dc_qlookup/vp8_ac_qlookup. */
static inline unsigned int vp8_clip_quantization_index(int index)
{
    if (index < 0)
        return 0;

    return (index > 127) ? 127 : (unsigned int)index;
}
3183
/* Per-frame VP8 decode setup: refresh the reference frame-store index,
 * ensure the render surface and segmentation buffer exist, and
 * (re)allocate the row-store scratch buffers sized by the frame width.
 * Exactly one of pre-/post-deblocking output is marked valid, depending
 * on whether the in-loop filter is disabled for this frame. */
static void
gen8_mfd_vp8_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;
    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    int width_in_mbs = (pic_param->frame_width + 15) / 16;
    int height_in_mbs = (pic_param->frame_height + 15) / 16;

    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    intel_update_vp8_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* Post-deblocking output is used when the loop filter is enabled... */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;

    /* ...otherwise the pre-deblocking output receives the frame. */
    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;

    intel_ensure_vp8_segmentation_buffer(ctx,
                                         &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);

    /* The same as AVC */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* VP8 has no bitplane data */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
3260
/* Emit MFX_VP8_PIC_STATE (38 DWords) from the VA-API picture, IQ-matrix
 * and slice parameters: frame geometry and flags, per-segment dequant
 * values, coefficient probability buffer, probability/mode tables, loop
 * filter deltas, and the segmentation-id stream address. */
static void
gen8_mfd_vp8_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
    VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
    dri_bo *probs_bo = decode_state->probability_data->bo;
    int i, j, log2num;
    unsigned int quantization_value[4][6];

    /* There is no safe way to error out if the segmentation buffer
       could not be allocated. So, instead of aborting, simply decode
       something even if the result may look totally inaccurate */
    const unsigned int enable_segmentation =
        pic_param->pic_fields.bits.segmentation_enabled &&
        gen7_mfd_context->segmentation_buffer.valid;

    /* num_of_partitions counts the first (mode/mv) partition plus the
     * token partitions; the hardware wants log2 of the token count. */
    log2num = (int)log2(slice_param->num_of_partitions - 1);

    BEGIN_BCS_BATCH(batch, 38);
    OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
    /* DW1: frame size in macroblocks, minus one */
    OUT_BCS_BATCH(batch,
                  (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
                  (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
    /* DW2: frame header flags */
    OUT_BCS_BATCH(batch,
                  log2num << 24 |
                  pic_param->pic_fields.bits.sharpness_level << 16 |
                  pic_param->pic_fields.bits.sign_bias_alternate << 13 |
                  pic_param->pic_fields.bits.sign_bias_golden << 12 |
                  pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
                  pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
                  (enable_segmentation &&
                   pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
                  pic_param->pic_fields.bits.segmentation_enabled << 8 |
                  (enable_segmentation &&
                   !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
                  (enable_segmentation &&
                   pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
                  (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
                  pic_param->pic_fields.bits.filter_type << 4 |
                  (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
                  !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */

    OUT_BCS_BATCH(batch,
                  pic_param->loop_filter_level[3] << 24 |
                  pic_param->loop_filter_level[2] << 16 |
                  pic_param->loop_filter_level[1] <<  8 |
                  pic_param->loop_filter_level[0] <<  0);

    /* Quantizer Value for 4 segmetns, DW4-DW15 */
    for (i = 0; i < 4; i++) {
        quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
        quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
        quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /*y2dc*/
        /* 101581>>16 is equivalent to 155/100 */
        quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /*y2ac*/
        quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
        quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/

        /* Spec-mandated clamps: y2ac >= 8, uvdc <= 132 */
        quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
        quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);

        OUT_BCS_BATCH(batch,
                      quantization_value[i][0] << 16 | /* Y1AC */
                      quantization_value[i][1] <<  0); /* Y1DC */
        OUT_BCS_BATCH(batch,
                      quantization_value[i][5] << 16 | /* UVAC */
                      quantization_value[i][4] <<  0); /* UVDC */
        OUT_BCS_BATCH(batch,
                      quantization_value[i][3] << 16 | /* Y2AC */
                      quantization_value[i][2] <<  0); /* Y2DC */
    }

    /* CoeffProbability table for non-key frame, DW16-DW18 */
    if (probs_bo) {
        OUT_BCS_RELOC64(batch, probs_bo,
                        0, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
        OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    } else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch,
                  pic_param->mb_segment_tree_probs[2] << 16 |
                  pic_param->mb_segment_tree_probs[1] <<  8 |
                  pic_param->mb_segment_tree_probs[0] <<  0);

    OUT_BCS_BATCH(batch,
                  pic_param->prob_skip_false << 24 |
                  pic_param->prob_intra      << 16 |
                  pic_param->prob_last       <<  8 |
                  pic_param->prob_gf         <<  0);

    OUT_BCS_BATCH(batch,
                  pic_param->y_mode_probs[3] << 24 |
                  pic_param->y_mode_probs[2] << 16 |
                  pic_param->y_mode_probs[1] <<  8 |
                  pic_param->y_mode_probs[0] <<  0);

    OUT_BCS_BATCH(batch,
                  pic_param->uv_mode_probs[2] << 16 |
                  pic_param->uv_mode_probs[1] <<  8 |
                  pic_param->uv_mode_probs[0] <<  0);

    /* MV update value, DW23-DW32 */
    for (i = 0; i < 2; i++) {
        for (j = 0; j < 20; j += 4) {
            OUT_BCS_BATCH(batch,
                          (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
                          pic_param->mv_probs[i][j + 2] << 16 |
                          pic_param->mv_probs[i][j + 1] <<  8 |
                          pic_param->mv_probs[i][j + 0] <<  0);
        }
    }

    /* Loop filter deltas are 7-bit signed-magnitude fields */
    OUT_BCS_BATCH(batch,
                  (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
                  (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
                  (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
                  (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);

    OUT_BCS_BATCH(batch,
                  (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
                  (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
                  (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
                  (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);

    /* segmentation id stream base address, DW35-DW37 */
    if (enable_segmentation) {
        OUT_BCS_RELOC64(batch, gen7_mfd_context->segmentation_buffer.bo,
                        0, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
        OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    } else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    ADVANCE_BCS_BATCH(batch);
}
3408
/* Emit MFD_VP8_BSD_OBJECT describing partition 0 (boolean-coder state,
 * size and offset of the mode/mv data past the uncompressed header) and
 * the sizes/offsets of up to 8 token partitions. */
static void
gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVP8 *pic_param,
                        VASliceParameterBufferVP8 *slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i, log2num;
    /* Byte offset of the first full byte after the partially consumed
     * macroblock-offset bits. */
    unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
    unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
    unsigned int partition_size_0 = slice_param->partition_size[0];

    assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
    /* A fully consumed byte is folded into the offset instead. */
    if (used_bits == 8) {
        used_bits = 0;
        offset += 1;
        partition_size_0 -= 1;
    }

    assert(slice_param->num_of_partitions >= 2);
    assert(slice_param->num_of_partitions <= 9);

    /* log2 of the token partition count (num_of_partitions includes the
     * first mode/mv partition). */
    log2num = (int)log2(slice_param->num_of_partitions - 1);

    BEGIN_BCS_BATCH(batch, 22);
    OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
    OUT_BCS_BATCH(batch,
                  used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
                  pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
                  log2num << 4 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
                  0);

    OUT_BCS_BATCH(batch, partition_size_0 + 1);
    OUT_BCS_BATCH(batch, offset);
    //partion sizes in bytes are present after the above first partition when there are more than one token partition
    offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
    for (i = 1; i < 9; i++) {
        if (i < slice_param->num_of_partitions) {
            OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
            OUT_BCS_BATCH(batch, offset);
        } else {
            /* Unused partition slots are zeroed. */
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }

        offset += slice_param->partition_size[i];
    }

    OUT_BCS_BATCH(batch, 0); /* concealment method */

    ADVANCE_BCS_BATCH(batch);
}
3465
3466 void
3467 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3468                             struct decode_state *decode_state,
3469                             struct gen7_mfd_context *gen7_mfd_context)
3470 {
3471     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3472     VAPictureParameterBufferVP8 *pic_param;
3473     VASliceParameterBufferVP8 *slice_param;
3474     dri_bo *slice_data_bo;
3475
3476     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3477     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3478
3479     /* one slice per frame */
3480     if (decode_state->num_slice_params != 1 ||
3481         (!decode_state->slice_params ||
3482          !decode_state->slice_params[0] ||
3483          (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3484         (!decode_state->slice_datas ||
3485          !decode_state->slice_datas[0] ||
3486          !decode_state->slice_datas[0]->bo) ||
3487         !decode_state->probability_data) {
3488         WARN_ONCE("Wrong parameters for VP8 decoding\n");
3489
3490         return;
3491     }
3492
3493     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3494     slice_data_bo = decode_state->slice_datas[0]->bo;
3495
3496     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3497     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3498     intel_batchbuffer_emit_mi_flush(batch);
3499     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3500     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3501     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3502     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3503     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3504     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3505     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3506     intel_batchbuffer_end_atomic(batch);
3507     intel_batchbuffer_flush(batch);
3508 }
3509
3510 static VAStatus
3511 gen8_mfd_decode_picture(VADriverContextP ctx,
3512                         VAProfile profile,
3513                         union codec_state *codec_state,
3514                         struct hw_context *hw_context)
3515
3516 {
3517     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3518     struct decode_state *decode_state = &codec_state->decode;
3519     VAStatus vaStatus;
3520
3521     assert(gen7_mfd_context);
3522
3523     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3524
3525     if (vaStatus != VA_STATUS_SUCCESS)
3526         goto out;
3527
3528     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3529
3530     switch (profile) {
3531     case VAProfileMPEG2Simple:
3532     case VAProfileMPEG2Main:
3533         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3534         break;
3535
3536     case VAProfileH264ConstrainedBaseline:
3537     case VAProfileH264Main:
3538     case VAProfileH264High:
3539     case VAProfileH264StereoHigh:
3540     case VAProfileH264MultiviewHigh:
3541         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3542         break;
3543
3544     case VAProfileVC1Simple:
3545     case VAProfileVC1Main:
3546     case VAProfileVC1Advanced:
3547         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3548         break;
3549
3550     case VAProfileJPEGBaseline:
3551         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3552         break;
3553
3554     case VAProfileVP8Version0_3:
3555         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3556         break;
3557
3558     default:
3559         assert(0);
3560         break;
3561     }
3562
3563     vaStatus = VA_STATUS_SUCCESS;
3564
3565 out:
3566     return vaStatus;
3567 }
3568
3569 static void
3570 gen8_mfd_context_destroy(void *hw_context)
3571 {
3572     VADriverContextP ctx;
3573     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3574
3575     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3576
3577     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3578     gen7_mfd_context->post_deblocking_output.bo = NULL;
3579
3580     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3581     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3582
3583     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3584     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3585
3586     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3587     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3588
3589     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3590     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3591
3592     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3593     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3594
3595     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3596     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3597
3598     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3599     gen7_mfd_context->segmentation_buffer.bo = NULL;
3600
3601     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3602
3603     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3604         i965_DestroySurfaces(ctx,
3605                              &gen7_mfd_context->jpeg_wa_surface_id,
3606                              1);
3607         gen7_mfd_context->jpeg_wa_surface_object = NULL;
3608     }
3609
3610     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3611     free(gen7_mfd_context);
3612 }
3613
/* Per-context MPEG-2 initialization: mark all four quantiser-matrix slots
 * as not-yet-loaded (-1, i.e. no matrix seen so far — presumably forces a
 * fresh load when the first IQ matrix buffer arrives; confirm against
 * gen8_mfd_mpeg2_qm_state). */
static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
                                        struct gen7_mfd_context *gen7_mfd_context)
{
    gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
}
3622
3623 struct hw_context *
3624 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3625 {
3626     struct intel_driver_data *intel = intel_driver_data(ctx);
3627     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3628     int i;
3629
3630     if (!gen7_mfd_context)
3631         return NULL;
3632
3633     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3634     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3635     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3636
3637     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3638         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3639         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3640     }
3641
3642     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3643     gen7_mfd_context->segmentation_buffer.valid = 0;
3644
3645     switch (obj_config->profile) {
3646     case VAProfileMPEG2Simple:
3647     case VAProfileMPEG2Main:
3648         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3649         break;
3650
3651     case VAProfileH264ConstrainedBaseline:
3652     case VAProfileH264Main:
3653     case VAProfileH264High:
3654     case VAProfileH264StereoHigh:
3655     case VAProfileH264MultiviewHigh:
3656         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3657         break;
3658     default:
3659         break;
3660     }
3661
3662     gen7_mfd_context->driver_context = ctx;
3663     return (struct hw_context *)gen7_mfd_context;
3664 }