OSDN Git Service

genX_mfd: remove one-to-one mapping for picture_type
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV     2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
/* Standard 8x8 zig-zag scan order: entry i gives the raster-order index of
 * the i-th coefficient in zig-zag order. Used to de-zigzag quantization
 * matrices before uploading them to the MFX hardware (usage is outside this
 * chunk — presumably the MPEG-2 IQ-matrix path; confirm against callers). */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
61
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77
78         if (!gen7_avc_surface)
79             return;
80
81         gen7_avc_surface->base.frame_store_id = -1;
82         assert((obj_surface->size & 0x3f) == 0);
83         obj_surface->private_data = gen7_avc_surface;
84     }
85
86     /* DMV buffers now relate to the whole frame, irrespective of
87        field coding modes */
88     if (gen7_avc_surface->dmv_top == NULL) {
89         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
90                                                  "direct mv w/r buffer",
91                                                  width_in_mbs * height_in_mbs * 128,
92                                                  0x1000);
93         assert(gen7_avc_surface->dmv_top);
94     }
95 }
96
97 static void
98 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
99                           struct decode_state *decode_state,
100                           int standard_select,
101                           struct gen7_mfd_context *gen7_mfd_context)
102 {
103     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
104
105     assert(standard_select == MFX_FORMAT_MPEG2 ||
106            standard_select == MFX_FORMAT_AVC ||
107            standard_select == MFX_FORMAT_VC1 ||
108            standard_select == MFX_FORMAT_JPEG ||
109            standard_select == MFX_FORMAT_VP8);
110
111     BEGIN_BCS_BATCH(batch, 5);
112     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
113     OUT_BCS_BATCH(batch,
114                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
115                   (MFD_MODE_VLD << 15) | /* VLD mode */
116                   (0 << 10) | /* disable Stream-Out */
117                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
118                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
119                   (0 << 5)  | /* not in stitch mode */
120                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
121                   (standard_select << 0));
122     OUT_BCS_BATCH(batch,
123                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
124                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
125                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
126                   (0 << 1)  |
127                   (0 << 0));
128     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
129     OUT_BCS_BATCH(batch, 0); /* reserved */
130     ADVANCE_BCS_BATCH(batch);
131 }
132
/* Emit MFX_SURFACE_STATE for the surface being decoded into: dimensions,
 * pitch, tiling, pixel format and the Y offsets of the chroma planes. */
static void
gen8_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface = decode_state->render_object;
    unsigned int y_cb_offset;
    unsigned int y_cr_offset;
    unsigned int surface_format;

    assert(obj_surface);

    y_cb_offset = obj_surface->y_cb_offset;
    y_cr_offset = obj_surface->y_cr_offset;

    /* Y800 (grey-scale) surfaces are programmed as monochrome;
     * everything else handled here is 8-bit planar 4:2:0. */
    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
                     MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 18) |
                  ((obj_surface->orig_width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (surface_format << 28) | /* 420 planar YUV surface */
                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  ((standard_select == MFX_FORMAT_JPEG ? y_cr_offset : 0) << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
175
/* Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): the pre/post-deblocking
 * destinations, row-store scratch buffers, and the 16 reference surface
 * addresses. Each address slot is a 64-bit pair (via OUT_BCS_RELOC64 or
 * two zero dwords) followed by a MOCS dword where the layout requires it.
 * The dword count and ordering are fixed by the hardware command format. */
static void
gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* Pre-deblock 1-3 */
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->pre_deblocking_output.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Post-deblocking 4-6 */
    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->post_deblocking_output.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* uncompressed-video & stream out 7-12 (both ignored for decoding) */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* intra row-store scratch 13-15 */
    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* deblocking-filter-row-store 16-18 */
    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 19..50: up to 16 reference surfaces, 64-bit address each;
     * unused slots are filled with zero pairs. Read-only to the GPU. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->bo) {
            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

            OUT_BCS_RELOC64(batch, obj_surface->bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }

    }

    /* reference property 51 */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Macroblock status & ILDB 52-57 (unused: zero) */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the second Macroblock status 58-60 (unused: zero) */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
281
282 static void
283 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
284                                  dri_bo *slice_data_bo,
285                                  int standard_select,
286                                  struct gen7_mfd_context *gen7_mfd_context)
287 {
288     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
289     struct i965_driver_data *i965 = i965_driver_data(ctx);
290
291     BEGIN_BCS_BATCH(batch, 26);
292     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
293     /* MFX In BS 1-5 */
294     OUT_BCS_RELOC64(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
295     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
296     /* Upper bound 4-5 */
297     OUT_BCS_BATCH(batch, 0);
298     OUT_BCS_BATCH(batch, 0);
299
300     /* MFX indirect MV 6-10 */
301     OUT_BCS_BATCH(batch, 0);
302     OUT_BCS_BATCH(batch, 0);
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305     OUT_BCS_BATCH(batch, 0);
306
307     /* MFX IT_COFF 11-15 */
308     OUT_BCS_BATCH(batch, 0);
309     OUT_BCS_BATCH(batch, 0);
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312     OUT_BCS_BATCH(batch, 0);
313
314     /* MFX IT_DBLK 16-20 */
315     OUT_BCS_BATCH(batch, 0);
316     OUT_BCS_BATCH(batch, 0);
317     OUT_BCS_BATCH(batch, 0);
318     OUT_BCS_BATCH(batch, 0);
319     OUT_BCS_BATCH(batch, 0);
320
321     /* MFX PAK_BSE object for encoder 21-25 */
322     OUT_BCS_BATCH(batch, 0);
323     OUT_BCS_BATCH(batch, 0);
324     OUT_BCS_BATCH(batch, 0);
325     OUT_BCS_BATCH(batch, 0);
326     OUT_BCS_BATCH(batch, 0);
327
328     ADVANCE_BCS_BATCH(batch);
329 }
330
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): the BSD/MPC and MPR
 * row-store scratch buffers and the VC-1 bitplane read buffer. Each is a
 * 64-bit address pair (reloc or two zero dwords) followed by a MOCS dword. */
static void
gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* BSD/MPC row-store scratch buffer 1-3 */
    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* MPR Row Store Scratch buffer 4-6 */
    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Bitplane 7-9 (read-only to the GPU) */
    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, 0,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    ADVANCE_BCS_BATCH(batch);
}
377
378 static void
379 gen8_mfd_qm_state(VADriverContextP ctx,
380                   int qm_type,
381                   unsigned char *qm,
382                   int qm_length,
383                   struct gen7_mfd_context *gen7_mfd_context)
384 {
385     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
386     unsigned int qm_buffer[16];
387
388     assert(qm_length <= 16 * 4);
389     memcpy(qm_buffer, qm, qm_length);
390
391     BEGIN_BCS_BATCH(batch, 18);
392     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
393     OUT_BCS_BATCH(batch, qm_type << 0);
394     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
395     ADVANCE_BCS_BATCH(batch);
396 }
397
/* Emit MFX_AVC_IMG_STATE (17 dwords): per-picture AVC decode parameters
 * — frame geometry in macroblocks, QP index offsets, weighted-prediction
 * mode, field/frame structure and the sequence/picture flags the MFX
 * engine needs. Only long-format VLD parameters are programmed; DW5..16
 * are left zero. */
static void
gen8_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    /* Image structure: 0 = frame, 1 = top field, 3 = bottom field. */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* Field pictures (odd img_struct) must carry field_pic_flag. */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF: sequence allows adaptive frame/field and this is a frame pic. */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 17);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
    /* DW1: total macroblock count minus one. */
    OUT_BCS_BATCH(batch,
                  (width_in_mbs * height_in_mbs - 1));
    /* DW2: frame dimensions in macroblocks. */
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    /* DW3: chroma QP offsets, weighted prediction, image structure. */
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    /* DW4: sequence/picture coding flags. */
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    /* DW5..16: unused for long-format decode. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
483
484 static void
485 gen8_mfd_avc_qm_state(VADriverContextP ctx,
486                       struct decode_state *decode_state,
487                       struct gen7_mfd_context *gen7_mfd_context)
488 {
489     VAIQMatrixBufferH264 *iq_matrix;
490     VAPictureParameterBufferH264 *pic_param;
491
492     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
493         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
494     else
495         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
496
497     assert(decode_state->pic_param && decode_state->pic_param->buffer);
498     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
499
500     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
501     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
502
503     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
504         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
505         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
506     }
507 }
508
509 static inline void
510 gen8_mfd_avc_picid_state(VADriverContextP ctx,
511                          struct decode_state *decode_state,
512                          struct gen7_mfd_context *gen7_mfd_context)
513 {
514     gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
515                                gen7_mfd_context->reference_surface);
516 }
517
518 static void
519 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
520                               struct decode_state *decode_state,
521                               VAPictureParameterBufferH264 *pic_param,
522                               VASliceParameterBufferH264 *slice_param,
523                               struct gen7_mfd_context *gen7_mfd_context)
524 {
525     struct i965_driver_data *i965 = i965_driver_data(ctx);
526     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
527     struct object_surface *obj_surface;
528     GenAvcSurface *gen7_avc_surface;
529     VAPictureH264 *va_pic;
530     int i;
531
532     BEGIN_BCS_BATCH(batch, 71);
533     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
534
535     /* reference surfaces 0..15 */
536     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
537         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
538             gen7_mfd_context->reference_surface[i].obj_surface &&
539             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
540
541             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
542             gen7_avc_surface = obj_surface->private_data;
543
544             OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
545                             I915_GEM_DOMAIN_INSTRUCTION, 0,
546                             0);
547         } else {
548             OUT_BCS_BATCH(batch, 0);
549             OUT_BCS_BATCH(batch, 0);
550         }
551     }
552
553     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
554
555     /* the current decoding frame/field */
556     va_pic = &pic_param->CurrPic;
557     obj_surface = decode_state->render_object;
558     assert(obj_surface->bo && obj_surface->private_data);
559     gen7_avc_surface = obj_surface->private_data;
560
561     OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
562                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
563                     0);
564
565     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
566
567     /* POC List */
568     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
569         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
570
571         if (obj_surface) {
572             const VAPictureH264 * const va_pic = avc_find_picture(
573                                                      obj_surface->base.id, pic_param->ReferenceFrames,
574                                                      ARRAY_ELEMS(pic_param->ReferenceFrames));
575
576             assert(va_pic != NULL);
577             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
578             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
579         } else {
580             OUT_BCS_BATCH(batch, 0);
581             OUT_BCS_BATCH(batch, 0);
582         }
583     }
584
585     va_pic = &pic_param->CurrPic;
586     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
587     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
588
589     ADVANCE_BCS_BATCH(batch);
590 }
591
592 static void
593 gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
594                                  VAPictureParameterBufferH264 *pic_param,
595                                  VASliceParameterBufferH264 *next_slice_param,
596                                  struct gen7_mfd_context *gen7_mfd_context)
597 {
598     gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
599 }
600
/* Emit MFX_AVC_SLICE_STATE for one slice: hardware slice type, active
 * reference counts, weight denominators, QP/deblocking parameters, and the
 * macroblock start position of this slice and of the next one (so the
 * hardware knows where this slice ends). next_slice_param == NULL marks
 * the last slice of the picture. */
static void
gen8_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* Collapse SI->I and SP->P: the hardware only knows I/P/B. */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    /* Active reference counts: I uses none, P uses L0 only, B uses both. */
    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    /* Convert the linear first-MB index into (x, y) MB coordinates;
     * MBAFF rows count macroblock pairs, hence the doubling. */
    first_mb_in_slice = slice_param->first_mb_in_slice;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    if (mbaff_picture)
        slice_ver_pos = slice_ver_pos << 1;
    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;

        if (mbaff_picture)
            next_slice_ver_pos = next_slice_ver_pos << 1;
    } else {
        /* Last slice: "next" position is one past the bottom of the
         * picture (half height for a field picture). */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
691
692 static inline void
693 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
694                            VAPictureParameterBufferH264 *pic_param,
695                            VASliceParameterBufferH264 *slice_param,
696                            struct gen7_mfd_context *gen7_mfd_context)
697 {
698     gen6_send_avc_ref_idx_state(
699         gen7_mfd_context->base.batch,
700         slice_param,
701         gen7_mfd_context->reference_surface
702     );
703 }
704
705 static void
706 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
707                                 VAPictureParameterBufferH264 *pic_param,
708                                 VASliceParameterBufferH264 *slice_param,
709                                 struct gen7_mfd_context *gen7_mfd_context)
710 {
711     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
712     int i, j, num_weight_offset_table = 0;
713     short weightoffsets[32 * 6];
714
715     if ((slice_param->slice_type == SLICE_TYPE_P ||
716          slice_param->slice_type == SLICE_TYPE_SP) &&
717         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
718         num_weight_offset_table = 1;
719     }
720
721     if ((slice_param->slice_type == SLICE_TYPE_B) &&
722         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
723         num_weight_offset_table = 2;
724     }
725
726     for (i = 0; i < num_weight_offset_table; i++) {
727         BEGIN_BCS_BATCH(batch, 98);
728         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
729         OUT_BCS_BATCH(batch, i);
730
731         if (i == 0) {
732             for (j = 0; j < 32; j++) {
733                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
734                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
735                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
736                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
737                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
738                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
739             }
740         } else {
741             for (j = 0; j < 32; j++) {
742                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
743                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
744                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
745                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
746                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
747                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
748             }
749         }
750
751         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
752         ADVANCE_BCS_BATCH(batch);
753     }
754 }
755
/*
 * Emit an MFD_AVC_BSD_OBJECT command that starts bitstream decoding of
 * a single AVC slice.  The slice data was made addressable beforehand
 * through the indirect object base address state.
 */
static void
gen8_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* Bit offset of the first macroblock within the slice data;
     * NOTE(review): presumably accounts for CABAC byte alignment when
     * entropy_coding_mode_flag is set — see avc_get_first_mb_bit_offset. */
    int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
                                                            slice_param,
                                                            pic_param->pic_fields.bits.entropy_coding_mode_flag);

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    /* DW1/DW2: size and offset of the slice data in the indirect object. */
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_data_size));
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    /* DW4: byte offset of the first MB in bits 16+, remaining bit offset
     * in bits 0-2; bit 3 marks the last slice of the frame. */
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) |
                  (1 << 7)  |
                  (0 << 5)  |
                  (0 << 4)  |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
791
792 static inline void
793 gen8_mfd_avc_context_init(
794     VADriverContextP         ctx,
795     struct gen7_mfd_context *gen7_mfd_context
796 )
797 {
798     /* Initialize flat scaling lists */
799     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
800 }
801
/*
 * Per-frame setup for AVC decoding: scans the slices to decide whether
 * in-loop deblocking is needed, updates the reference frame store, binds
 * the render target as decode output, and (re)allocates the row-store
 * scratch buffers sized by the frame width in macroblocks.
 */
static void
gen8_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    unsigned int width_in_mbs, height_in_mbs;

    /* Deblocking is required as soon as any slice does not explicitly
     * disable it (disable_deblocking_filter_idc != 1); stop scanning at
     * the first such slice. */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
                                       gen7_mfd_context->reference_surface);
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    if (pic_param->pic_fields.bits.reference_pic_flag)
        obj_surface->flags |= SURFACE_REFERENCED;
    else
        obj_surface->flags &= ~SURFACE_REFERENCED;

    avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
    gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* The same surface bo backs both decode outputs; exactly one of the
     * two is marked valid depending on whether deblocking runs. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Scratch row stores are reallocated each frame, sized per MB column. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* No bitplane data for AVC (used by VC-1 only). */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
903
/*
 * Top-level AVC decode entry point: runs per-frame init, then emits the
 * whole MFX command stream — pipeline/surface/buffer state, per-frame
 * AVC state, and one group of per-slice commands for every slice — inside
 * a single atomic BCS batch that is flushed at the end.
 */
static void
gen8_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);

    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    /* Frame-level state must precede any per-slice command. */
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);

        /* First slice of the next slice-parameter buffer, so the last
         * slice of this group can chain to it (NULL at end of frame). */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        /* NOTE(review): when the first coded slice does not start at MB 0,
         * a phantom slice is emitted first — presumably to cover the
         * missing leading macroblocks; confirm against
         * gen8_mfd_avc_phantom_slice_first. */
        if (j == 0 && slice_param->first_mb_in_slice)
            gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            /* Per-slice state followed by the BSD object that kicks off
             * decoding of that slice. */
            gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
968
969 static void
970 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
971                            struct decode_state *decode_state,
972                            struct gen7_mfd_context *gen7_mfd_context)
973 {
974     VAPictureParameterBufferMPEG2 *pic_param;
975     struct i965_driver_data *i965 = i965_driver_data(ctx);
976     struct object_surface *obj_surface;
977     dri_bo *bo;
978     unsigned int width_in_mbs;
979
980     assert(decode_state->pic_param && decode_state->pic_param->buffer);
981     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
982     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
983
984     mpeg2_set_reference_surfaces(
985         ctx,
986         gen7_mfd_context->reference_surface,
987         decode_state,
988         pic_param
989     );
990
991     /* Current decoded picture */
992     obj_surface = decode_state->render_object;
993     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
994
995     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
996     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
997     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
998     gen7_mfd_context->pre_deblocking_output.valid = 1;
999
1000     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1001     bo = dri_bo_alloc(i965->intel.bufmgr,
1002                       "bsd mpc row store",
1003                       width_in_mbs * 96,
1004                       0x1000);
1005     assert(bo);
1006     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1007     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1008
1009     gen7_mfd_context->post_deblocking_output.valid = 0;
1010     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1011     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1012     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1013     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1014 }
1015
/*
 * Emit MFX_MPEG2_PIC_STATE from the VA MPEG-2 picture parameters.
 */
static void
gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int slice_concealment_disable_bit = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    /* HW slice concealment is unconditionally disabled on this path. */
    slice_concealment_disable_bit = 1;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    /* DW1: VA packs f_code as four nibbles, f_code[0][0] in the most
     * significant one; the picture coding extension flags follow below. */
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    /* DW2: picture_coding_type is passed through as-is (no remapping). */
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    /* DW3: frame size in macroblocks, minus one in each dimension. */
    OUT_BCS_BATCH(batch,
                  (slice_concealment_disable_bit << 31) |
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
1062
1063 static void
1064 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1065                         struct decode_state *decode_state,
1066                         struct gen7_mfd_context *gen7_mfd_context)
1067 {
1068     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1069     int i, j;
1070
1071     /* Update internal QM state */
1072     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1073         VAIQMatrixBufferMPEG2 * const iq_matrix =
1074             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1075
1076         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1077             iq_matrix->load_intra_quantiser_matrix) {
1078             gen_iq_matrix->load_intra_quantiser_matrix =
1079                 iq_matrix->load_intra_quantiser_matrix;
1080             if (iq_matrix->load_intra_quantiser_matrix) {
1081                 for (j = 0; j < 64; j++)
1082                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1083                         iq_matrix->intra_quantiser_matrix[j];
1084             }
1085         }
1086
1087         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1088             iq_matrix->load_non_intra_quantiser_matrix) {
1089             gen_iq_matrix->load_non_intra_quantiser_matrix =
1090                 iq_matrix->load_non_intra_quantiser_matrix;
1091             if (iq_matrix->load_non_intra_quantiser_matrix) {
1092                 for (j = 0; j < 64; j++)
1093                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1094                         iq_matrix->non_intra_quantiser_matrix[j];
1095             }
1096         }
1097     }
1098
1099     /* Commit QM state to HW */
1100     for (i = 0; i < 2; i++) {
1101         unsigned char *qm = NULL;
1102         int qm_type;
1103
1104         if (i == 0) {
1105             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1106                 qm = gen_iq_matrix->intra_quantiser_matrix;
1107                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1108             }
1109         } else {
1110             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1111                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1112                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1113             }
1114         }
1115
1116         if (!qm)
1117             continue;
1118
1119         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1120     }
1121 }
1122
/*
 * Emit MFD_MPEG2_BSD_OBJECT for one slice.  The macroblock span the
 * slice covers is derived from this slice's position and the next
 * slice's position (or the bottom of the picture for the last slice).
 */
static void
gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    /* NOTE(review): wa_mpeg2_slice_vertical_position > 0 apparently means
     * the app sent frame-based vertical positions for a field picture, so
     * they must be halved below — confirm against
     * mpeg2_wa_slice_vertical_position. */
    is_field_pic_wa = is_field_pic &&
                      gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    /* Start position of this slice in macroblock units. */
    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    /* End position: start of the next slice, or the picture bottom
     * (halved for field pictures) when this is the last slice. */
    if (next_slice_param == NULL) {
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    /* DW1/DW2: slice data size/offset adjusted to skip the slice header
     * bytes before macroblock_offset. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    /* DW3: start MB position, MB count, last-slice flags, and the bit
     * offset of the first MB within its start byte. */
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (vpos1 << 8 | hpos1));
    ADVANCE_BCS_BATCH(batch);
}
1171
/*
 * Top-level MPEG-2 decode entry point: per-frame init followed by the
 * MFX command sequence and one BSD object per slice, all inside a single
 * atomic BCS batch that is flushed at the end.
 */
static void
gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    /* Frame-level state must precede any per-slice command. */
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Lazily probe whether the slice_vertical_position workaround applies
     * (cached per context; a negative value means still undetermined). */
    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);

        /* First slice of the next parameter buffer, so the last slice of
         * this group can compute its macroblock span (NULL at frame end). */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1227
/* Map VC-1 motion-vector modes to the GEN7 MFX encoding.
 * NOTE(review): index is presumably the VA mv_fields mv_mode value —
 * confirm against the pic state code that consumes this table. */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pef bilinear */
    0, /* Mixed MV */
};
1234
/* NOTE(review): presumably the VC-1 B-picture scale factors indexed by
 * BFRACTION code (used for direct-mode MV scaling) — confirm against the
 * VC-1 spec table before relying on the ordering. */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160,
    224,
};
1242
/* Map VC-1 conditional-overlap (CONDOVER) values to the GEN7 MFX
 * encoding; index is the VA conditional_overlap_flag value. */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};
1248
/* Map the VA sequence_fields profile value (simple/main/reserved/advanced)
 * to the GEN7 VC-1 profile codes. */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
};
1255
1256 static void
1257 gen8_mfd_free_vc1_surface(void **data)
1258 {
1259     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1260
1261     if (!gen7_vc1_surface)
1262         return;
1263
1264     dri_bo_unreference(gen7_vc1_surface->dmv);
1265     free(gen7_vc1_surface);
1266     *data = NULL;
1267 }
1268
1269 static void
1270 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1271                           VAPictureParameterBufferVC1 *pic_param,
1272                           struct object_surface *obj_surface)
1273 {
1274     struct i965_driver_data *i965 = i965_driver_data(ctx);
1275     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1276     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1277
1278     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1279
1280     if (!gen7_vc1_surface) {
1281         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1282
1283         if (!gen7_vc1_surface)
1284             return;
1285
1286         assert((obj_surface->size & 0x3f) == 0);
1287         obj_surface->private_data = gen7_vc1_surface;
1288     }
1289
1290     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1291     gen7_vc1_surface->intensity_compensation = 0;
1292     gen7_vc1_surface->luma_scale = 0;
1293     gen7_vc1_surface->luma_shift = 0;
1294
1295     if (gen7_vc1_surface->dmv == NULL) {
1296         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1297                                              "direct mv w/r buffer",
1298                                              128 * height_in_mbs * 64,
1299                                              0x1000);
1300     }
1301 }
1302
/*
 * Per-frame setup for VC-1 decoding: updates the reference frame store,
 * propagates intensity-compensation parameters to the forward reference
 * surface, binds the render target, (re)allocates the scratch buffers,
 * and repacks the VA bitplane buffer into the nibble-per-macroblock
 * layout the hardware reads.
 */
static void
gen8_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int width_in_mbs;
    int picture_type;
    int intensity_compensation;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    picture_type = pic_param->picture_fields.bits.picture_type;
    intensity_compensation = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);

    intel_update_vc1_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Forward reference picture */
    obj_surface = decode_state->reference_objects[0];
    if (pic_param->forward_reference_picture != VA_INVALID_ID &&
        obj_surface &&
        obj_surface->private_data) {
        /* NOTE(review): compares the raw VA picture_type (1 == P picture)
         * rather than a remapped value — confirm this matches the VA enum. */
        if (picture_type == 1 && intensity_compensation) { /* P picture */
            struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;

            /* Remember on the reference that this P picture applies
             * intensity compensation to it, and with which parameters. */
            gen7_vc1_surface->intensity_compensation = intensity_compensation;
            gen7_vc1_surface->luma_scale = pic_param->luma_scale;
            gen7_vc1_surface->luma_shift = pic_param->luma_shift;
        }
    }

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* The same surface bo backs both outputs; which one is valid depends
     * on whether the in-loop filter (entrypoint loopfilter bit) runs. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 7 * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    /* A bitplane is needed when the picture carries any of the low seven
     * bitplane flags; skipped pictures always get a synthesized one. */
    if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
        gen7_mfd_context->bitplane_read_buffer.valid = 1;
    else
        gen7_mfd_context->bitplane_read_buffer.valid = !!(pic_param->bitplane_present.value & 0x7f);
    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);

    if (gen7_mfd_context->bitplane_read_buffer.valid) {
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        /* Destination packs two macroblocks (4 bits each) per byte, and
         * every macroblock row starts on a fresh byte. */
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
        int src_w, src_h;
        uint8_t *src = NULL, *dst = NULL;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "VC-1 Bitplane",
                          bitplane_width * height_in_mbs,
                          0x1000);
        assert(bo);
        gen7_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);
        dst = bo->virtual;

        if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
            /* Skipped picture: synthesize a bitplane with value 0x2 for
             * every macroblock (no VA bitplane buffer to read). */
            for (src_h = 0; src_h < height_in_mbs; src_h++) {
                for (src_w = 0; src_w < width_in_mbs; src_w++) {
                    int dst_index;
                    uint8_t src_value = 0x2;

                    /* Insert into the high nibble; the previous value is
                     * shifted down into the low nibble. */
                    dst_index = src_w / 2;
                    dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
                }

                /* Odd row width: the last value is still in the high
                 * nibble, shift it into place. */
                if (src_w & 1)
                    dst[src_w / 2] >>= 4;

                dst += bitplane_width;
            }
        } else {
            assert(decode_state->bit_plane->buffer);
            src = decode_state->bit_plane->buffer;

            for (src_h = 0; src_h < height_in_mbs; src_h++) {
                for (src_w = 0; src_w < width_in_mbs; src_w++) {
                    int src_index, dst_index;
                    int src_shift;
                    uint8_t src_value;

                    /* VA source is a flat stream of nibbles, the earlier
                     * macroblock of each byte in the high nibble. */
                    src_index = (src_h * width_in_mbs + src_w) / 2;
                    src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                    src_value = ((src[src_index] >> src_shift) & 0xf);

                    dst_index = src_w / 2;
                    dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
                }

                if (src_w & 1)
                    dst[src_w / 2] >>= 4;

                dst += bitplane_width;
            }
        }

        dri_bo_unmap(bo);
    } else
        gen7_mfd_context->bitplane_read_buffer.bo = NULL;
}
1453
/*
 * Emit the MFD_VC1_LONG_PIC_STATE command for the current VC-1 picture.
 *
 * Derives the hardware picture state — alternate quantizer configuration,
 * unified MV mode, reference field polarity, B-fraction scale factor,
 * overlap smoothing, transform coding sets, interpolation mode and the
 * raw-coding/bitplane flags — from the application-supplied
 * VAPictureParameterBufferVC1 and writes the six-dword command into the
 * BCS batch buffer.
 */
static void
gen8_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int ptype;
    int profile;
    int overlap = 0;
    int interpolation_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    picture_type = pic_param->picture_fields.bits.picture_type;

    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /*
     * Translate the VC-1 DQUANT syntax into the hardware's alternate
     * quantizer configuration and the edge mask that selects which
     * macroblock edges use the alternate quantizer ALT_PQ.
     */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        /* DQUANT == 2: all four edges use the alternate quantizer. */
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            /* DQPROFILE selects which edges/regions get ALT_PQ. */
            switch (dqprofile) {
            case 3:
                /* All macroblocks: per-MB binary or arbitrary quantizer. */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;

            case 0:
                /* All four picture edges. */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1:
                /* Double edge: DQDBEDGE encodes the pair of edges. */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2:
                /* Single edge selected by DQSBEDGE. */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /*
     * With intensity compensation the effective MV mode is carried in
     * mv_mode2; otherwise mv_mode applies directly.
     */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    /* Map BFRACTION onto the hardware B-picture scale factor. */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    /* The hardware decodes skipped pictures as P pictures. */
    if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
        ptype = GEN7_VC1_P_PICTURE;
    else
        ptype = pic_param->picture_fields.bits.picture_type;

    /*
     * In advanced profile, treat I pictures like BI pictures for the
     * derivations below (AC coding set, overlap smoothing).
     */
    if (profile == GEN7_VC1_ADVANCED_PROFILE &&
        picture_type == GEN7_VC1_I_PICTURE)
        picture_type = GEN7_VC1_BI_PICTURE;

    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;

        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /*
     * A B picture can only use the direct-MV buffer if the backward
     * anchor was a P picture (I/BI anchors carry no motion vectors).
     */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        struct gen7_vc1_surface *gen7_vc1_surface = NULL;

        obj_surface = decode_state->reference_objects[1];

        if (obj_surface)
            gen7_vc1_surface = obj_surface->private_data;

        if (!gen7_vc1_surface ||
            (gen7_vc1_surface->picture_type == GEN7_VC1_I_PICTURE ||
             gen7_vc1_surface->picture_type == GEN7_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* Frame coding mode: 0 progressive, 1 frame-interlace, 2/3 field
     * pictures (top/bottom field first). */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /* Backward reference frame distance for B pictures. */
    if (picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /*
     * Overlap smoothing: in simple/main profile it depends only on PQ
     * and picture type; in advanced profile I/BI pictures additionally
     * honor CONDOVER.
     */
    if (pic_param->sequence_fields.bits.overlap) {
        if (profile != GEN7_VC1_ADVANCED_PROFILE) {
            if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
                picture_type != GEN7_VC1_B_PICTURE) {
                overlap = 1;
            }
        } else {
            if (picture_type == GEN7_VC1_P_PICTURE &&
                pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
                overlap = 1;
            }
            if (picture_type == GEN7_VC1_I_PICTURE ||
                picture_type == GEN7_VC1_BI_PICTURE) {
                if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
                    overlap = 1;
                } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                           va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
                    overlap = 1;
                }
            }
        }
    }

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    /* Sub-pel interpolation filter selected by the (possibly unified)
     * MV mode. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 9; /* Half-pel bilinear */
    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
        interpolation_mode = 1; /* Half-pel bicubic */
    else
        interpolation_mode = 0; /* Quarter-pel bicubic */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
                  overlap << 4 |
                  !pic_param->picture_fields.bits.is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  ptype << 26 |
                  fcm << 24 |
                  alt_pq << 16 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
                  scale_factor << 0);
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  pic_param->reference_fields.bits.reference_distance << 20 |
                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    OUT_BCS_BATCH(batch,
                  !!(pic_param->bitplane_present.value & 0x7f) << 31 |
                  pic_param->raw_coding.flags.forward_mb << 30 |
                  pic_param->raw_coding.flags.mv_type_mb << 29 |
                  pic_param->raw_coding.flags.skip_mb << 28 |
                  pic_param->raw_coding.flags.direct_mb << 27 |
                  pic_param->raw_coding.flags.overflags << 26 |
                  pic_param->raw_coding.flags.ac_pred << 25 |
                  pic_param->raw_coding.flags.field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
}
1718
1719 static void
1720 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1721                              struct decode_state *decode_state,
1722                              struct gen7_mfd_context *gen7_mfd_context)
1723 {
1724     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1725     VAPictureParameterBufferVC1 *pic_param;
1726     int picture_type;
1727     int intensitycomp_single_fwd = 0;
1728     int luma_scale1 = 0;
1729     int luma_shift1 = 0;
1730
1731     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1732     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1733     picture_type = pic_param->picture_fields.bits.picture_type;
1734
1735     if (gen7_mfd_context->reference_surface[0].surface_id != VA_INVALID_ID) {
1736         if (picture_type == 1 || picture_type == 2) { /* P/B picture */
1737             struct gen7_vc1_surface *gen7_vc1_surface = gen7_mfd_context->reference_surface[0].obj_surface->private_data;
1738             if (gen7_vc1_surface) {
1739                 intensitycomp_single_fwd = gen7_vc1_surface->intensity_compensation;
1740                 luma_scale1 = gen7_vc1_surface->luma_scale;
1741                 luma_shift1 = gen7_vc1_surface->luma_shift;
1742             }
1743         }
1744     }
1745
1746     BEGIN_BCS_BATCH(batch, 6);
1747     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1748     OUT_BCS_BATCH(batch,
1749                   0 << 14 | /* FIXME: double ??? */
1750                   0 << 12 |
1751                   intensitycomp_single_fwd << 10 |
1752                   0 << 8 |
1753                   0 << 4 | /* FIXME: interlace mode */
1754                   0);
1755     OUT_BCS_BATCH(batch,
1756                   luma_shift1 << 16 |
1757                   luma_scale1 << 0);
1758     OUT_BCS_BATCH(batch, 0);
1759     OUT_BCS_BATCH(batch, 0);
1760     OUT_BCS_BATCH(batch, 0);
1761     ADVANCE_BCS_BATCH(batch);
1762 }
1763
1764 static void
1765 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1766                               struct decode_state *decode_state,
1767                               struct gen7_mfd_context *gen7_mfd_context)
1768 {
1769     struct i965_driver_data *i965 = i965_driver_data(ctx);
1770     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1771     struct object_surface *obj_surface;
1772     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1773
1774     obj_surface = decode_state->render_object;
1775
1776     if (obj_surface && obj_surface->private_data) {
1777         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1778     }
1779
1780     obj_surface = decode_state->reference_objects[1];
1781
1782     if (obj_surface && obj_surface->private_data) {
1783         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1784     }
1785
1786     BEGIN_BCS_BATCH(batch, 7);
1787     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1788
1789     if (dmv_write_buffer)
1790         OUT_BCS_RELOC64(batch, dmv_write_buffer,
1791                         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1792                         0);
1793     else {
1794         OUT_BCS_BATCH(batch, 0);
1795         OUT_BCS_BATCH(batch, 0);
1796     }
1797
1798     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
1799
1800     if (dmv_read_buffer)
1801         OUT_BCS_RELOC64(batch, dmv_read_buffer,
1802                         I915_GEM_DOMAIN_INSTRUCTION, 0,
1803                         0);
1804     else {
1805         OUT_BCS_BATCH(batch, 0);
1806         OUT_BCS_BATCH(batch, 0);
1807     }
1808
1809     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
1810
1811     ADVANCE_BCS_BATCH(batch);
1812 }
1813
/*
 * Convert a bit offset inside a slice header into the bit offset the
 * hardware should start decoding macroblocks at.
 *
 * For the advanced profile (profile == 3) the slice header may contain
 * start-code emulation prevention byte sequences (00 00 03 0x), which the
 * hardware does not strip itself; the returned offset accounts for them.
 * NOTE: if such a sequence straddles the end of the header, one byte of
 * `buf` is rewritten in place (the buffer must therefore be writable).
 * For other profiles the offset is returned unchanged.
 */
static int
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_bytes = in_slice_data_bit_offset / 8;
    int i = 0, j = 0;

    /* Simple/Main profile bitstreams carry no emulation prevention. */
    if (profile != 3)
        return in_slice_data_bit_offset;

    /* i walks the header bytes, j walks the raw buffer (which is longer
     * by one byte per emulation sequence encountered). */
    while (i < header_bytes) {
        int emulated = (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4);

        if (emulated && i < header_bytes - 1) {
            /* Skip past the whole 00 00 03 sequence. */
            i += 2;
            j += 3;
        } else if (emulated) {
            /* Sequence straddles the header end: drop the 03 byte by
             * shifting the preceding byte over it. */
            buf[j + 2] = buf[j + 1];
            i += 1;
            j += 2;
        } else {
            i += 1;
            j += 1;
        }
    }

    return 8 * j + in_slice_data_bit_offset % 8;
}
1840
1841 static void
1842 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1843                         VAPictureParameterBufferVC1 *pic_param,
1844                         VASliceParameterBufferVC1 *slice_param,
1845                         VASliceParameterBufferVC1 *next_slice_param,
1846                         dri_bo *slice_data_bo,
1847                         struct gen7_mfd_context *gen7_mfd_context)
1848 {
1849     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1850     int next_slice_start_vert_pos;
1851     int macroblock_offset;
1852     uint8_t *slice_data = NULL;
1853
1854     dri_bo_map(slice_data_bo, True);
1855     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1856     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1857                                                                slice_param->macroblock_offset,
1858                                                                pic_param->sequence_fields.bits.profile);
1859     dri_bo_unmap(slice_data_bo);
1860
1861     if (next_slice_param)
1862         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1863     else
1864         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1865
1866     BEGIN_BCS_BATCH(batch, 5);
1867     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1868     OUT_BCS_BATCH(batch,
1869                   slice_param->slice_data_size - (macroblock_offset >> 3));
1870     OUT_BCS_BATCH(batch,
1871                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1872     OUT_BCS_BATCH(batch,
1873                   slice_param->slice_vertical_position << 16 |
1874                   next_slice_start_vert_pos << 0);
1875     OUT_BCS_BATCH(batch,
1876                   (macroblock_offset & 0x7));
1877     ADVANCE_BCS_BATCH(batch);
1878 }
1879
/*
 * Decode one complete VC-1 picture: initialize per-picture decoder state,
 * emit the fixed sequence of MFX pipeline state commands, then emit one
 * BSD object per slice across all slice-parameter buffers.
 */
static void
gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /* Per-picture buffer/surface setup, then the state command sequence
     * required before any BSD object: pipe mode, surface, buffer
     * addresses, picture state, prediction pipe and direct-mode state. */
    gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);

    /* Outer loop: one slice-parameter buffer (slice group) per iteration. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);

        /* First slice of the next group, used to bound the last slice of
         * this group; NULL for the final group. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        /* Inner loop: one BSD object per slice element in the group. */
        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1932
1933 static void
1934 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1935                           struct decode_state *decode_state,
1936                           struct gen7_mfd_context *gen7_mfd_context)
1937 {
1938     struct object_surface *obj_surface;
1939     VAPictureParameterBufferJPEGBaseline *pic_param;
1940     int subsampling = SUBSAMPLE_YUV420;
1941     int fourcc = VA_FOURCC_IMC3;
1942
1943     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1944
1945     if (pic_param->num_components == 1) {
1946         subsampling = SUBSAMPLE_YUV400;
1947         fourcc = VA_FOURCC_Y800;
1948     } else if (pic_param->num_components == 3) {
1949         int h1 = pic_param->components[0].h_sampling_factor;
1950         int h2 = pic_param->components[1].h_sampling_factor;
1951         int h3 = pic_param->components[2].h_sampling_factor;
1952         int v1 = pic_param->components[0].v_sampling_factor;
1953         int v2 = pic_param->components[1].v_sampling_factor;
1954         int v3 = pic_param->components[2].v_sampling_factor;
1955
1956         if (h1 == 2 * h2 && h2 == h3 &&
1957             v1 == 2 * v2 && v2 == v3) {
1958             subsampling = SUBSAMPLE_YUV420;
1959             fourcc = VA_FOURCC_IMC3;
1960         } else if (h1 == 2 * h2  && h2 == h3 &&
1961                    v1 == v2 && v2 == v3) {
1962             subsampling = SUBSAMPLE_YUV422H;
1963             fourcc = VA_FOURCC_422H;
1964         } else if (h1 == h2 && h2 == h3 &&
1965                    v1 == v2  && v2 == v3) {
1966             subsampling = SUBSAMPLE_YUV444;
1967             fourcc = VA_FOURCC_444P;
1968         } else if (h1 == 4 * h2 && h2 ==  h3 &&
1969                    v1 == v2 && v2 == v3) {
1970             subsampling = SUBSAMPLE_YUV411;
1971             fourcc = VA_FOURCC_411P;
1972         } else if (h1 == h2 && h2 == h3 &&
1973                    v1 == 2 * v2 && v2 == v3) {
1974             subsampling = SUBSAMPLE_YUV422V;
1975             fourcc = VA_FOURCC_422V;
1976         } else
1977             assert(0);
1978     } else {
1979         assert(0);
1980     }
1981
1982     /* Current decoded picture */
1983     obj_surface = decode_state->render_object;
1984     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1985
1986     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1987     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1988     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1989     gen7_mfd_context->pre_deblocking_output.valid = 1;
1990
1991     gen7_mfd_context->post_deblocking_output.bo = NULL;
1992     gen7_mfd_context->post_deblocking_output.valid = 0;
1993
1994     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1995     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1996
1997     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1998     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1999
2000     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2001     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
2002
2003     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2004     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
2005
2006     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2007     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2008 }
2009
/* Map VA rotation indices (0/90/180/270 degrees) to the MFX_JPEG_PIC_STATE
 * rotation field encoding. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
2016
2017 static void
2018 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
2019                         struct decode_state *decode_state,
2020                         struct gen7_mfd_context *gen7_mfd_context)
2021 {
2022     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2023     VAPictureParameterBufferJPEGBaseline *pic_param;
2024     int chroma_type = GEN7_YUV420;
2025     int frame_width_in_blks;
2026     int frame_height_in_blks;
2027
2028     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2029     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2030
2031     if (pic_param->num_components == 1)
2032         chroma_type = GEN7_YUV400;
2033     else if (pic_param->num_components == 3) {
2034         int h1 = pic_param->components[0].h_sampling_factor;
2035         int h2 = pic_param->components[1].h_sampling_factor;
2036         int h3 = pic_param->components[2].h_sampling_factor;
2037         int v1 = pic_param->components[0].v_sampling_factor;
2038         int v2 = pic_param->components[1].v_sampling_factor;
2039         int v3 = pic_param->components[2].v_sampling_factor;
2040
2041         if (h1 == 2 * h2 && h2 == h3 &&
2042             v1 == 2 * v2 && v2 == v3)
2043             chroma_type = GEN7_YUV420;
2044         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2045                  v1 == 1 && v2 == 1 && v3 == 1)
2046             chroma_type = GEN7_YUV422H_2Y;
2047         else if (h1 == h2 && h2 == h3 &&
2048                  v1 == v2 && v2 == v3)
2049             chroma_type = GEN7_YUV444;
2050         else if (h1 == 4 * h2 && h2 == h3 &&
2051                  v1 == v2 && v2 == v3)
2052             chroma_type = GEN7_YUV411;
2053         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2054                  v1 == 2 && v2 == 1 && v3 == 1)
2055             chroma_type = GEN7_YUV422V_2Y;
2056         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2057                  v1 == 2 && v2 == 2 && v3 == 2)
2058             chroma_type = GEN7_YUV422H_4Y;
2059         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2060                  v1 == 2 && v2 == 1 && v3 == 1)
2061             chroma_type = GEN7_YUV422V_4Y;
2062         else
2063             assert(0);
2064     }
2065
2066     if (chroma_type == GEN7_YUV400 ||
2067         chroma_type == GEN7_YUV444 ||
2068         chroma_type == GEN7_YUV422V_2Y) {
2069         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2070         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2071     } else if (chroma_type == GEN7_YUV411) {
2072         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2073         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2074     } else {
2075         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2076         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2077     }
2078
2079     BEGIN_BCS_BATCH(batch, 3);
2080     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2081     OUT_BCS_BATCH(batch,
2082                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2083                   (chroma_type << 0));
2084     OUT_BCS_BATCH(batch,
2085                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2086                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2087     ADVANCE_BCS_BATCH(batch);
2088 }
2089
/* Huffman table slot IDs for MFX_JPEG_HUFF_TABLE_STATE: index 0 is the
 * luma (Y) table, index 1 the chroma (UV) table. */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
2094
2095 static void
2096 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2097                                struct decode_state *decode_state,
2098                                struct gen7_mfd_context *gen7_mfd_context,
2099                                int num_tables)
2100 {
2101     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2102     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2103     int index;
2104
2105     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2106         return;
2107
2108     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2109
2110     for (index = 0; index < num_tables; index++) {
2111         int id = va_to_gen7_jpeg_hufftable[index];
2112         if (!huffman_table->load_huffman_table[index])
2113             continue;
2114         BEGIN_BCS_BATCH(batch, 53);
2115         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2116         OUT_BCS_BATCH(batch, id);
2117         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2118         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2119         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2120         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2121         ADVANCE_BCS_BATCH(batch);
2122     }
2123 }
2124
/* Map the 1-based JPEG component id (1=Y, 2=Cb, 3=Cr, 4=alpha) to the MFX
 * quantizer-matrix type; index 0 is unused. */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
2132
2133 static void
2134 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2135                        struct decode_state *decode_state,
2136                        struct gen7_mfd_context *gen7_mfd_context)
2137 {
2138     VAPictureParameterBufferJPEGBaseline *pic_param;
2139     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2140     int index;
2141
2142     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2143         return;
2144
2145     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2146     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2147
2148     assert(pic_param->num_components <= 3);
2149
2150     for (index = 0; index < pic_param->num_components; index++) {
2151         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2152         int qm_type;
2153         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2154         unsigned char raster_qm[64];
2155         int j;
2156
2157         if (id > 4 || id < 1)
2158             continue;
2159
2160         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2161             continue;
2162
2163         qm_type = va_to_gen7_jpeg_qm[id];
2164
2165         for (j = 0; j < 64; j++)
2166             raster_qm[zigzag_direct[j]] = qm[j];
2167
2168         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2169     }
2170 }
2171
2172 static void
2173 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2174                          VAPictureParameterBufferJPEGBaseline *pic_param,
2175                          VASliceParameterBufferJPEGBaseline *slice_param,
2176                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2177                          dri_bo *slice_data_bo,
2178                          struct gen7_mfd_context *gen7_mfd_context)
2179 {
2180     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2181     int scan_component_mask = 0;
2182     int i;
2183
2184     assert(slice_param->num_components > 0);
2185     assert(slice_param->num_components < 4);
2186     assert(slice_param->num_components <= pic_param->num_components);
2187
2188     for (i = 0; i < slice_param->num_components; i++) {
2189         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2190         case 1:
2191             scan_component_mask |= (1 << 0);
2192             break;
2193         case 2:
2194             scan_component_mask |= (1 << 1);
2195             break;
2196         case 3:
2197             scan_component_mask |= (1 << 2);
2198             break;
2199         default:
2200             assert(0);
2201             break;
2202         }
2203     }
2204
2205     BEGIN_BCS_BATCH(batch, 6);
2206     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2207     OUT_BCS_BATCH(batch,
2208                   slice_param->slice_data_size);
2209     OUT_BCS_BATCH(batch,
2210                   slice_param->slice_data_offset);
2211     OUT_BCS_BATCH(batch,
2212                   slice_param->slice_horizontal_position << 16 |
2213                   slice_param->slice_vertical_position << 0);
2214     OUT_BCS_BATCH(batch,
2215                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2216                   (scan_component_mask << 27) |                 /* scan components */
2217                   (0 << 26) |   /* disable interrupt allowed */
2218                   (slice_param->num_mcus << 0));                /* MCU count */
2219     OUT_BCS_BATCH(batch,
2220                   (slice_param->restart_interval << 0));    /* RestartInterval */
2221     ADVANCE_BCS_BATCH(batch);
2222 }
2223
2224 /* Workaround for JPEG decoding on Ivybridge */
2225 #ifdef JPEG_WA
2226
/* Canned workaround clip: a tiny pre-encoded 16x16 AVC intra frame that is
 * decoded before each real JPEG picture to bring the MFX pipeline into a
 * known state (see gen8_mfd_jpeg_wa()).  Fields, in order: frame width,
 * frame height, raw bitstream bytes, bitstream size in bytes, bit offset
 * of the slice data within the bitstream, and the clip's QP. */
static struct {
    int width;
    int height;
    unsigned char data[32];
    int data_size;
    int data_bit_offset;
    int qp;
} gen7_jpeg_wa_clip = {
    16,
    16,
    {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    14,
    40,
    28,
};
2245
/* Prepare the resources for the JPEG workaround decode: a scratch render
 * surface sized to the canned clip and a one-page BO holding the clip's
 * bitstream.  The surface is recreated on every call; the bitstream BO is
 * allocated and filled only once and then reused. */
static void
gen8_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAStatus status;
    struct object_surface *obj_surface;

    /* Drop the surface left over from a previous workaround run, if any */
    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
                             1);

    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 1,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);

    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    /* Make sure the surface has a backing NV12 BO before it is used as
     * the decode target */
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen7_mfd_context->jpeg_wa_surface_object = obj_surface;

    /* Upload the canned clip bitstream once; reused for all later pictures */
    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
                                                               "JPEG WA data",
                                                               0x1000,
                                                               0x1000);
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       0,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
    }
}
2283
/* Emit MFX_PIPE_MODE_SELECT for the workaround clip: AVC VLD decode,
 * long format, stream-out disabled, pre-deblocking output enabled.
 * The exact DW layout matters; do not reorder the writes. */
static void
gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9)  | /* Post Deblocking Output */
                  (1 << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));   /* clip is an AVC stream, not JPEG */
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
2311
/* Emit MFX_SURFACE_STATE for the workaround target surface (NV12,
 * tiled Y-major), describing its dimensions, pitch and chroma offset. */
static void
gen8_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
2341
/* Emit MFX_PIPE_BUF_ADDR_STATE for the workaround decode: the only real
 * buffers are the pre-deblocking output (the WA surface) and a throwaway
 * intra-row-store scratch BO; everything else is zeroed.  The scratch BO
 * is unreferenced at the end so it is released once the kernel no longer
 * needs it.
 * NOTE(review): the dri_bo_alloc() result is not checked before use —
 * consistent with the rest of this file, but worth confirming the
 * allocator aborts internally on failure. */
static void
gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *intra_bo;
    int i;

    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
                            "intra row store",
                            128 * 64,
                            0x1000);

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* DW 1-3: pre-deblocking output (the WA surface) */
    OUT_BCS_RELOC64(batch,
                    obj_surface->bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);


    OUT_BCS_BATCH(batch, 0); /* post deblocking */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 13-15 is for intra row store scratch */
    OUT_BCS_RELOC64(batch,
                    intra_bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the DW 16-18 is for deblocking filter */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 19..50: reference picture addresses, all unused for the clip */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, 0);

    /* the DW52-54 is for mb status address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* the DW56-60 is for ILDB & second ILDB address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* hand ownership to the batch; freed once the GPU is done with it */
    dri_bo_unreference(intra_bo);
}
2414
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE for the workaround decode with
 * freshly-allocated BSD/MPC and MPR row-store scratch buffers.  Both are
 * unreferenced after emission; the batch keeps them alive until the GPU
 * has consumed them. */
static void
gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1.0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    OUT_BCS_RELOC64(batch,
                    bsd_mpc_bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    OUT_BCS_RELOC64(batch,
                    mpr_bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* bitplane read buffer: unused */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
2458
/* Intentionally a no-op: no AVC quantizer matrices are programmed for the
 * workaround clip (presumably the default matrices suffice for the tiny
 * 16x16 intra frame — NOTE(review): confirm against the PRM). */
static void
gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{

}
2465
/* Emit MFX_AVC_IMG_STATE for the workaround clip: a single-macroblock
 * (16x16) progressive CABAC frame with all optional features disabled. */
static void
gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct = 0;          /* progressive frame */
    int mbaff_frame_flag = 0;    /* no MBAFF */
    unsigned int width_in_mbs = 1, height_in_mbs = 1; /* clip is one MB */

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  width_in_mbs * height_in_mbs);
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 24) |
                  (0 << 16) |
                  (0 << 14) |
                  (0 << 13) |
                  (0 << 12) | /* differ from GEN6 */
                  (0 << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (0 << 6) |
                  (0 << 5) |
                  (0 << 4) |
                  (0 << 3) |
                  (1 << 2) |
                  (mbaff_frame_flag << 1) |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2513
/* Emit MFX_AVC_DIRECTMODE_STATE with all reference addresses and POC
 * entries zeroed — the workaround clip is a single intra frame and uses
 * no direct-mode prediction. */
static void
gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
                                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0); /* top */
        OUT_BCS_BATCH(batch, 0); /* bottom */
    }

    OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field */
    OUT_BCS_BATCH(batch, 0); /* top */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC List */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
2548
2549 static void
2550 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2551                                      struct gen7_mfd_context *gen7_mfd_context)
2552 {
2553     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2554
2555     BEGIN_BCS_BATCH(batch, 11);
2556     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2557     OUT_BCS_RELOC64(batch,
2558                     gen7_mfd_context->jpeg_wa_slice_data_bo,
2559                     I915_GEM_DOMAIN_INSTRUCTION, 0,
2560                     0);
2561     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2562     OUT_BCS_BATCH(batch, 0);
2563     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2564     OUT_BCS_BATCH(batch, 0);
2565     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2566     OUT_BCS_BATCH(batch, 0);
2567     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2568     OUT_BCS_BATCH(batch, 0);
2569     ADVANCE_BCS_BATCH(batch);
2570 }
2571
/* Emit the MFD_AVC_BSD_OBJECT that actually kicks off decoding of the
 * workaround clip, using the size and bit offset recorded in
 * gen7_jpeg_wa_clip. */
static void
gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitsteam format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) | /* byte offset */
                  (0 << 5)  |
                  (0 << 4)  |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7)); /* remaining bit offset */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2598
/* Emit MFX_AVC_SLICE_STATE for the workaround clip: a single I slice
 * covering the whole one-MB frame, deblocking disabled, QP taken from
 * the canned clip. */
static void
gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2638
/* Run the full JPEG workaround: decode the canned AVC clip to put the MFX
 * pipeline into a known state before the real JPEG picture is decoded.
 * The emitters must run in exactly this order — it mirrors the normal
 * AVC decode command sequence. */
static void
gen8_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen8_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
2658
2659 #endif
2660
2661 void
2662 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2663                              struct decode_state *decode_state,
2664                              struct gen7_mfd_context *gen7_mfd_context)
2665 {
2666     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2667     VAPictureParameterBufferJPEGBaseline *pic_param;
2668     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2669     dri_bo *slice_data_bo;
2670     int i, j, max_selector = 0;
2671
2672     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2673     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2674
2675     /* Currently only support Baseline DCT */
2676     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2677     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2678 #ifdef JPEG_WA
2679     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2680 #endif
2681     intel_batchbuffer_emit_mi_flush(batch);
2682     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2683     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2684     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2685     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2686     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2687
2688     for (j = 0; j < decode_state->num_slice_params; j++) {
2689         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2690         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2691         slice_data_bo = decode_state->slice_datas[j]->bo;
2692         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2693
2694         if (j == decode_state->num_slice_params - 1)
2695             next_slice_group_param = NULL;
2696         else
2697             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2698
2699         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2700             int component;
2701
2702             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2703
2704             if (i < decode_state->slice_params[j]->num_elements - 1)
2705                 next_slice_param = slice_param + 1;
2706             else
2707                 next_slice_param = next_slice_group_param;
2708
2709             for (component = 0; component < slice_param->num_components; component++) {
2710                 if (max_selector < slice_param->components[component].dc_table_selector)
2711                     max_selector = slice_param->components[component].dc_table_selector;
2712
2713                 if (max_selector < slice_param->components[component].ac_table_selector)
2714                     max_selector = slice_param->components[component].ac_table_selector;
2715             }
2716
2717             slice_param++;
2718         }
2719     }
2720
2721     assert(max_selector < 2);
2722     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2723
2724     for (j = 0; j < decode_state->num_slice_params; j++) {
2725         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2726         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2727         slice_data_bo = decode_state->slice_datas[j]->bo;
2728         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2729
2730         if (j == decode_state->num_slice_params - 1)
2731             next_slice_group_param = NULL;
2732         else
2733             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2734
2735         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2736             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2737
2738             if (i < decode_state->slice_params[j]->num_elements - 1)
2739                 next_slice_param = slice_param + 1;
2740             else
2741                 next_slice_param = next_slice_group_param;
2742
2743             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2744             slice_param++;
2745         }
2746     }
2747
2748     intel_batchbuffer_end_atomic(batch);
2749     intel_batchbuffer_flush(batch);
2750 }
2751
/* VP8 DC dequantization lookup table, indexed by the clipped quantization
 * index (0..127); values per the VP8 bitstream spec (RFC 6386). */
static const int vp8_dc_qlookup[128] = {
    4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
    18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
    29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
    44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
    59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
    75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
    91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
};
2762
/* VP8 AC dequantization lookup table, indexed by the clipped quantization
 * index (0..127); values per the VP8 bitstream spec (RFC 6386). */
static const int vp8_ac_qlookup[128] = {
    4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
    20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
    36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
    52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
    78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
};
2773
/* Clamp a VP8 quantization index into the valid lookup range [0, 127]. */
static inline unsigned int vp8_clip_quantization_index(int index)
{
    if (index < 0)
        return 0;

    return (index > 127) ? 127 : (unsigned int)index;
}
2783
/* Prepare per-picture state for a VP8 decode: update the reference frame
 * store, bind the render target as both pre- and post-deblocking output
 * (only one is marked valid, depending on loop_filter_disable), ensure the
 * segmentation buffer exists, and (re)allocate the row-store scratch
 * buffers sized to the frame width in macroblocks. */
static void
gen8_mfd_vp8_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;
    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    int width_in_mbs = (pic_param->frame_width + 15) / 16;
    int height_in_mbs = (pic_param->frame_height + 15) / 16;

    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    intel_update_vp8_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* Both outputs point at the render target; exactly one is valid,
     * chosen by whether the loop filter is enabled for this frame. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;

    intel_ensure_vp8_segmentation_buffer(ctx,
                                         &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);

    /* The same as AVC */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* VP8 carries no bitplane data */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
2860
/*
 * gen8_mfd_vp8_pic_state:
 * Emit the 38-dword MFX_VP8_PIC_STATE command programming the per-frame
 * VP8 decode state: frame geometry, picture flags, loop-filter levels,
 * per-segment dequantizer values, coefficient/mode/MV probability data
 * and the segmentation-map stream buffer address.
 */
static void
gen8_mfd_vp8_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
    VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
    dri_bo *probs_bo = decode_state->probability_data->bo;
    int i, j, log2num;
    unsigned int quantization_value[4][6];

    /* There is no safe way to error out if the segmentation buffer
       could not be allocated. So, instead of aborting, simply decode
       something even if the result may look totally inaccurate */
    const unsigned int enable_segmentation =
        pic_param->pic_fields.bits.segmentation_enabled &&
        gen7_mfd_context->segmentation_buffer.valid;

    /* num_of_partitions counts partition 0 plus 1/2/4/8 token
       partitions, so this yields log2 of the token partition count. */
    log2num = (int)log2(slice_param->num_of_partitions - 1);

    BEGIN_BCS_BATCH(batch, 38);
    OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
    /* DW1: frame dimensions in macroblocks, minus one */
    OUT_BCS_BATCH(batch,
                  (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
                  (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
    /* DW2: picture control flags */
    OUT_BCS_BATCH(batch,
                  log2num << 24 |
                  pic_param->pic_fields.bits.sharpness_level << 16 |
                  pic_param->pic_fields.bits.sign_bias_alternate << 13 |
                  pic_param->pic_fields.bits.sign_bias_golden << 12 |
                  pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
                  pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
                  (enable_segmentation &&
                   pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
                  pic_param->pic_fields.bits.segmentation_enabled << 8 |
                  (enable_segmentation &&
                   !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
                  (enable_segmentation &&
                   pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
                  (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
                  pic_param->pic_fields.bits.filter_type << 4 |
                  (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
                  !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */

    /* DW3: loop filter level for the four segments */
    OUT_BCS_BATCH(batch,
                  pic_param->loop_filter_level[3] << 24 |
                  pic_param->loop_filter_level[2] << 16 |
                  pic_param->loop_filter_level[1] <<  8 |
                  pic_param->loop_filter_level[0] <<  0);

    /* Quantizer Value for 4 segments, DW4-DW15: translate the quantizer
       indices to actual dequant factors through the VP8 AC/DC lookup
       tables, with the spec-mandated scaling of the Y2 and UV planes. */
    for (i = 0; i < 4; i++) {
        quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
        quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
        quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /*y2dc*/
        /* 101581>>16 is equivalent to 155/100 */
        quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /*y2ac*/
        quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
        quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/

        /* clamp y2ac to >= 8 and uvdc to <= 132 as required by the VP8
           dequantization rules */
        quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
        quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);

        OUT_BCS_BATCH(batch,
                      quantization_value[i][0] << 16 | /* Y1AC */
                      quantization_value[i][1] <<  0); /* Y1DC */
        OUT_BCS_BATCH(batch,
                      quantization_value[i][5] << 16 | /* UVAC */
                      quantization_value[i][4] <<  0); /* UVDC */
        OUT_BCS_BATCH(batch,
                      quantization_value[i][3] << 16 | /* Y2AC */
                      quantization_value[i][2] <<  0); /* Y2DC */
    }

    /* CoeffProbability table for non-key frame, DW16-DW18 */
    if (probs_bo) {
        OUT_BCS_RELOC64(batch, probs_bo,
                        0, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
        OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    } else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    /* DW19: segment-id tree probabilities */
    OUT_BCS_BATCH(batch,
                  pic_param->mb_segment_tree_probs[2] << 16 |
                  pic_param->mb_segment_tree_probs[1] <<  8 |
                  pic_param->mb_segment_tree_probs[0] <<  0);

    /* DW20: macroblock-level mode probabilities */
    OUT_BCS_BATCH(batch,
                  pic_param->prob_skip_false << 24 |
                  pic_param->prob_intra      << 16 |
                  pic_param->prob_last       <<  8 |
                  pic_param->prob_gf         <<  0);

    /* DW21: luma intra mode probabilities */
    OUT_BCS_BATCH(batch,
                  pic_param->y_mode_probs[3] << 24 |
                  pic_param->y_mode_probs[2] << 16 |
                  pic_param->y_mode_probs[1] <<  8 |
                  pic_param->y_mode_probs[0] <<  0);

    /* DW22: chroma intra mode probabilities */
    OUT_BCS_BATCH(batch,
                  pic_param->uv_mode_probs[2] << 16 |
                  pic_param->uv_mode_probs[1] <<  8 |
                  pic_param->uv_mode_probs[0] <<  0);

    /* MV update value, DW23-DW32: 2 components x 19 probabilities,
       packed 4 per dword; the 20th byte of each component is zero */
    for (i = 0; i < 2; i++) {
        for (j = 0; j < 20; j += 4) {
            OUT_BCS_BATCH(batch,
                          (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
                          pic_param->mv_probs[i][j + 2] << 16 |
                          pic_param->mv_probs[i][j + 1] <<  8 |
                          pic_param->mv_probs[i][j + 0] <<  0);
        }
    }

    /* DW33: reference-frame loop-filter deltas (signed 7-bit) */
    OUT_BCS_BATCH(batch,
                  (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
                  (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
                  (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
                  (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);

    /* DW34: mb-mode loop-filter deltas (signed 7-bit) */
    OUT_BCS_BATCH(batch,
                  (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
                  (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
                  (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
                  (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);

    /* segmentation id stream base address, DW35-DW37 */
    if (enable_segmentation) {
        OUT_BCS_RELOC64(batch, gen7_mfd_context->segmentation_buffer.bo,
                        0, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
        OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    } else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    ADVANCE_BCS_BATCH(batch);
}
3008
3009 static void
3010 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
3011                         VAPictureParameterBufferVP8 *pic_param,
3012                         VASliceParameterBufferVP8 *slice_param,
3013                         dri_bo *slice_data_bo,
3014                         struct gen7_mfd_context *gen7_mfd_context)
3015 {
3016     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3017     int i, log2num;
3018     unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
3019     unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
3020     unsigned int partition_size_0 = slice_param->partition_size[0];
3021
3022     assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
3023     if (used_bits == 8) {
3024         used_bits = 0;
3025         offset += 1;
3026         partition_size_0 -= 1;
3027     }
3028
3029     assert(slice_param->num_of_partitions >= 2);
3030     assert(slice_param->num_of_partitions <= 9);
3031
3032     log2num = (int)log2(slice_param->num_of_partitions - 1);
3033
3034     BEGIN_BCS_BATCH(batch, 22);
3035     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
3036     OUT_BCS_BATCH(batch,
3037                   used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
3038                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
3039                   log2num << 4 |
3040                   (slice_param->macroblock_offset & 0x7));
3041     OUT_BCS_BATCH(batch,
3042                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
3043                   0);
3044
3045     OUT_BCS_BATCH(batch, partition_size_0 + 1);
3046     OUT_BCS_BATCH(batch, offset);
3047     //partion sizes in bytes are present after the above first partition when there are more than one token partition
3048     offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
3049     for (i = 1; i < 9; i++) {
3050         if (i < slice_param->num_of_partitions) {
3051             OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
3052             OUT_BCS_BATCH(batch, offset);
3053         } else {
3054             OUT_BCS_BATCH(batch, 0);
3055             OUT_BCS_BATCH(batch, 0);
3056         }
3057
3058         offset += slice_param->partition_size[i];
3059     }
3060
3061     OUT_BCS_BATCH(batch, 0); /* concealment method */
3062
3063     ADVANCE_BCS_BATCH(batch);
3064 }
3065
3066 void
3067 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3068                             struct decode_state *decode_state,
3069                             struct gen7_mfd_context *gen7_mfd_context)
3070 {
3071     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3072     VAPictureParameterBufferVP8 *pic_param;
3073     VASliceParameterBufferVP8 *slice_param;
3074     dri_bo *slice_data_bo;
3075
3076     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3077     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3078
3079     /* one slice per frame */
3080     if (decode_state->num_slice_params != 1 ||
3081         (!decode_state->slice_params ||
3082          !decode_state->slice_params[0] ||
3083          (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3084         (!decode_state->slice_datas ||
3085          !decode_state->slice_datas[0] ||
3086          !decode_state->slice_datas[0]->bo) ||
3087         !decode_state->probability_data) {
3088         WARN_ONCE("Wrong parameters for VP8 decoding\n");
3089
3090         return;
3091     }
3092
3093     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3094     slice_data_bo = decode_state->slice_datas[0]->bo;
3095
3096     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3097     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3098     intel_batchbuffer_emit_mi_flush(batch);
3099     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3100     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3101     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3102     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3103     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3104     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3105     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3106     intel_batchbuffer_end_atomic(batch);
3107     intel_batchbuffer_flush(batch);
3108 }
3109
3110 static VAStatus
3111 gen8_mfd_decode_picture(VADriverContextP ctx,
3112                         VAProfile profile,
3113                         union codec_state *codec_state,
3114                         struct hw_context *hw_context)
3115
3116 {
3117     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3118     struct decode_state *decode_state = &codec_state->decode;
3119     VAStatus vaStatus;
3120
3121     assert(gen7_mfd_context);
3122
3123     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3124
3125     if (vaStatus != VA_STATUS_SUCCESS)
3126         goto out;
3127
3128     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3129
3130     switch (profile) {
3131     case VAProfileMPEG2Simple:
3132     case VAProfileMPEG2Main:
3133         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3134         break;
3135
3136     case VAProfileH264ConstrainedBaseline:
3137     case VAProfileH264Main:
3138     case VAProfileH264High:
3139     case VAProfileH264StereoHigh:
3140     case VAProfileH264MultiviewHigh:
3141         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3142         break;
3143
3144     case VAProfileVC1Simple:
3145     case VAProfileVC1Main:
3146     case VAProfileVC1Advanced:
3147         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3148         break;
3149
3150     case VAProfileJPEGBaseline:
3151         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3152         break;
3153
3154     case VAProfileVP8Version0_3:
3155         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3156         break;
3157
3158     default:
3159         assert(0);
3160         break;
3161     }
3162
3163     vaStatus = VA_STATUS_SUCCESS;
3164
3165 out:
3166     return vaStatus;
3167 }
3168
3169 static void
3170 gen8_mfd_context_destroy(void *hw_context)
3171 {
3172     VADriverContextP ctx;
3173     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3174
3175     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3176
3177     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3178     gen7_mfd_context->post_deblocking_output.bo = NULL;
3179
3180     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3181     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3182
3183     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3184     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3185
3186     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3187     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3188
3189     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3190     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3191
3192     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3193     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3194
3195     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3196     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3197
3198     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3199     gen7_mfd_context->segmentation_buffer.bo = NULL;
3200
3201     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3202
3203     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3204         i965_DestroySurfaces(ctx,
3205                              &gen7_mfd_context->jpeg_wa_surface_id,
3206                              1);
3207         gen7_mfd_context->jpeg_wa_surface_object = NULL;
3208     }
3209
3210     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3211     free(gen7_mfd_context);
3212 }
3213
3214 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3215                                         struct gen7_mfd_context *gen7_mfd_context)
3216 {
3217     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3218     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3219     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3220     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3221 }
3222
3223 struct hw_context *
3224 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3225 {
3226     struct intel_driver_data *intel = intel_driver_data(ctx);
3227     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3228     int i;
3229
3230     if (!gen7_mfd_context)
3231         return NULL;
3232
3233     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3234     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3235     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3236
3237     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3238         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3239         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3240     }
3241
3242     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3243     gen7_mfd_context->segmentation_buffer.valid = 0;
3244
3245     switch (obj_config->profile) {
3246     case VAProfileMPEG2Simple:
3247     case VAProfileMPEG2Main:
3248         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3249         break;
3250
3251     case VAProfileH264ConstrainedBaseline:
3252     case VAProfileH264Main:
3253     case VAProfileH264High:
3254     case VAProfileH264StereoHigh:
3255     case VAProfileH264MultiviewHigh:
3256         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3257         break;
3258     default:
3259         break;
3260     }
3261
3262     gen7_mfd_context->driver_context = ctx;
3263     return (struct hw_context *)gen7_mfd_context;
3264 }