OSDN Git Service

decode: release huffman_table from decode state
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
/* First hardware stepping revision considered "B0 or later"; some MFX
 * programming differs between pre-B0 and B0+ steppings. */
#define B0_STEP_REV     2
#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
/* Standard 8x8 zig-zag scan order: maps scan position -> raster index,
 * used to reorder quantizer matrices supplied in zig-zag order. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
61
/*
 * Attach the AVC-specific private data (GenAvcSurface) to a decode surface.
 *
 * On first use, allocates the zero-initialized container and registers
 * gen_free_avc_surface as the destructor.  Also lazily allocates the
 * direct-MV (DMV) write/read buffer, sized for the whole frame at
 * 128 bytes per macroblock.  On calloc failure the function returns
 * silently, leaving obj_surface->private_data NULL.
 */
static void
gen8_mfd_init_avc_surface(VADriverContextP ctx,
                          VAPictureParameterBufferH264 *pic_param,
                          struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
    int width_in_mbs, height_in_mbs;

    obj_surface->free_private_data = gen_free_avc_surface;
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    if (!gen7_avc_surface) {
        gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);

        if (!gen7_avc_surface)
            return;

        /* -1 marks "no frame store slot assigned yet" */
        gen7_avc_surface->base.frame_store_id = -1;
        assert((obj_surface->size & 0x3f) == 0); /* surface size must be 64-byte aligned */
        obj_surface->private_data = gen7_avc_surface;
    }

    /* DMV buffers now relate to the whole frame, irrespective of
       field coding modes */
    if (gen7_avc_surface->dmv_top == NULL) {
        /* NOTE(review): sized for the dimensions seen at first allocation;
         * not re-allocated if the surface is reused at a larger resolution
         * -- confirm callers guarantee matching dimensions. */
        gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
                                                 "direct mv w/r buffer",
                                                 width_in_mbs * height_in_mbs * 128,
                                                 0x1000);
        assert(gen7_avc_surface->dmv_top);
    }
}
96
/*
 * Emit MFX_PIPE_MODE_SELECT (5 dwords): put the MFX engine into VLD
 * decode mode for the selected codec and enable pre-/post-deblocking
 * output surfaces according to the context state.  Error-concealment
 * terminate bits are all left disabled.
 */
static void
gen8_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1 ||
           standard_select == MFX_FORMAT_JPEG ||
           standard_select == MFX_FORMAT_VP8);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
132
/*
 * Emit MFX_SURFACE_STATE (6 dwords) for the render target surface:
 * dimensions, pitch, tiling (Y-major required) and the Cb/Cr plane
 * Y-offsets.  Y800 surfaces are programmed as monochrome, everything
 * else as planar 4:2:0 8-bit; chroma is interleaved (NV12) except for
 * JPEG, which uses separate planes.
 */
static void
gen8_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface = decode_state->render_object;
    unsigned int y_cb_offset;
    unsigned int y_cr_offset;
    unsigned int surface_format;

    assert(obj_surface);

    y_cb_offset = obj_surface->y_cb_offset;
    y_cr_offset = obj_surface->y_cr_offset;

    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
                     MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 18) |
                  ((obj_surface->orig_width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (surface_format << 28) | /* 420 planar YUV surface */
                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
175
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): all the pipeline buffer
 * addresses the MFX engine needs.  Each address slot is a 64-bit
 * relocation (two dwords) followed by a MOCS/attribute dword; invalid
 * buffers are programmed as 0.  The dword layout is fixed by hardware:
 *   1-3   pre-deblocking output
 *   4-6   post-deblocking output
 *   7-12  uncompressed picture / stream-out (unused for decode)
 *   13-15 intra row-store scratch
 *   16-18 deblocking filter row-store scratch
 *   19-50 up to 16 reference picture addresses
 *   51    reference picture attributes (MOCS)
 *   52-60 macroblock status / ILDB buffers (unused)
 */
static void
gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* Pre-deblock 1-3 */
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->pre_deblocking_output.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Post-debloing 4-6 */
    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->post_deblocking_output.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* intra row-store scratch 13-15 */
    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* deblocking-filter-row-store 16-18 */
    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 19..50 */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->bo) {
            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

            OUT_BCS_RELOC64(batch, obj_surface->bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }

    }

    /* reference property 51 */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Macroblock status & ILDB 52-57 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the second Macroblock status 58-60 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
281
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): point the bitstream
 * decoder at the slice data buffer.  Only the indirect bitstream base
 * (dwords 1-3) is programmed; the upper-bound and the MV / IT-COFF /
 * IT-DBLK / PAK-BSE object slots are zeroed (unused for VLD decode).
 */
static void
gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    BEGIN_BCS_BATCH(batch, 26);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* MFX In BS 1-5 */
    OUT_BCS_RELOC64(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* Upper bound 4-5 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX indirect MV 6-10 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX IT_COFF 11-15 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX IT_DBLK 16-20 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX PAK_BSE object for encoder 21-25 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
330
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): the BSD/MPC row-store,
 * MPR row-store and bitplane read buffers.  Each slot is a 64-bit
 * relocation plus a MOCS dword; buffers not in use for the current
 * codec are programmed as 0.
 */
static void
gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* MPR Row Store Scratch buffer 4-6 */
    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Bitplane 7-9 */
    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, 0,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    ADVANCE_BCS_BATCH(batch);
}
377
378 static void
379 gen8_mfd_qm_state(VADriverContextP ctx,
380                   int qm_type,
381                   unsigned char *qm,
382                   int qm_length,
383                   struct gen7_mfd_context *gen7_mfd_context)
384 {
385     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
386     unsigned int qm_buffer[16];
387
388     assert(qm_length <= 16 * 4);
389     memcpy(qm_buffer, qm, qm_length);
390
391     BEGIN_BCS_BATCH(batch, 18);
392     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
393     OUT_BCS_BATCH(batch, qm_type << 0);
394     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
395     ADVANCE_BCS_BATCH(batch);
396 }
397
/*
 * Emit MFX_AVC_IMG_STATE (17 dwords) from the VA picture parameters:
 * picture dimensions in macroblocks, QP offsets, prediction flags and
 * the picture structure (frame / top field / bottom field).  Asserts
 * enforce the MFX unit's limits: 4:2:0 or monochrome only, and
 * consistency between img_struct and field_pic_flag.
 */
static void
gen8_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    /* img_struct: 0 = frame, 1 = top field, 3 = bottom field */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF: adaptive frame/field coding enabled and this is a frame picture */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 17);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
    OUT_BCS_BATCH(batch,
                  (width_in_mbs * height_in_mbs - 1));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    /* dwords 5-16 are reserved / unused for decode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
483
/*
 * Load the AVC scaling lists: the six 4x4 matrices (3 intra + 3 inter)
 * always, and the two 8x8 matrices only when 8x8 transform mode is
 * enabled.  Falls back to the matrices cached in the context when the
 * app did not supply a VAIQMatrixBufferH264.
 */
static void
gen8_mfd_avc_qm_state(VADriverContextP ctx,
                      struct decode_state *decode_state,
                      struct gen7_mfd_context *gen7_mfd_context)
{
    VAIQMatrixBufferH264 *iq_matrix;
    VAPictureParameterBufferH264 *pic_param;

    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
        iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
    else
        iq_matrix = &gen7_mfd_context->iq_matrix.h264;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
    gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
        gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
        gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
    }
}
508
/* Thin wrapper: emit MFX_AVC_PICID_STATE for the current reference
 * surface set via the shared Gen7.5+ helper. */
static inline void
gen8_mfd_avc_picid_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
                               gen7_mfd_context->reference_surface);
}
517
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 dwords) for B-slice direct/spatial
 * motion prediction: the direct-MV buffer addresses of the 16 reference
 * surfaces and of the current frame, followed by the top/bottom POC
 * list for each reference and for the current picture.
 */
static void
gen8_mfd_avc_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen7_avc_surface;
    VAPictureH264 *va_pic;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->private_data) {

            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
            gen7_avc_surface = obj_surface->private_data;

            OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            /* empty slot: two zero dwords in place of the 64-bit address */
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the current decoding frame/field */
    va_pic = &pic_param->CurrPic;
    obj_surface = decode_state->render_object;
    assert(obj_surface->bo && obj_surface->private_data);
    gen7_avc_surface = obj_surface->private_data;

    /* current picture's DMV buffer is both read and written */
    OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* POC List */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

        if (obj_surface) {
            /* look the surface up in ReferenceFrames to get its POCs */
            const VAPictureH264 * const va_pic = avc_find_picture(
                                                     obj_surface->base.id, pic_param->ReferenceFrames,
                                                     ARRAY_ELEMS(pic_param->ReferenceFrames));

            assert(va_pic != NULL);
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
591
/* Thin wrapper: emit a phantom (error-concealment) slice covering the
 * macroblocks before the first real slice, via the shared Gen6 helper. */
static void
gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
                                 VAPictureParameterBufferH264 *pic_param,
                                 VASliceParameterBufferH264 *next_slice_param,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
}
600
/*
 * Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice: normalized slice
 * type, active reference counts, weight denominators, QP/deblocking
 * parameters, and the start position of this slice plus the start of
 * the next one (or end-of-picture when this is the last slice).
 * Vertical MB positions are doubled for MBAFF pictures because the
 * hardware addresses MB pairs.
 */
static void
gen8_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* fold SI into I and SP into P: the hardware knows only I/P/B */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    first_mb_in_slice = slice_param->first_mb_in_slice;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    if (mbaff_picture)
        slice_ver_pos = slice_ver_pos << 1; /* MB-pair rows -> MB rows */
    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;

        if (mbaff_picture)
            next_slice_ver_pos = next_slice_ver_pos << 1;
    } else {
        /* last slice: "next" position is one past the bottom of the
         * picture (halved height for field pictures) */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
691
/* Thin wrapper: emit MFX_AVC_REF_IDX_STATE mapping the slice's RefPicList
 * entries to frame-store indices, via the shared Gen6 helper. */
static inline void
gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           VASliceParameterBufferH264 *slice_param,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    gen6_send_avc_ref_idx_state(
        gen7_mfd_context->base.batch,
        slice_param,
        gen7_mfd_context->reference_surface
    );
}
704
/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE (98 dwords per list) when explicit
 * weighted prediction applies: one table (L0) for P/SP slices with
 * weighted_pred_flag, two tables (L0 and L1) for B slices with
 * weighted_bipred_idc == 1.  Each table packs, per reference index,
 * luma weight/offset and Cb/Cr weight/offset pairs (6 int16 each,
 * 32 entries).  Implicit weighting (idc == 2) emits nothing: the
 * hardware derives those weights itself.
 */
static void
gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
                                VAPictureParameterBufferH264 *pic_param,
                                VASliceParameterBufferH264 *slice_param,
                                struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i, j, num_weight_offset_table = 0;
    short weightoffsets[32 * 6];

    if ((slice_param->slice_type == SLICE_TYPE_P ||
         slice_param->slice_type == SLICE_TYPE_SP) &&
        (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
        num_weight_offset_table = 1;
    }

    if ((slice_param->slice_type == SLICE_TYPE_B) &&
        (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
        num_weight_offset_table = 2;
    }

    for (i = 0; i < num_weight_offset_table; i++) {
        BEGIN_BCS_BATCH(batch, 98);
        OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
        OUT_BCS_BATCH(batch, i); /* 0 = list L0, 1 = list L1 */

        if (i == 0) {
            for (j = 0; j < 32; j++) {
                weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
                weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
                weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
                weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
                weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
                weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
            }
        } else {
            for (j = 0; j < 32; j++) {
                weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
                weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
                weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
                weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
                weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
                weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
            }
        }

        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
        ADVANCE_BCS_BATCH(batch);
    }
}
755
static void
gen8_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* Bit offset of the first macroblock's data inside the slice buffer;
     * the helper takes the entropy coding mode into account. */
    int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
                                                            slice_param,
                                                            pic_param->pic_fields.bits.entropy_coding_mode_flag);

    /* the input bitsteam format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    /* DW1: length of the slice data in bytes */
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_data_size));
    /* DW2: start of the slice data relative to the indirect object base */
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    /* DW3: control flags — all cleared here */
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    /* DW4: byte offset of the first MB (bits 16+) plus the residual bit
     * offset within that byte (bits 0-2); bit 3 flags the last slice of
     * the picture. */
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) |
                  (1 << 7)  |
                  (0 << 5)  |
                  (0 << 4)  |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
791
/*
 * One-time AVC context setup: seed the cached H.264 IQ matrix with the
 * default flat scaling lists.
 */
static inline void
gen8_mfd_avc_context_init(
    VADriverContextP         ctx,
    struct gen7_mfd_context *gen7_mfd_context
)
{
    /* Initialize flat scaling lists */
    avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
}
801
/*
 * Per-picture AVC decode setup: decide the deblocking path, bind the
 * render target as the hardware output, and (re)allocate the row-store
 * scratch buffers sized from the picture width.
 */
static void
gen8_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    unsigned int width_in_mbs, height_in_mbs;

    /* Scan the slice parameters: in-loop deblocking is enabled as soon as
     * any slice does not fully disable it (disable_deblocking_filter_idc
     * != 1); stop scanning at the first such slice. */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    /* Refresh the frame-store indices from this picture's reference list */
    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
                                       gen7_mfd_context->reference_surface);
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    if (pic_param->pic_fields.bits.reference_pic_flag)
        obj_surface->flags |= SURFACE_REFERENCED;
    else
        obj_surface->flags &= ~SURFACE_REFERENCED;

    avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
    gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* Exactly one of the two output paths is marked valid: post-deblocking
     * when ILDB runs, pre-deblocking otherwise.  Both alias the render
     * target's bo (each holds its own reference). */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Row-store scratch buffers, sized per macroblock column */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* AVC does not use the VC-1 bitplane path */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
903
/*
 * Decode one AVC picture: emit the per-picture MFX state followed by the
 * per-slice state and BSD objects for every slice group, then flush the
 * BCS batch.  The emission order below is fixed by the hardware command
 * sequence and must not be rearranged.
 */
static void
gen8_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);

    /* Per-picture state */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);

    /* Outer loop: one slice parameter buffer per slice group */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);

        /* First slice of the next group, or NULL if this group is last;
         * needed to detect the picture's final slice. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        /* Handle a first slice that does not start at macroblock 0 */
        if (j == 0 && slice_param->first_mb_in_slice)
            gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);

        /* Inner loop: one element per slice within the group */
        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
968
/*
 * Per-picture MPEG-2 decode setup: bind reference surfaces and the render
 * target, and allocate the BSD/MPC row-store scratch buffer.
 */
static void
gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferMPEG2 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    unsigned int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;

    /* Resolve this picture's reference surfaces into the frame store */
    mpeg2_set_reference_surfaces(
        ctx,
        gen7_mfd_context->reference_surface,
        decode_state,
        pic_param
    );

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* Output always goes through the pre-deblocking path for MPEG-2;
     * it aliases the render target's bo. */
    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* Remaining scratch/bitplane buffers are unused for MPEG-2 */
    gen7_mfd_context->post_deblocking_output.valid = 0;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
1015
/*
 * Emit MFX_MPEG2_PIC_STATE: f_code nibbles, picture coding extension
 * fields, picture coding type, and the frame dimensions in macroblocks.
 */
static void
gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int slice_concealment_disable_bit = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    /* Slice concealment is unconditionally disabled here */
    slice_concealment_disable_bit = 1;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    /* DW1: the four packed f_code nibbles plus picture_coding_extension bits */
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    /* DW2: I/P/B picture coding type */
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    /* DW3: frame height (bits 16+) and width, both in MBs minus one */
    OUT_BCS_BATCH(batch,
                  (slice_concealment_disable_bit << 31) |
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
    /* DW4-DW12: unused, zeroed */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
1062
1063 static void
1064 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1065                         struct decode_state *decode_state,
1066                         struct gen7_mfd_context *gen7_mfd_context)
1067 {
1068     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1069     int i, j;
1070
1071     /* Update internal QM state */
1072     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1073         VAIQMatrixBufferMPEG2 * const iq_matrix =
1074             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1075
1076         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1077             iq_matrix->load_intra_quantiser_matrix) {
1078             gen_iq_matrix->load_intra_quantiser_matrix =
1079                 iq_matrix->load_intra_quantiser_matrix;
1080             if (iq_matrix->load_intra_quantiser_matrix) {
1081                 for (j = 0; j < 64; j++)
1082                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1083                         iq_matrix->intra_quantiser_matrix[j];
1084             }
1085         }
1086
1087         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1088             iq_matrix->load_non_intra_quantiser_matrix) {
1089             gen_iq_matrix->load_non_intra_quantiser_matrix =
1090                 iq_matrix->load_non_intra_quantiser_matrix;
1091             if (iq_matrix->load_non_intra_quantiser_matrix) {
1092                 for (j = 0; j < 64; j++)
1093                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1094                         iq_matrix->non_intra_quantiser_matrix[j];
1095             }
1096         }
1097     }
1098
1099     /* Commit QM state to HW */
1100     for (i = 0; i < 2; i++) {
1101         unsigned char *qm = NULL;
1102         int qm_type;
1103
1104         if (i == 0) {
1105             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1106                 qm = gen_iq_matrix->intra_quantiser_matrix;
1107                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1108             }
1109         } else {
1110             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1111                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1112                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1113             }
1114         }
1115
1116         if (!qm)
1117             continue;
1118
1119         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1120     }
1121 }
1122
/*
 * Emit one MFD_MPEG2_BSD_OBJECT covering a single slice.  The macroblock
 * count is derived from the raster positions of this slice and the next
 * one (or the picture end when this is the last slice).
 */
static void
gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    /* Workaround: some streams report frame-based vertical positions for
     * field pictures; when active, halve the reported positions. */
    is_field_pic_wa = is_field_pic &&
                      gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    if (next_slice_param == NULL) {
        /* Last slice: runs to the bottom of the picture (half height for
         * a field picture) */
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    /* Number of macroblocks between the two raster positions */
    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    /* DW1/DW2: slice data length/offset, skipping whole bytes consumed by
     * the macroblock_offset; the residual bits go into DW3. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    /* DW3: start position, MB count, last-slice flags, residual bit offset */
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    /* DW4: quantiser scale code and the next slice's start position */
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (vpos1 << 8 | hpos1));
    ADVANCE_BCS_BATCH(batch);
}
1171
/*
 * Decode one MPEG-2 picture: emit the per-picture MFX state, then one BSD
 * object per slice, and flush the BCS batch.  The emission order is fixed
 * by the hardware command sequence.
 */
static void
gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Lazily determine the slice-vertical-position workaround once per
     * context (< 0 means "not yet determined") */
    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    /* One slice parameter buffer per slice group; one element per slice */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);

        /* First slice of the next group (NULL when this group is last);
         * used to detect the picture's final slice. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1227
/* Map VA-API VC-1 picture_type values to the GEN7 picture-type encoding */
static const int va_to_gen7_vc1_pic_type[5] = {
    GEN7_VC1_I_PICTURE,
    GEN7_VC1_P_PICTURE,
    GEN7_VC1_B_PICTURE,
    GEN7_VC1_BI_PICTURE,
    GEN7_VC1_P_PICTURE,
};

/* Map VA-API VC-1 mv_mode values to the GEN7 unified MV mode encoding */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pef bilinear */
    0, /* Mixed MV */
};

/* B-fraction scale factors indexed by the combined numerator/denominator
 * code (see the users of this table for the exact indexing) */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160,
    224,
};

/* Map VA-API VC-1 conditional_overlap_flag values to GEN7 condover codes */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};

/* Map VA-API VC-1 profile values to the GEN7 profile encoding */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
};
1263
1264 static void
1265 gen8_mfd_free_vc1_surface(void **data)
1266 {
1267     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1268
1269     if (!gen7_vc1_surface)
1270         return;
1271
1272     dri_bo_unreference(gen7_vc1_surface->dmv);
1273     free(gen7_vc1_surface);
1274     *data = NULL;
1275 }
1276
1277 static void
1278 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1279                           VAPictureParameterBufferVC1 *pic_param,
1280                           struct object_surface *obj_surface)
1281 {
1282     struct i965_driver_data *i965 = i965_driver_data(ctx);
1283     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1284     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1285     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1286
1287     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1288
1289     if (!gen7_vc1_surface) {
1290         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1291
1292         if (!gen7_vc1_surface)
1293             return;
1294
1295         assert((obj_surface->size & 0x3f) == 0);
1296         obj_surface->private_data = gen7_vc1_surface;
1297     }
1298
1299     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1300
1301     if (gen7_vc1_surface->dmv == NULL) {
1302         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1303                                              "direct mv w/r buffer",
1304                                              width_in_mbs * height_in_mbs * 64,
1305                                              0x1000);
1306     }
1307 }
1308
/*
 * Per-picture VC-1 decode setup: bind references and the render target,
 * allocate row-store scratch buffers, and repack the VA bitplane buffer
 * into the hardware's layout when one is present.
 */
static void
gen8_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int width_in_mbs;
    int picture_type;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    picture_type = pic_param->picture_fields.bits.picture_type;

    intel_update_vc1_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* Exactly one output path is valid, chosen by the loop-filter bit:
     * post-deblocking when the VC-1 loop filter runs, pre-deblocking
     * otherwise.  Both alias the render target's bo. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    /* Row-store scratch buffers sized per macroblock column */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 7 * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);

    if (gen7_mfd_context->bitplane_read_buffer.valid) {
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        /* Destination rows are width_in_mbs nibbles, rounded up to bytes */
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
        int src_w, src_h;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "VC-1 Bitplane",
                          bitplane_width * height_in_mbs,
                          0x1000);
        assert(bo);
        gen7_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);
        dst = bo->virtual;

        /* Repack: the VA buffer stores one 4-bit value per MB, two MBs per
         * byte in linear (row-major) order.  The destination packs the
         * values two per byte within each row (even MB in the low nibble,
         * odd MB in the high nibble after the pairwise shifting below),
         * with rows padded to bitplane_width bytes. */
        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for (src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;
                int src_shift;
                uint8_t src_value;

                src_index = (src_h * width_in_mbs + src_w) / 2;
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                /* NOTE(review): for skipped pictures bit 1 is forced on in
                 * every bitplane nibble — presumably marking all MBs as
                 * skipped; confirm against the MFX bitplane format. */
                if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
                    src_value |= 0x2;
                }

                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
            }

            /* Odd row width: the last byte received only one nibble (still
             * in the high half) — shift it down into the low nibble. */
            if (src_w & 1)
                dst[src_w / 2] >>= 4;

            dst += bitplane_width;
        }

        dri_bo_unmap(bo);
    } else
        gen7_mfd_context->bitplane_read_buffer.bo = NULL;
}
1427
/*
 * Emit MFD_VC1_LONG_PIC_STATE for the current picture.
 *
 * Translates the VA-API VC-1 picture parameter buffer into the per-picture
 * hardware state: ALTPQUANT (alternative quantizer) configuration, unified
 * MV mode, B-picture scale factor, direct-MV surface validity, frame coding
 * mode, overlap smoothing and sub-pel interpolation mode, then packs them
 * into the six command dwords.
 */
static void
gen8_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;
    int interpolation_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /*
     * Derive the ALTPQUANT configuration and edge mask from the DQUANT
     * syntax: dquant == 2 applies alt_pq to all four edges; dquant == 1
     * selects edges according to dq_profile (all edges / double edges /
     * single edge / per-MB).
     */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3: /* per-macroblock quantizer */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;

            case 0: /* all four edges */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1: /* double edge */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2: /* single edge */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /* With intensity compensation, the real MV mode is carried in mv_mode2. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    /* b_picture_fraction indexes the BFRACTION scale table; out-of-range
     * values leave scale_factor at 0. */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];

    /* NOTE(review): advanced-profile I pictures are programmed as BI here —
     * presumably a hardware requirement; confirm against the MFX PRM. */
    if (profile == GEN7_VC1_ADVANCED_PROFILE &&
        picture_type == GEN7_VC1_I_PICTURE)
        picture_type = GEN7_VC1_BI_PICTURE;

    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;

        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /* The direct-MV read surface is only usable when the backward reference
     * exists and is not an intra (I/BI) picture. */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        struct gen7_vc1_surface *gen7_vc1_surface = NULL;

        obj_surface = decode_state->reference_objects[1];

        if (obj_surface)
            gen7_vc1_surface = obj_surface->private_data;

        if (!gen7_vc1_surface ||
            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* Frame coding mode: 0/1 pass through; field-interlace (2) is split into
     * 2 or 3 depending on field order. */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /* Backward reference frame distance (BRFD) for B pictures, clamped at 0. */
    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /* Overlap smoothing: only active above PQUANT >= 9 (or, for advanced
     * profile intra pictures, when CONDOVER requests it). */
    overlap = pic_param->sequence_fields.bits.overlap;

    if (overlap) {
        overlap = 0;
        if (profile != GEN7_VC1_ADVANCED_PROFILE) {
            if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
                pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
                overlap = 1;
            }
        } else {
            if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
                pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
                overlap = 1;
            }
            if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
                pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
                if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
                    overlap = 1;
                } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                           va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
                    overlap = 1;
                }
            }
        }
    }

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    /* Map the (possibly intensity-compensated) MV mode onto the hardware
     * interpolation selector. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 9; /* Half-pel bilinear */
    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
        interpolation_mode = 1; /* Half-pel bicubic */
    else
        interpolation_mode = 0; /* Quarter-pel bicubic */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    /* DW1: picture size in macroblocks (height | width), minus one each. */
    OUT_BCS_BATCH(batch,
                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
                  overlap << 4 |
                  !pic_param->picture_fields.bits.is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  picture_type << 26 |
                  fcm << 24 |
                  alt_pq << 16 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
                  scale_factor << 0);
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  pic_param->reference_fields.bits.reference_distance << 20 |
                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    /* DW5: bitplane presence flags (inverted: "raw mode" bits) and the
     * VLC table selectors. */
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 31 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
}
1689
1690 static void
1691 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1692                              struct decode_state *decode_state,
1693                              struct gen7_mfd_context *gen7_mfd_context)
1694 {
1695     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1696     VAPictureParameterBufferVC1 *pic_param;
1697     int intensitycomp_single;
1698
1699     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1700     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1701     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1702
1703     BEGIN_BCS_BATCH(batch, 6);
1704     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1705     OUT_BCS_BATCH(batch,
1706                   0 << 14 | /* FIXME: double ??? */
1707                   0 << 12 |
1708                   intensitycomp_single << 10 |
1709                   intensitycomp_single << 8 |
1710                   0 << 4 | /* FIXME: interlace mode */
1711                   0);
1712     OUT_BCS_BATCH(batch,
1713                   pic_param->luma_shift << 16 |
1714                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1715     OUT_BCS_BATCH(batch, 0);
1716     OUT_BCS_BATCH(batch, 0);
1717     OUT_BCS_BATCH(batch, 0);
1718     ADVANCE_BCS_BATCH(batch);
1719 }
1720
1721 static void
1722 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1723                               struct decode_state *decode_state,
1724                               struct gen7_mfd_context *gen7_mfd_context)
1725 {
1726     struct i965_driver_data *i965 = i965_driver_data(ctx);
1727     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1728     struct object_surface *obj_surface;
1729     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1730
1731     obj_surface = decode_state->render_object;
1732
1733     if (obj_surface && obj_surface->private_data) {
1734         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1735     }
1736
1737     obj_surface = decode_state->reference_objects[1];
1738
1739     if (obj_surface && obj_surface->private_data) {
1740         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1741     }
1742
1743     BEGIN_BCS_BATCH(batch, 7);
1744     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1745
1746     if (dmv_write_buffer)
1747         OUT_BCS_RELOC64(batch, dmv_write_buffer,
1748                         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1749                         0);
1750     else {
1751         OUT_BCS_BATCH(batch, 0);
1752         OUT_BCS_BATCH(batch, 0);
1753     }
1754
1755     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
1756
1757     if (dmv_read_buffer)
1758         OUT_BCS_RELOC64(batch, dmv_read_buffer,
1759                         I915_GEM_DOMAIN_INSTRUCTION, 0,
1760                         0);
1761     else {
1762         OUT_BCS_BATCH(batch, 0);
1763         OUT_BCS_BATCH(batch, 0);
1764     }
1765
1766     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
1767
1768     ADVANCE_BCS_BATCH(batch);
1769 }
1770
/*
 * Convert a bit offset counted in de-emulated slice-header bytes into a bit
 * offset into the raw bitstream.
 *
 * For the advanced profile (3) the raw stream contains start-code emulation
 * prevention: a 0x03 byte inserted after 0x00 0x00 when the following byte
 * is below 4.  Each such 0x03 shifts the raw offset by one byte; other
 * profiles carry no emulation bytes, so the offset passes through unchanged.
 */
static int
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_bytes = in_slice_data_bit_offset / 8;
    int raw = 0;   /* index into the raw (emulated) byte stream */
    int real = 0;  /* de-emulated header bytes accounted for so far */

    if (profile != 3)
        return in_slice_data_bit_offset;

    while (real < header_bytes) {
        if (!buf[raw] && !buf[raw + 1] && buf[raw + 2] == 3 && buf[raw + 3] < 4) {
            /* 00 00 03 0x: the 0x03 is an emulation byte — skip it. */
            real += 2;
            raw += 3;
        } else {
            real++;
            raw++;
        }
    }

    return 8 * raw + in_slice_data_bit_offset % 8;
}
1792
1793 static void
1794 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1795                         VAPictureParameterBufferVC1 *pic_param,
1796                         VASliceParameterBufferVC1 *slice_param,
1797                         VASliceParameterBufferVC1 *next_slice_param,
1798                         dri_bo *slice_data_bo,
1799                         struct gen7_mfd_context *gen7_mfd_context)
1800 {
1801     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1802     int next_slice_start_vert_pos;
1803     int macroblock_offset;
1804     uint8_t *slice_data = NULL;
1805
1806     dri_bo_map(slice_data_bo, 0);
1807     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1808     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1809                                                                slice_param->macroblock_offset,
1810                                                                pic_param->sequence_fields.bits.profile);
1811     dri_bo_unmap(slice_data_bo);
1812
1813     if (next_slice_param)
1814         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1815     else
1816         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1817
1818     BEGIN_BCS_BATCH(batch, 5);
1819     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1820     OUT_BCS_BATCH(batch,
1821                   slice_param->slice_data_size - (macroblock_offset >> 3));
1822     OUT_BCS_BATCH(batch,
1823                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1824     OUT_BCS_BATCH(batch,
1825                   slice_param->slice_vertical_position << 16 |
1826                   next_slice_start_vert_pos << 0);
1827     OUT_BCS_BATCH(batch,
1828                   (macroblock_offset & 0x7));
1829     ADVANCE_BCS_BATCH(batch);
1830 }
1831
/*
 * Decode one VC-1 picture: program the per-picture MFX pipeline state once,
 * then emit a MFD_VC1_BSD_OBJECT for every slice in every slice-parameter
 * buffer attached to the decode state.
 */
static void
gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    /* Per-picture state, emitted once before the slice loop. */
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);

        /* Peek at the first slice of the next buffer (if any) so the last
         * slice of this buffer knows where the following slice starts. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            /* The BSD object needs the next slice's start row to bound the
             * current slice vertically. */
            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1884
1885 static void
1886 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1887                           struct decode_state *decode_state,
1888                           struct gen7_mfd_context *gen7_mfd_context)
1889 {
1890     struct object_surface *obj_surface;
1891     VAPictureParameterBufferJPEGBaseline *pic_param;
1892     int subsampling = SUBSAMPLE_YUV420;
1893     int fourcc = VA_FOURCC_IMC3;
1894
1895     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1896
1897     if (pic_param->num_components == 1) {
1898         subsampling = SUBSAMPLE_YUV400;
1899         fourcc = VA_FOURCC_Y800;
1900     } else if (pic_param->num_components == 3) {
1901         int h1 = pic_param->components[0].h_sampling_factor;
1902         int h2 = pic_param->components[1].h_sampling_factor;
1903         int h3 = pic_param->components[2].h_sampling_factor;
1904         int v1 = pic_param->components[0].v_sampling_factor;
1905         int v2 = pic_param->components[1].v_sampling_factor;
1906         int v3 = pic_param->components[2].v_sampling_factor;
1907
1908         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1909             v1 == 2 && v2 == 1 && v3 == 1) {
1910             subsampling = SUBSAMPLE_YUV420;
1911             fourcc = VA_FOURCC_IMC3;
1912         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1913                    v1 == 1 && v2 == 1 && v3 == 1) {
1914             subsampling = SUBSAMPLE_YUV422H;
1915             fourcc = VA_FOURCC_422H;
1916         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1917                    v1 == 1 && v2 == 1 && v3 == 1) {
1918             subsampling = SUBSAMPLE_YUV444;
1919             fourcc = VA_FOURCC_444P;
1920         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1921                    v1 == 1 && v2 == 1 && v3 == 1) {
1922             subsampling = SUBSAMPLE_YUV411;
1923             fourcc = VA_FOURCC_411P;
1924         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1925                    v1 == 2 && v2 == 1 && v3 == 1) {
1926             subsampling = SUBSAMPLE_YUV422V;
1927             fourcc = VA_FOURCC_422V;
1928         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1929                    v1 == 2 && v2 == 2 && v3 == 2) {
1930             subsampling = SUBSAMPLE_YUV422H;
1931             fourcc = VA_FOURCC_422H;
1932         } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1933                    v1 == 2 && v2 == 1 && v3 == 1) {
1934             subsampling = SUBSAMPLE_YUV422V;
1935             fourcc = VA_FOURCC_422V;
1936         } else
1937             assert(0);
1938     } else {
1939         assert(0);
1940     }
1941
1942     /* Current decoded picture */
1943     obj_surface = decode_state->render_object;
1944     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1945
1946     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1947     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1948     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1949     gen7_mfd_context->pre_deblocking_output.valid = 1;
1950
1951     gen7_mfd_context->post_deblocking_output.bo = NULL;
1952     gen7_mfd_context->post_deblocking_output.valid = 0;
1953
1954     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1955     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1956
1957     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1958     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1959
1960     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1961     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1962
1963     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1964     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1965
1966     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1967     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1968 }
1969
/* Indexed by the VA rotation value (0/90/180/270 in order) to yield the
 * hardware rotation field for MFX_JPEG_PIC_STATE. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
1976
1977 static void
1978 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1979                         struct decode_state *decode_state,
1980                         struct gen7_mfd_context *gen7_mfd_context)
1981 {
1982     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1983     VAPictureParameterBufferJPEGBaseline *pic_param;
1984     int chroma_type = GEN7_YUV420;
1985     int frame_width_in_blks;
1986     int frame_height_in_blks;
1987
1988     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1989     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1990
1991     if (pic_param->num_components == 1)
1992         chroma_type = GEN7_YUV400;
1993     else if (pic_param->num_components == 3) {
1994         int h1 = pic_param->components[0].h_sampling_factor;
1995         int h2 = pic_param->components[1].h_sampling_factor;
1996         int h3 = pic_param->components[2].h_sampling_factor;
1997         int v1 = pic_param->components[0].v_sampling_factor;
1998         int v2 = pic_param->components[1].v_sampling_factor;
1999         int v3 = pic_param->components[2].v_sampling_factor;
2000
2001         if (h1 == 2 && h2 == 1 && h3 == 1 &&
2002             v1 == 2 && v2 == 1 && v3 == 1)
2003             chroma_type = GEN7_YUV420;
2004         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2005                  v1 == 1 && v2 == 1 && v3 == 1)
2006             chroma_type = GEN7_YUV422H_2Y;
2007         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2008                  v1 == 1 && v2 == 1 && v3 == 1)
2009             chroma_type = GEN7_YUV444;
2010         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2011                  v1 == 1 && v2 == 1 && v3 == 1)
2012             chroma_type = GEN7_YUV411;
2013         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2014                  v1 == 2 && v2 == 1 && v3 == 1)
2015             chroma_type = GEN7_YUV422V_2Y;
2016         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2017                  v1 == 2 && v2 == 2 && v3 == 2)
2018             chroma_type = GEN7_YUV422H_4Y;
2019         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2020                  v1 == 2 && v2 == 1 && v3 == 1)
2021             chroma_type = GEN7_YUV422V_4Y;
2022         else
2023             assert(0);
2024     }
2025
2026     if (chroma_type == GEN7_YUV400 ||
2027         chroma_type == GEN7_YUV444 ||
2028         chroma_type == GEN7_YUV422V_2Y) {
2029         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2030         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2031     } else if (chroma_type == GEN7_YUV411) {
2032         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2033         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2034     } else {
2035         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2036         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2037     }
2038
2039     BEGIN_BCS_BATCH(batch, 3);
2040     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2041     OUT_BCS_BATCH(batch,
2042                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2043                   (chroma_type << 0));
2044     OUT_BCS_BATCH(batch,
2045                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2046                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2047     ADVANCE_BCS_BATCH(batch);
2048 }
2049
/* Huffman table slot IDs for MFX_JPEG_HUFF_TABLE_STATE: index 0 is the
 * luma (Y) table, index 1 the chroma (UV) table. */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
2054
2055 static void
2056 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2057                                struct decode_state *decode_state,
2058                                struct gen7_mfd_context *gen7_mfd_context,
2059                                int num_tables)
2060 {
2061     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2062     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2063     int index;
2064
2065     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2066         return;
2067
2068     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2069
2070     for (index = 0; index < num_tables; index++) {
2071         int id = va_to_gen7_jpeg_hufftable[index];
2072         if (!huffman_table->load_huffman_table[index])
2073             continue;
2074         BEGIN_BCS_BATCH(batch, 53);
2075         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2076         OUT_BCS_BATCH(batch, id);
2077         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2078         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2079         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2080         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2081         ADVANCE_BCS_BATCH(batch);
2082     }
2083 }
2084
/* Maps a 1-based JPEG component id (1 = Y, 2 = Cb, 3 = Cr, 4 = alpha) to
 * the hardware quantizer-matrix slot; index 0 is unused. */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
2092
2093 static void
2094 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2095                        struct decode_state *decode_state,
2096                        struct gen7_mfd_context *gen7_mfd_context)
2097 {
2098     VAPictureParameterBufferJPEGBaseline *pic_param;
2099     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2100     int index;
2101
2102     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2103         return;
2104
2105     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2106     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2107
2108     assert(pic_param->num_components <= 3);
2109
2110     for (index = 0; index < pic_param->num_components; index++) {
2111         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2112         int qm_type;
2113         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2114         unsigned char raster_qm[64];
2115         int j;
2116
2117         if (id > 4 || id < 1)
2118             continue;
2119
2120         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2121             continue;
2122
2123         qm_type = va_to_gen7_jpeg_qm[id];
2124
2125         for (j = 0; j < 64; j++)
2126             raster_qm[zigzag_direct[j]] = qm[j];
2127
2128         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2129     }
2130 }
2131
2132 static void
2133 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2134                          VAPictureParameterBufferJPEGBaseline *pic_param,
2135                          VASliceParameterBufferJPEGBaseline *slice_param,
2136                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2137                          dri_bo *slice_data_bo,
2138                          struct gen7_mfd_context *gen7_mfd_context)
2139 {
2140     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2141     int scan_component_mask = 0;
2142     int i;
2143
2144     assert(slice_param->num_components > 0);
2145     assert(slice_param->num_components < 4);
2146     assert(slice_param->num_components <= pic_param->num_components);
2147
2148     for (i = 0; i < slice_param->num_components; i++) {
2149         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2150         case 1:
2151             scan_component_mask |= (1 << 0);
2152             break;
2153         case 2:
2154             scan_component_mask |= (1 << 1);
2155             break;
2156         case 3:
2157             scan_component_mask |= (1 << 2);
2158             break;
2159         default:
2160             assert(0);
2161             break;
2162         }
2163     }
2164
2165     BEGIN_BCS_BATCH(batch, 6);
2166     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2167     OUT_BCS_BATCH(batch,
2168                   slice_param->slice_data_size);
2169     OUT_BCS_BATCH(batch,
2170                   slice_param->slice_data_offset);
2171     OUT_BCS_BATCH(batch,
2172                   slice_param->slice_horizontal_position << 16 |
2173                   slice_param->slice_vertical_position << 0);
2174     OUT_BCS_BATCH(batch,
2175                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2176                   (scan_component_mask << 27) |                 /* scan components */
2177                   (0 << 26) |   /* disable interrupt allowed */
2178                   (slice_param->num_mcus << 0));                /* MCU count */
2179     OUT_BCS_BATCH(batch,
2180                   (slice_param->restart_interval << 0));    /* RestartInterval */
2181     ADVANCE_BCS_BATCH(batch);
2182 }
2183
2184 /* Workaround for JPEG decoding on Ivybridge */
2185 #ifdef JPEG_WA
2186
/* Canned 16x16 AVC clip decoded as part of the JPEG workaround; the
 * fields below are consumed by the gen8_jpeg_wa_* helpers. */
static struct {
    int width;              /* clip width in pixels */
    int height;             /* clip height in pixels */
    unsigned char data[32]; /* slice bitstream bytes (only data_size valid) */
    int data_size;          /* number of valid bytes in data[] */
    int data_bit_offset;    /* bit offset of the slice payload within data[] */
    int qp;                 /* slice quantization parameter */
} gen7_jpeg_wa_clip = {
    16,
    16,
    {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    14,
    40,
    28,
};
2205
2206 static void
2207 gen8_jpeg_wa_init(VADriverContextP ctx,
2208                   struct gen7_mfd_context *gen7_mfd_context)
2209 {
2210     struct i965_driver_data *i965 = i965_driver_data(ctx);
2211     VAStatus status;
2212     struct object_surface *obj_surface;
2213
2214     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2215         i965_DestroySurfaces(ctx,
2216                              &gen7_mfd_context->jpeg_wa_surface_id,
2217                              1);
2218
2219     status = i965_CreateSurfaces(ctx,
2220                                  gen7_jpeg_wa_clip.width,
2221                                  gen7_jpeg_wa_clip.height,
2222                                  VA_RT_FORMAT_YUV420,
2223                                  1,
2224                                  &gen7_mfd_context->jpeg_wa_surface_id);
2225     assert(status == VA_STATUS_SUCCESS);
2226
2227     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2228     assert(obj_surface);
2229     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2230     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2231
2232     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2233         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2234                                                                "JPEG WA data",
2235                                                                0x1000,
2236                                                                0x1000);
2237         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2238                        0,
2239                        gen7_jpeg_wa_clip.data_size,
2240                        gen7_jpeg_wa_clip.data);
2241     }
2242 }
2243
2244 static void
2245 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2246                               struct gen7_mfd_context *gen7_mfd_context)
2247 {
2248     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2249
2250     BEGIN_BCS_BATCH(batch, 5);
2251     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2252     OUT_BCS_BATCH(batch,
2253                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2254                   (MFD_MODE_VLD << 15) | /* VLD mode */
2255                   (0 << 10) | /* disable Stream-Out */
2256                   (0 << 9)  | /* Post Deblocking Output */
2257                   (1 << 8)  | /* Pre Deblocking Output */
2258                   (0 << 5)  | /* not in stitch mode */
2259                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2260                   (MFX_FORMAT_AVC << 0));
2261     OUT_BCS_BATCH(batch,
2262                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2263                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2264                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2265                   (0 << 1)  |
2266                   (0 << 0));
2267     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2268     OUT_BCS_BATCH(batch, 0); /* reserved */
2269     ADVANCE_BCS_BATCH(batch);
2270 }
2271
2272 static void
2273 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2274                            struct gen7_mfd_context *gen7_mfd_context)
2275 {
2276     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2277     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2278
2279     BEGIN_BCS_BATCH(batch, 6);
2280     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2281     OUT_BCS_BATCH(batch, 0);
2282     OUT_BCS_BATCH(batch,
2283                   ((obj_surface->orig_width - 1) << 18) |
2284                   ((obj_surface->orig_height - 1) << 4));
2285     OUT_BCS_BATCH(batch,
2286                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2287                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2288                   (0 << 22) | /* surface object control state, ignored */
2289                   ((obj_surface->width - 1) << 3) | /* pitch */
2290                   (0 << 2)  | /* must be 0 */
2291                   (1 << 1)  | /* must be tiled */
2292                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2293     OUT_BCS_BATCH(batch,
2294                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2295                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2296     OUT_BCS_BATCH(batch,
2297                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2298                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
2299     ADVANCE_BCS_BATCH(batch);
2300 }
2301
/* Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords) for the JPEG workaround.
 * Only the pre-deblocking destination surface and a transient intra
 * row-store scratch buffer are programmed; all other addresses are 0. */
static void
gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *intra_bo;
    int i;

    /* Scratch buffer that only needs to live for this batch; the
     * relocation below holds the last reference once we unreference it. */
    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
                            "intra row store",
                            128 * 64,
                            0x1000);

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* DW 1-3: pre-deblocking destination (the workaround surface) */
    OUT_BCS_RELOC64(batch,
                    obj_surface->bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);


    OUT_BCS_BATCH(batch, 0); /* DW 4-6: post deblocking, unused */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 7-12: uncompressed-video & stream out, ignored for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 13-15: intra row-store scratch buffer */
    OUT_BCS_RELOC64(batch,
                    intra_bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 16-18: deblocking filter row store, unused */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 19-50: reference picture addresses, none needed for the clip */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, 0); /* DW 51 */

    /* DW 52-54: macroblock status buffer address, unused */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* DW 55-60: ILDB & second ILDB addresses, unused */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* The relocation keeps the scratch buffer alive until the batch
     * completes, so the local reference can be dropped here. */
    dri_bo_unreference(intra_bo);
}
2374
2375 static void
2376 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2377                                      struct gen7_mfd_context *gen7_mfd_context)
2378 {
2379     struct i965_driver_data *i965 = i965_driver_data(ctx);
2380     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2381     dri_bo *bsd_mpc_bo, *mpr_bo;
2382
2383     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2384                               "bsd mpc row store",
2385                               11520, /* 1.5 * 120 * 64 */
2386                               0x1000);
2387
2388     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2389                           "mpr row store",
2390                           7680, /* 1. 0 * 120 * 64 */
2391                           0x1000);
2392
2393     BEGIN_BCS_BATCH(batch, 10);
2394     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2395
2396     OUT_BCS_RELOC64(batch,
2397                     bsd_mpc_bo,
2398                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2399                     0);
2400
2401     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2402
2403     OUT_BCS_RELOC64(batch,
2404                     mpr_bo,
2405                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2406                     0);
2407     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2408
2409     OUT_BCS_BATCH(batch, 0);
2410     OUT_BCS_BATCH(batch, 0);
2411     OUT_BCS_BATCH(batch, 0);
2412
2413     ADVANCE_BCS_BATCH(batch);
2414
2415     dri_bo_unreference(bsd_mpc_bo);
2416     dri_bo_unreference(mpr_bo);
2417 }
2418
static void
gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    /* Intentionally empty: no MFX_QM_STATE is emitted for the canned
     * workaround clip.  NOTE(review): presumably the default/flat
     * quantization matrices suffice here — confirm against the PRM. */
}
2425
2426 static void
2427 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2428                            struct gen7_mfd_context *gen7_mfd_context)
2429 {
2430     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2431     int img_struct = 0;
2432     int mbaff_frame_flag = 0;
2433     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2434
2435     BEGIN_BCS_BATCH(batch, 16);
2436     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2437     OUT_BCS_BATCH(batch,
2438                   width_in_mbs * height_in_mbs);
2439     OUT_BCS_BATCH(batch,
2440                   ((height_in_mbs - 1) << 16) |
2441                   ((width_in_mbs - 1) << 0));
2442     OUT_BCS_BATCH(batch,
2443                   (0 << 24) |
2444                   (0 << 16) |
2445                   (0 << 14) |
2446                   (0 << 13) |
2447                   (0 << 12) | /* differ from GEN6 */
2448                   (0 << 10) |
2449                   (img_struct << 8));
2450     OUT_BCS_BATCH(batch,
2451                   (1 << 10) | /* 4:2:0 */
2452                   (1 << 7) |  /* CABAC */
2453                   (0 << 6) |
2454                   (0 << 5) |
2455                   (0 << 4) |
2456                   (0 << 3) |
2457                   (1 << 2) |
2458                   (mbaff_frame_flag << 1) |
2459                   (0 << 0));
2460     OUT_BCS_BATCH(batch, 0);
2461     OUT_BCS_BATCH(batch, 0);
2462     OUT_BCS_BATCH(batch, 0);
2463     OUT_BCS_BATCH(batch, 0);
2464     OUT_BCS_BATCH(batch, 0);
2465     OUT_BCS_BATCH(batch, 0);
2466     OUT_BCS_BATCH(batch, 0);
2467     OUT_BCS_BATCH(batch, 0);
2468     OUT_BCS_BATCH(batch, 0);
2469     OUT_BCS_BATCH(batch, 0);
2470     OUT_BCS_BATCH(batch, 0);
2471     ADVANCE_BCS_BATCH(batch);
2472 }
2473
2474 static void
2475 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2476                                   struct gen7_mfd_context *gen7_mfd_context)
2477 {
2478     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2479     int i;
2480
2481     BEGIN_BCS_BATCH(batch, 71);
2482     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2483
2484     /* reference surfaces 0..15 */
2485     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2486         OUT_BCS_BATCH(batch, 0); /* top */
2487         OUT_BCS_BATCH(batch, 0); /* bottom */
2488     }
2489
2490     OUT_BCS_BATCH(batch, 0);
2491
2492     /* the current decoding frame/field */
2493     OUT_BCS_BATCH(batch, 0); /* top */
2494     OUT_BCS_BATCH(batch, 0);
2495     OUT_BCS_BATCH(batch, 0);
2496
2497     /* POC List */
2498     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2499         OUT_BCS_BATCH(batch, 0);
2500         OUT_BCS_BATCH(batch, 0);
2501     }
2502
2503     OUT_BCS_BATCH(batch, 0);
2504     OUT_BCS_BATCH(batch, 0);
2505
2506     ADVANCE_BCS_BATCH(batch);
2507 }
2508
2509 static void
2510 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2511                                      struct gen7_mfd_context *gen7_mfd_context)
2512 {
2513     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2514
2515     BEGIN_BCS_BATCH(batch, 11);
2516     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2517     OUT_BCS_RELOC64(batch,
2518                     gen7_mfd_context->jpeg_wa_slice_data_bo,
2519                     I915_GEM_DOMAIN_INSTRUCTION, 0,
2520                     0);
2521     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2522     OUT_BCS_BATCH(batch, 0);
2523     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2524     OUT_BCS_BATCH(batch, 0);
2525     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2526     OUT_BCS_BATCH(batch, 0);
2527     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2528     OUT_BCS_BATCH(batch, 0);
2529     ADVANCE_BCS_BATCH(batch);
2530 }
2531
2532 static void
2533 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2534                             struct gen7_mfd_context *gen7_mfd_context)
2535 {
2536     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2537
2538     /* the input bitsteam format on GEN7 differs from GEN6 */
2539     BEGIN_BCS_BATCH(batch, 6);
2540     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2541     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2542     OUT_BCS_BATCH(batch, 0);
2543     OUT_BCS_BATCH(batch,
2544                   (0 << 31) |
2545                   (0 << 14) |
2546                   (0 << 12) |
2547                   (0 << 10) |
2548                   (0 << 8));
2549     OUT_BCS_BATCH(batch,
2550                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2551                   (0 << 5)  |
2552                   (0 << 4)  |
2553                   (1 << 3) | /* LastSlice Flag */
2554                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2555     OUT_BCS_BATCH(batch, 0);
2556     ADVANCE_BCS_BATCH(batch);
2557 }
2558
/* Emit MFX_AVC_SLICE_STATE for the single I slice of the canned
 * workaround clip (deblocking disabled, no reference lists). */
static void
gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) | /* slice QP from the canned clip */
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2598
/* JPEG decoding workaround: run a complete decode of the tiny canned
 * AVC clip before the caller reprograms the MFX pipe for JPEG.  The
 * command sequence mirrors a normal AVC VLD decode; the call order
 * below is the order the hardware expects the states in. */
static void
gen8_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen8_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
2618
2619 #endif
2620
2621 void
2622 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2623                              struct decode_state *decode_state,
2624                              struct gen7_mfd_context *gen7_mfd_context)
2625 {
2626     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2627     VAPictureParameterBufferJPEGBaseline *pic_param;
2628     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2629     dri_bo *slice_data_bo;
2630     int i, j, max_selector = 0;
2631
2632     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2633     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2634
2635     /* Currently only support Baseline DCT */
2636     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2637     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2638 #ifdef JPEG_WA
2639     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2640 #endif
2641     intel_batchbuffer_emit_mi_flush(batch);
2642     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2643     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2644     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2645     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2646     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2647
2648     for (j = 0; j < decode_state->num_slice_params; j++) {
2649         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2650         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2651         slice_data_bo = decode_state->slice_datas[j]->bo;
2652         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2653
2654         if (j == decode_state->num_slice_params - 1)
2655             next_slice_group_param = NULL;
2656         else
2657             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2658
2659         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2660             int component;
2661
2662             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2663
2664             if (i < decode_state->slice_params[j]->num_elements - 1)
2665                 next_slice_param = slice_param + 1;
2666             else
2667                 next_slice_param = next_slice_group_param;
2668
2669             for (component = 0; component < slice_param->num_components; component++) {
2670                 if (max_selector < slice_param->components[component].dc_table_selector)
2671                     max_selector = slice_param->components[component].dc_table_selector;
2672
2673                 if (max_selector < slice_param->components[component].ac_table_selector)
2674                     max_selector = slice_param->components[component].ac_table_selector;
2675             }
2676
2677             slice_param++;
2678         }
2679     }
2680
2681     assert(max_selector < 2);
2682     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2683
2684     for (j = 0; j < decode_state->num_slice_params; j++) {
2685         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2686         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2687         slice_data_bo = decode_state->slice_datas[j]->bo;
2688         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2689
2690         if (j == decode_state->num_slice_params - 1)
2691             next_slice_group_param = NULL;
2692         else
2693             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2694
2695         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2696             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2697
2698             if (i < decode_state->slice_params[j]->num_elements - 1)
2699                 next_slice_param = slice_param + 1;
2700             else
2701                 next_slice_param = next_slice_group_param;
2702
2703             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2704             slice_param++;
2705         }
2706     }
2707
2708     intel_batchbuffer_end_atomic(batch);
2709     intel_batchbuffer_flush(batch);
2710 }
2711
/* VP8 DC dequantization factors indexed by the clipped quantization
 * index (0..127); see RFC 6386, "VP8 Data Format and Decoding Guide". */
static const int vp8_dc_qlookup[128] = {
    4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
    18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
    29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
    44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
    59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
    75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
    91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
};
2722
/* VP8 AC dequantization factors indexed by the clipped quantization
 * index (0..127); see RFC 6386, "VP8 Data Format and Decoding Guide". */
static const int vp8_ac_qlookup[128] = {
    4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
    20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
    36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
    52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
    78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
};
2733
/* Clamp a VP8 quantization index into the valid lookup range [0, 127]. */
static inline unsigned int vp8_clip_quantization_index(int index)
{
    if (index < 0)
        return 0;

    return index > 127 ? 127 : index;
}
2743
2744 static void
2745 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2746                          struct decode_state *decode_state,
2747                          struct gen7_mfd_context *gen7_mfd_context)
2748 {
2749     struct object_surface *obj_surface;
2750     struct i965_driver_data *i965 = i965_driver_data(ctx);
2751     dri_bo *bo;
2752     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2753     int width_in_mbs = (pic_param->frame_width + 15) / 16;
2754     int height_in_mbs = (pic_param->frame_height + 15) / 16;
2755
2756     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2757     assert(height_in_mbs > 0 && height_in_mbs <= 256);
2758
2759     intel_update_vp8_frame_store_index(ctx,
2760                                        decode_state,
2761                                        pic_param,
2762                                        gen7_mfd_context->reference_surface);
2763
2764     /* Current decoded picture */
2765     obj_surface = decode_state->render_object;
2766     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2767
2768     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2769     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2770     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2771     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2772
2773     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2774     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2775     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2776     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2777
2778     intel_ensure_vp8_segmentation_buffer(ctx,
2779                                          &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2780
2781     /* The same as AVC */
2782     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2783     bo = dri_bo_alloc(i965->intel.bufmgr,
2784                       "intra row store",
2785                       width_in_mbs * 64,
2786                       0x1000);
2787     assert(bo);
2788     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2789     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2790
2791     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2792     bo = dri_bo_alloc(i965->intel.bufmgr,
2793                       "deblocking filter row store",
2794                       width_in_mbs * 64 * 4,
2795                       0x1000);
2796     assert(bo);
2797     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2798     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2799
2800     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2801     bo = dri_bo_alloc(i965->intel.bufmgr,
2802                       "bsd mpc row store",
2803                       width_in_mbs * 64 * 2,
2804                       0x1000);
2805     assert(bo);
2806     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2807     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2808
2809     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2810     bo = dri_bo_alloc(i965->intel.bufmgr,
2811                       "mpr row store",
2812                       width_in_mbs * 64 * 2,
2813                       0x1000);
2814     assert(bo);
2815     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2816     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2817
2818     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2819 }
2820
2821 static void
2822 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2823                        struct decode_state *decode_state,
2824                        struct gen7_mfd_context *gen7_mfd_context)
2825 {
2826     struct i965_driver_data *i965 = i965_driver_data(ctx);
2827     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2828     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2829     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2830     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2831     dri_bo *probs_bo = decode_state->probability_data->bo;
2832     int i, j, log2num;
2833     unsigned int quantization_value[4][6];
2834
2835     /* There is no safe way to error out if the segmentation buffer
2836        could not be allocated. So, instead of aborting, simply decode
2837        something even if the result may look totally inacurate */
2838     const unsigned int enable_segmentation =
2839         pic_param->pic_fields.bits.segmentation_enabled &&
2840         gen7_mfd_context->segmentation_buffer.valid;
2841
2842     log2num = (int)log2(slice_param->num_of_partitions - 1);
2843
2844     BEGIN_BCS_BATCH(batch, 38);
2845     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2846     OUT_BCS_BATCH(batch,
2847                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2848                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2849     OUT_BCS_BATCH(batch,
2850                   log2num << 24 |
2851                   pic_param->pic_fields.bits.sharpness_level << 16 |
2852                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2853                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2854                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2855                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2856                   (enable_segmentation &&
2857                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
2858                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2859                   (enable_segmentation &&
2860                    !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2861                   (enable_segmentation &&
2862                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2863                   (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2864                   pic_param->pic_fields.bits.filter_type << 4 |
2865                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2866                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2867
2868     OUT_BCS_BATCH(batch,
2869                   pic_param->loop_filter_level[3] << 24 |
2870                   pic_param->loop_filter_level[2] << 16 |
2871                   pic_param->loop_filter_level[1] <<  8 |
2872                   pic_param->loop_filter_level[0] <<  0);
2873
2874     /* Quantizer Value for 4 segmetns, DW4-DW15 */
2875     for (i = 0; i < 4; i++) {
2876         quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2877         quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2878         quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /*y2dc*/
2879         /* 101581>>16 is equivalent to 155/100 */
2880         quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /*y2ac*/
2881         quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2882         quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2883
2884         quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2885         quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2886
2887         OUT_BCS_BATCH(batch,
2888                       quantization_value[i][0] << 16 | /* Y1AC */
2889                       quantization_value[i][1] <<  0); /* Y1DC */
2890         OUT_BCS_BATCH(batch,
2891                       quantization_value[i][5] << 16 | /* UVAC */
2892                       quantization_value[i][4] <<  0); /* UVDC */
2893         OUT_BCS_BATCH(batch,
2894                       quantization_value[i][3] << 16 | /* Y2AC */
2895                       quantization_value[i][2] <<  0); /* Y2DC */
2896     }
2897
2898     /* CoeffProbability table for non-key frame, DW16-DW18 */
2899     if (probs_bo) {
2900         OUT_BCS_RELOC64(batch, probs_bo,
2901                         0, I915_GEM_DOMAIN_INSTRUCTION,
2902                         0);
2903         OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2904     } else {
2905         OUT_BCS_BATCH(batch, 0);
2906         OUT_BCS_BATCH(batch, 0);
2907         OUT_BCS_BATCH(batch, 0);
2908     }
2909
2910     OUT_BCS_BATCH(batch,
2911                   pic_param->mb_segment_tree_probs[2] << 16 |
2912                   pic_param->mb_segment_tree_probs[1] <<  8 |
2913                   pic_param->mb_segment_tree_probs[0] <<  0);
2914
2915     OUT_BCS_BATCH(batch,
2916                   pic_param->prob_skip_false << 24 |
2917                   pic_param->prob_intra      << 16 |
2918                   pic_param->prob_last       <<  8 |
2919                   pic_param->prob_gf         <<  0);
2920
2921     OUT_BCS_BATCH(batch,
2922                   pic_param->y_mode_probs[3] << 24 |
2923                   pic_param->y_mode_probs[2] << 16 |
2924                   pic_param->y_mode_probs[1] <<  8 |
2925                   pic_param->y_mode_probs[0] <<  0);
2926
2927     OUT_BCS_BATCH(batch,
2928                   pic_param->uv_mode_probs[2] << 16 |
2929                   pic_param->uv_mode_probs[1] <<  8 |
2930                   pic_param->uv_mode_probs[0] <<  0);
2931
2932     /* MV update value, DW23-DW32 */
2933     for (i = 0; i < 2; i++) {
2934         for (j = 0; j < 20; j += 4) {
2935             OUT_BCS_BATCH(batch,
2936                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2937                           pic_param->mv_probs[i][j + 2] << 16 |
2938                           pic_param->mv_probs[i][j + 1] <<  8 |
2939                           pic_param->mv_probs[i][j + 0] <<  0);
2940         }
2941     }
2942
2943     OUT_BCS_BATCH(batch,
2944                   (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2945                   (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2946                   (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
2947                   (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);
2948
2949     OUT_BCS_BATCH(batch,
2950                   (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2951                   (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2952                   (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
2953                   (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);
2954
2955     /* segmentation id stream base address, DW35-DW37 */
2956     if (enable_segmentation) {
2957         OUT_BCS_RELOC64(batch, gen7_mfd_context->segmentation_buffer.bo,
2958                         0, I915_GEM_DOMAIN_INSTRUCTION,
2959                         0);
2960         OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2961     } else {
2962         OUT_BCS_BATCH(batch, 0);
2963         OUT_BCS_BATCH(batch, 0);
2964         OUT_BCS_BATCH(batch, 0);
2965     }
2966     ADVANCE_BCS_BATCH(batch);
2967 }
2968
2969 static void
2970 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2971                         VAPictureParameterBufferVP8 *pic_param,
2972                         VASliceParameterBufferVP8 *slice_param,
2973                         dri_bo *slice_data_bo,
2974                         struct gen7_mfd_context *gen7_mfd_context)
2975 {
2976     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2977     int i, log2num;
2978     unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
2979     unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
2980     unsigned int partition_size_0 = slice_param->partition_size[0];
2981
2982     assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
2983     if (used_bits == 8) {
2984         used_bits = 0;
2985         offset += 1;
2986         partition_size_0 -= 1;
2987     }
2988
2989     assert(slice_param->num_of_partitions >= 2);
2990     assert(slice_param->num_of_partitions <= 9);
2991
2992     log2num = (int)log2(slice_param->num_of_partitions - 1);
2993
2994     BEGIN_BCS_BATCH(batch, 22);
2995     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2996     OUT_BCS_BATCH(batch,
2997                   used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2998                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
2999                   log2num << 4 |
3000                   (slice_param->macroblock_offset & 0x7));
3001     OUT_BCS_BATCH(batch,
3002                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
3003                   0);
3004
3005     OUT_BCS_BATCH(batch, partition_size_0 + 1);
3006     OUT_BCS_BATCH(batch, offset);
3007     //partion sizes in bytes are present after the above first partition when there are more than one token partition
3008     offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
3009     for (i = 1; i < 9; i++) {
3010         if (i < slice_param->num_of_partitions) {
3011             OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
3012             OUT_BCS_BATCH(batch, offset);
3013         } else {
3014             OUT_BCS_BATCH(batch, 0);
3015             OUT_BCS_BATCH(batch, 0);
3016         }
3017
3018         offset += slice_param->partition_size[i];
3019     }
3020
3021     OUT_BCS_BATCH(batch, 0); /* concealment method */
3022
3023     ADVANCE_BCS_BATCH(batch);
3024 }
3025
3026 void
3027 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3028                             struct decode_state *decode_state,
3029                             struct gen7_mfd_context *gen7_mfd_context)
3030 {
3031     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3032     VAPictureParameterBufferVP8 *pic_param;
3033     VASliceParameterBufferVP8 *slice_param;
3034     dri_bo *slice_data_bo;
3035
3036     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3037     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3038
3039     /* one slice per frame */
3040     if (decode_state->num_slice_params != 1 ||
3041         (!decode_state->slice_params ||
3042          !decode_state->slice_params[0] ||
3043          (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3044         (!decode_state->slice_datas ||
3045          !decode_state->slice_datas[0] ||
3046          !decode_state->slice_datas[0]->bo) ||
3047         !decode_state->probability_data) {
3048         WARN_ONCE("Wrong parameters for VP8 decoding\n");
3049
3050         return;
3051     }
3052
3053     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3054     slice_data_bo = decode_state->slice_datas[0]->bo;
3055
3056     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3057     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3058     intel_batchbuffer_emit_mi_flush(batch);
3059     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3060     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3061     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3062     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3063     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3064     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3065     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3066     intel_batchbuffer_end_atomic(batch);
3067     intel_batchbuffer_flush(batch);
3068 }
3069
3070 static VAStatus
3071 gen8_mfd_decode_picture(VADriverContextP ctx,
3072                         VAProfile profile,
3073                         union codec_state *codec_state,
3074                         struct hw_context *hw_context)
3075
3076 {
3077     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3078     struct decode_state *decode_state = &codec_state->decode;
3079     VAStatus vaStatus;
3080
3081     assert(gen7_mfd_context);
3082
3083     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3084
3085     if (vaStatus != VA_STATUS_SUCCESS)
3086         goto out;
3087
3088     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3089
3090     switch (profile) {
3091     case VAProfileMPEG2Simple:
3092     case VAProfileMPEG2Main:
3093         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3094         break;
3095
3096     case VAProfileH264ConstrainedBaseline:
3097     case VAProfileH264Main:
3098     case VAProfileH264High:
3099     case VAProfileH264StereoHigh:
3100     case VAProfileH264MultiviewHigh:
3101         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3102         break;
3103
3104     case VAProfileVC1Simple:
3105     case VAProfileVC1Main:
3106     case VAProfileVC1Advanced:
3107         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3108         break;
3109
3110     case VAProfileJPEGBaseline:
3111         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3112         break;
3113
3114     case VAProfileVP8Version0_3:
3115         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3116         break;
3117
3118     default:
3119         assert(0);
3120         break;
3121     }
3122
3123     vaStatus = VA_STATUS_SUCCESS;
3124
3125 out:
3126     return vaStatus;
3127 }
3128
3129 static void
3130 gen8_mfd_context_destroy(void *hw_context)
3131 {
3132     VADriverContextP ctx;
3133     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3134
3135     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3136
3137     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3138     gen7_mfd_context->post_deblocking_output.bo = NULL;
3139
3140     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3141     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3142
3143     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3144     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3145
3146     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3147     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3148
3149     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3150     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3151
3152     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3153     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3154
3155     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3156     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3157
3158     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3159     gen7_mfd_context->segmentation_buffer.bo = NULL;
3160
3161     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3162
3163     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3164         i965_DestroySurfaces(ctx,
3165                              &gen7_mfd_context->jpeg_wa_surface_id,
3166                              1);
3167         gen7_mfd_context->jpeg_wa_surface_object = NULL;
3168     }
3169
3170     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3171     free(gen7_mfd_context);
3172 }
3173
3174 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3175                                         struct gen7_mfd_context *gen7_mfd_context)
3176 {
3177     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3178     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3179     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3180     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3181 }
3182
3183 struct hw_context *
3184 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3185 {
3186     struct intel_driver_data *intel = intel_driver_data(ctx);
3187     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3188     int i;
3189
3190     if (!gen7_mfd_context)
3191         return NULL;
3192
3193     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3194     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3195     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3196
3197     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3198         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3199         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3200     }
3201
3202     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3203     gen7_mfd_context->segmentation_buffer.valid = 0;
3204
3205     switch (obj_config->profile) {
3206     case VAProfileMPEG2Simple:
3207     case VAProfileMPEG2Main:
3208         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3209         break;
3210
3211     case VAProfileH264ConstrainedBaseline:
3212     case VAProfileH264Main:
3213     case VAProfileH264High:
3214     case VAProfileH264StereoHigh:
3215     case VAProfileH264MultiviewHigh:
3216         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3217         break;
3218     default:
3219         break;
3220     }
3221
3222     gen7_mfd_context->driver_context = ctx;
3223     return (struct hw_context *)gen7_mfd_context;
3224 }