OSDN Git Service

build: automake options in configure.ac
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV     2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
/* Raster-scan position -> zig-zag scan order mapping (as defined by the
 * MPEG-2/JPEG standards).  Used to reorder quantization-matrix
 * coefficients before loading them into the MFX hardware. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
61
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77
78         if (!gen7_avc_surface)
79             return;
80
81         gen7_avc_surface->base.frame_store_id = -1;
82         assert((obj_surface->size & 0x3f) == 0);
83         obj_surface->private_data = gen7_avc_surface;
84     }
85
86     /* DMV buffers now relate to the whole frame, irrespective of
87        field coding modes */
88     if (gen7_avc_surface->dmv_top == NULL) {
89         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
90                                                  "direct mv w/r buffer",
91                                                  width_in_mbs * height_in_mbs * 128,
92                                                  0x1000);
93         assert(gen7_avc_surface->dmv_top);
94     }
95 }
96
/*
 * Emit MFX_PIPE_MODE_SELECT: configure the MFX engine for VLD decoding
 * of the selected codec (standard_select is one of the MFX_FORMAT_*
 * codes asserted below).  The pre-/post-deblocking output enables
 * mirror the validity flags prepared elsewhere in the context.
 */
static void
gen8_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1 ||
           standard_select == MFX_FORMAT_JPEG ||
           standard_select == MFX_FORMAT_VP8);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
132
/*
 * Emit MFX_SURFACE_STATE describing the destination surface: overall
 * dimensions, pitch, tiling (Y-major is mandatory) and the Y offsets of
 * the Cb/Cr planes.  JPEG is the odd one out: it uses non-interleaved
 * chroma and therefore a non-zero Cr offset.
 */
static void
gen8_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface = decode_state->render_object;
    unsigned int y_cb_offset;
    unsigned int y_cr_offset;
    unsigned int surface_format;

    assert(obj_surface);

    y_cb_offset = obj_surface->y_cb_offset;
    y_cr_offset = obj_surface->y_cr_offset;

    /* Y800 (grayscale) decodes to a monochrome surface; everything else
     * visible here uses 8-bit planar 4:2:0. */
    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
                     MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 18) |
                  ((obj_surface->orig_width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (surface_format << 28) | /* 420 planar YUV surface */
                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  ((standard_select == MFX_FORMAT_JPEG ? y_cr_offset : 0) << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
175
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): the full set of buffer
 * addresses the MFX pipe reads/writes — pre/post-deblocking outputs,
 * row-store scratch buffers, and the 16 reference picture surfaces.
 * Each address slot is a 64-bit relocation (2 dwords) followed by a
 * MOCS dword; invalid/absent buffers are emitted as two zero dwords so
 * the command length stays fixed.
 */
static void
gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* Pre-deblock 1-3 */
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->pre_deblocking_output.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Post-deblocking 4-6 */
    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->post_deblocking_output.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* intra row-store scratch 13-15 */
    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* deblocking-filter-row-store 16-18 */
    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 19..50: 16 reference surfaces, 2 dwords each */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->bo) {
            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

            OUT_BCS_RELOC64(batch, obj_surface->bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }

    }

    /* reference property 51 */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Macroblock status & ILDB 52-57 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the second Macroblock status 58-60 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
281
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): point the MFX indirect
 * bitstream object at the slice data BO.  The remaining indirect-object
 * slots (MV, IT-COFF, IT-DBLK, PAK-BSE) are unused for VLD decode and
 * are emitted as zeros.
 */
static void
gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    BEGIN_BCS_BATCH(batch, 26);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* MFX In BS 1-5 */
    OUT_BCS_RELOC64(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* Upper bound 4-5 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX indirect MV 6-10 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX IT_COFF 11-15 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX IT_DBLK 16-20 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX PAK_BSE object for encoder 21-25 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
330
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): addresses of the
 * BSD/MPC row-store, MPR row-store and (VC-1) bitplane read buffers.
 * Each slot is a 64-bit relocation plus a MOCS dword; invalid buffers
 * are emitted as two zero dwords to keep the command length fixed.
 */
static void
gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* BSD/MPC Row Store Scratch buffer 1-3 */
    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* MPR Row Store Scratch buffer 4-6 */
    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Bitplane 7-9 */
    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, 0,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    ADVANCE_BCS_BATCH(batch);
}
377
378 static void
379 gen8_mfd_qm_state(VADriverContextP ctx,
380                   int qm_type,
381                   unsigned char *qm,
382                   int qm_length,
383                   struct gen7_mfd_context *gen7_mfd_context)
384 {
385     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
386     unsigned int qm_buffer[16];
387
388     assert(qm_length <= 16 * 4);
389     memcpy(qm_buffer, qm, qm_length);
390
391     BEGIN_BCS_BATCH(batch, 18);
392     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
393     OUT_BCS_BATCH(batch, qm_type << 0);
394     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
395     ADVANCE_BCS_BATCH(batch);
396 }
397
/*
 * Emit MFX_AVC_IMG_STATE (17 dwords): per-picture AVC decode
 * parameters — frame size in macroblocks, QP index offsets, weighted
 * prediction mode, picture structure and the sequence/picture flag
 * bits.  Only 4:2:0 and monochrome chroma formats are supported by the
 * MFX unit, as asserted below.
 */
static void
gen8_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    /* img_struct: 0 = frame, 1 = top field, 3 = bottom field */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* field pictures must carry field_pic_flag, frames must not */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF: sequence allows MB-adaptive frame/field and this picture
     * is coded as a frame */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 17);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
    OUT_BCS_BATCH(batch,
                  (width_in_mbs * height_in_mbs - 1));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
483
484 static void
485 gen8_mfd_avc_qm_state(VADriverContextP ctx,
486                       struct decode_state *decode_state,
487                       struct gen7_mfd_context *gen7_mfd_context)
488 {
489     VAIQMatrixBufferH264 *iq_matrix;
490     VAPictureParameterBufferH264 *pic_param;
491
492     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
493         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
494     else
495         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
496
497     assert(decode_state->pic_param && decode_state->pic_param->buffer);
498     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
499
500     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
501     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
502
503     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
504         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
505         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
506     }
507 }
508
509 static inline void
510 gen8_mfd_avc_picid_state(VADriverContextP ctx,
511                          struct decode_state *decode_state,
512                          struct gen7_mfd_context *gen7_mfd_context)
513 {
514     gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
515                                gen7_mfd_context->reference_surface);
516 }
517
518 static void
519 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
520                               struct decode_state *decode_state,
521                               VAPictureParameterBufferH264 *pic_param,
522                               VASliceParameterBufferH264 *slice_param,
523                               struct gen7_mfd_context *gen7_mfd_context)
524 {
525     struct i965_driver_data *i965 = i965_driver_data(ctx);
526     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
527     struct object_surface *obj_surface;
528     GenAvcSurface *gen7_avc_surface;
529     VAPictureH264 *va_pic;
530     int i;
531
532     BEGIN_BCS_BATCH(batch, 71);
533     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
534
535     /* reference surfaces 0..15 */
536     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
537         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
538             gen7_mfd_context->reference_surface[i].obj_surface &&
539             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
540
541             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
542             gen7_avc_surface = obj_surface->private_data;
543
544             OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
545                             I915_GEM_DOMAIN_INSTRUCTION, 0,
546                             0);
547         } else {
548             OUT_BCS_BATCH(batch, 0);
549             OUT_BCS_BATCH(batch, 0);
550         }
551     }
552
553     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
554
555     /* the current decoding frame/field */
556     va_pic = &pic_param->CurrPic;
557     obj_surface = decode_state->render_object;
558     assert(obj_surface->bo && obj_surface->private_data);
559     gen7_avc_surface = obj_surface->private_data;
560
561     OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
562                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
563                     0);
564
565     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
566
567     /* POC List */
568     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
569         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
570
571         if (obj_surface) {
572             const VAPictureH264 * const va_pic = avc_find_picture(
573                                                      obj_surface->base.id, pic_param->ReferenceFrames,
574                                                      ARRAY_ELEMS(pic_param->ReferenceFrames));
575
576             assert(va_pic != NULL);
577             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
578             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
579         } else {
580             OUT_BCS_BATCH(batch, 0);
581             OUT_BCS_BATCH(batch, 0);
582         }
583     }
584
585     va_pic = &pic_param->CurrPic;
586     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
587     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
588
589     ADVANCE_BCS_BATCH(batch);
590 }
591
592 static void
593 gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
594                                  VAPictureParameterBufferH264 *pic_param,
595                                  VASliceParameterBufferH264 *next_slice_param,
596                                  struct gen7_mfd_context *gen7_mfd_context)
597 {
598     gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
599 }
600
/*
 * Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice: normalized slice
 * type, active reference counts per list, QP/deblocking parameters and
 * the start position of this slice plus that of the next one (or the
 * end of the picture for the last slice).  MBAFF pictures double the
 * vertical macroblock positions since each MB pair spans two rows.
 */
static void
gen8_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* Fold SI/SP into their I/P equivalents for the hardware. */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    /* Active reference counts: I uses none, P uses list 0 only. */
    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    first_mb_in_slice = slice_param->first_mb_in_slice;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    /* In MBAFF each macroblock pair covers two rows. */
    if (mbaff_picture)
        slice_ver_pos = slice_ver_pos << 1;
    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;

        if (mbaff_picture)
            next_slice_ver_pos = next_slice_ver_pos << 1;
    } else {
        /* Last slice: "next" position is the bottom of the picture
         * (halved for field pictures). */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
691
692 static inline void
693 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
694                            VAPictureParameterBufferH264 *pic_param,
695                            VASliceParameterBufferH264 *slice_param,
696                            struct gen7_mfd_context *gen7_mfd_context)
697 {
698     gen6_send_avc_ref_idx_state(
699         gen7_mfd_context->base.batch,
700         slice_param,
701         gen7_mfd_context->reference_surface
702     );
703 }
704
705 static void
706 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
707                                 VAPictureParameterBufferH264 *pic_param,
708                                 VASliceParameterBufferH264 *slice_param,
709                                 struct gen7_mfd_context *gen7_mfd_context)
710 {
711     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
712     int i, j, num_weight_offset_table = 0;
713     short weightoffsets[32 * 6];
714
715     if ((slice_param->slice_type == SLICE_TYPE_P ||
716          slice_param->slice_type == SLICE_TYPE_SP) &&
717         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
718         num_weight_offset_table = 1;
719     }
720
721     if ((slice_param->slice_type == SLICE_TYPE_B) &&
722         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
723         num_weight_offset_table = 2;
724     }
725
726     for (i = 0; i < num_weight_offset_table; i++) {
727         BEGIN_BCS_BATCH(batch, 98);
728         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
729         OUT_BCS_BATCH(batch, i);
730
731         if (i == 0) {
732             for (j = 0; j < 32; j++) {
733                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
734                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
735                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
736                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
737                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
738                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
739             }
740         } else {
741             for (j = 0; j < 32; j++) {
742                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
743                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
744                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
745                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
746                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
747                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
748             }
749         }
750
751         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
752         ADVANCE_BCS_BATCH(batch);
753     }
754 }
755
/*
 * Emit one MFD_AVC_BSD_OBJECT command, pointing the bitstream decoder
 * at this slice's data in the indirect object buffer and telling it
 * where the first macroblock starts within that data.
 */
static void
gen8_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* Bit offset of the first MB past the slice header (depends on the
     * entropy coding mode: CAVLC vs. CABAC alignment). */
    int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
                                                            slice_param,
                                                            pic_param->pic_fields.bits.entropy_coding_mode_flag);

    /* the input bitsteam format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    /* DW1: indirect data length; DW2: offset into the slice data BO. */
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_data_size));
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    /* DW3: all error-handling/concealment controls left at 0. */
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    /* DW4: byte offset of the first MB, plus the residual bit offset in
     * the low 3 bits.  NOTE(review): bit 7 is set unconditionally by
     * this driver — confirm its meaning against the Gen8 PRM. */
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) |
                  (1 << 7)  |
                  (0 << 5)  |
                  (0 << 4)  |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
791
/*
 * One-time AVC context initialization: seed the cached IQ matrix with
 * default flat scaling lists so that streams which never submit a
 * VAIQMatrixBufferH264 still decode with valid matrices.
 */
static inline void
gen8_mfd_avc_context_init(
    VADriverContextP         ctx,
    struct gen7_mfd_context *gen7_mfd_context
)
{
    /* Initialize flat scaling lists */
    avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
}
801
/*
 * Per-picture setup for AVC decoding: decide whether in-loop deblocking
 * (ILDB) is needed, bind the render target and reference frame store,
 * and (re)allocate the MFX row-store scratch buffers sized from the
 * frame width.
 */
static void
gen8_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    unsigned int width_in_mbs, height_in_mbs;

    /* Deblocking is needed unless every slice disables it outright
     * (disable_deblocking_filter_idc == 1); stop scanning at the first
     * slice that requires it. */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
                                       gen7_mfd_context->reference_surface);
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    if (pic_param->pic_fields.bits.reference_pic_flag)
        obj_surface->flags |= SURFACE_REFERENCED;
    else
        obj_surface->flags &= ~SURFACE_REFERENCED;

    avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
    gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* Route decoded output through exactly one of the two paths:
     * post-deblocking when ILDB runs, pre-deblocking otherwise.  Both
     * reference the render target's BO; only the valid flag differs. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Row-store scratch buffers, sized per MB column of the frame.
     * NOTE(review): the per-MB byte factors follow the MFX row-store
     * requirements — confirm against the Gen8 PRM. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* Bitplane data is VC-1 only; never valid for AVC. */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
903
/*
 * Decode one AVC picture: run per-picture init, then emit the complete
 * MFX command sequence (pipe/surface/buffer state, QM, PicID, IMG
 * state) followed by per-slice state + BSD objects, all inside one
 * atomic BCS batch.
 */
static void
gen8_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);

    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);

    /* Walk all slice parameter buffers; each may contain several slice
     * parameter elements sharing one slice data BO. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);

        /* First slice of the next buffer — needed so the last element
         * of this buffer knows where the following slice starts. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        /* If the picture does not start at MB 0, emit a phantom slice
         * to cover the missing leading macroblocks. */
        if (j == 0 && slice_param->first_mb_in_slice)
            gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            /* Per-slice states must be programmed before the BSD object
             * that kicks off decoding of the slice. */
            gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
968
/*
 * Per-picture setup for MPEG-2 decoding: bind reference surfaces and
 * the render target, and (re)allocate the BSD/MPC row-store scratch
 * buffer sized from the frame width.  MPEG-2 has no in-loop filter, so
 * output always goes through the pre-deblocking path.
 */
static void
gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferMPEG2 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    unsigned int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;

    mpeg2_set_reference_surfaces(
        ctx,
        gen7_mfd_context->reference_surface,
        decode_state,
        pic_param
    );

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = 1;

    /* NOTE(review): 96 bytes per MB column per the MFX BSD/MPC
     * row-store requirements — confirm against the Gen8 PRM. */
    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* The remaining pipe buffers are unused for MPEG-2. */
    gen7_mfd_context->post_deblocking_output.valid = 0;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
1015
1016 static void
1017 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1018                          struct decode_state *decode_state,
1019                          struct gen7_mfd_context *gen7_mfd_context)
1020 {
1021     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1022     VAPictureParameterBufferMPEG2 *pic_param;
1023     unsigned int slice_concealment_disable_bit = 0;
1024
1025     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1026     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1027
1028     slice_concealment_disable_bit = 1;
1029
1030     BEGIN_BCS_BATCH(batch, 13);
1031     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1032     OUT_BCS_BATCH(batch,
1033                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1034                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1035                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1036                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1037                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1038                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1039                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1040                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1041                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1042                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1043                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1044                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1045     OUT_BCS_BATCH(batch,
1046                   pic_param->picture_coding_type << 9);
1047     OUT_BCS_BATCH(batch,
1048                   (slice_concealment_disable_bit << 31) |
1049                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1050                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1051     OUT_BCS_BATCH(batch, 0);
1052     OUT_BCS_BATCH(batch, 0);
1053     OUT_BCS_BATCH(batch, 0);
1054     OUT_BCS_BATCH(batch, 0);
1055     OUT_BCS_BATCH(batch, 0);
1056     OUT_BCS_BATCH(batch, 0);
1057     OUT_BCS_BATCH(batch, 0);
1058     OUT_BCS_BATCH(batch, 0);
1059     OUT_BCS_BATCH(batch, 0);
1060     ADVANCE_BCS_BATCH(batch);
1061 }
1062
1063 static void
1064 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1065                         struct decode_state *decode_state,
1066                         struct gen7_mfd_context *gen7_mfd_context)
1067 {
1068     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1069     int i, j;
1070
1071     /* Update internal QM state */
1072     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1073         VAIQMatrixBufferMPEG2 * const iq_matrix =
1074             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1075
1076         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1077             iq_matrix->load_intra_quantiser_matrix) {
1078             gen_iq_matrix->load_intra_quantiser_matrix =
1079                 iq_matrix->load_intra_quantiser_matrix;
1080             if (iq_matrix->load_intra_quantiser_matrix) {
1081                 for (j = 0; j < 64; j++)
1082                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1083                         iq_matrix->intra_quantiser_matrix[j];
1084             }
1085         }
1086
1087         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1088             iq_matrix->load_non_intra_quantiser_matrix) {
1089             gen_iq_matrix->load_non_intra_quantiser_matrix =
1090                 iq_matrix->load_non_intra_quantiser_matrix;
1091             if (iq_matrix->load_non_intra_quantiser_matrix) {
1092                 for (j = 0; j < 64; j++)
1093                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1094                         iq_matrix->non_intra_quantiser_matrix[j];
1095             }
1096         }
1097     }
1098
1099     /* Commit QM state to HW */
1100     for (i = 0; i < 2; i++) {
1101         unsigned char *qm = NULL;
1102         int qm_type;
1103
1104         if (i == 0) {
1105             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1106                 qm = gen_iq_matrix->intra_quantiser_matrix;
1107                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1108             }
1109         } else {
1110             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1111                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1112                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1113             }
1114         }
1115
1116         if (!qm)
1117             continue;
1118
1119         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1120     }
1121 }
1122
/*
 * Emit one MFD_MPEG2_BSD_OBJECT command for a slice.  The slice extent
 * (macroblock count) is derived from the distance between this slice's
 * start position and the next slice's start — or the bottom of the
 * picture for the last slice.
 */
static void
gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    /* NOTE(review): when wa_mpeg2_slice_vertical_position > 0 the app
     * presumably reports field-picture slice positions in frame units,
     * so they are halved below — confirm in
     * mpeg2_wa_slice_vertical_position(). */
    is_field_pic_wa = is_field_pic &&
                      gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    /* End position: the next slice's start, or one row past the bottom
     * of the (field) picture for the final slice. */
    if (next_slice_param == NULL) {
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    /* DW1/DW2: data length and start offset, skipping the whole bytes
     * of the slice header (macroblock_offset is in bits). */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    /* DW3: start MB position, MB count, last-slice flags, and the bit
     * offset of the first MB within its byte. */
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    /* DW4: quantiser scale code and the end MB position. */
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (vpos1 << 8 | hpos1));
    ADVANCE_BCS_BATCH(batch);
}
1171
/*
 * Decode one MPEG-2 picture: run per-picture init, then emit the MFX
 * state sequence followed by one BSD object per slice, all inside one
 * atomic BCS batch.
 */
static void
gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Detect (once per context) whether slice vertical positions need
     * the field-picture workaround applied in the BSD object. */
    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    /* Walk all slice parameter buffers; each may contain several slice
     * parameter elements sharing one slice data BO. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);

        /* First slice of the next buffer, needed by the last element of
         * this buffer to compute its macroblock extent. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1227
/* Map the VA-API VC-1 MV mode to the Gen7+ MFX MV mode encoding. */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};
1234
/* VC-1 B-picture scale factors indexed by the decoded BFRACTION value,
 * used for direct-mode MV scaling.  NOTE(review): the ordering must
 * match the BFRACTION code ordering assumed by the callers — confirm
 * against the VC-1 specification's BFRACTION table. */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160,
    224,
};
1242
/* Map the VA-API VC-1 conditional overlap (CONDOVER) value to the
 * hardware encoding.  NOTE(review): confirm the encoding against the
 * Gen PRM VC1_PIC_STATE documentation. */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};
1248
/* Split a VC-1 field-interlace FPTYPE (the 3-bit picture_type covering
 * both fields) into per-field picture types: column 0 is the first
 * decoded field, column 1 the second (indexed with !is_first_field). */
static const int fptype_to_picture_type[8][2] = {
    {GEN7_VC1_I_PICTURE, GEN7_VC1_I_PICTURE},
    {GEN7_VC1_I_PICTURE, GEN7_VC1_P_PICTURE},
    {GEN7_VC1_P_PICTURE, GEN7_VC1_I_PICTURE},
    {GEN7_VC1_P_PICTURE, GEN7_VC1_P_PICTURE},
    {GEN7_VC1_B_PICTURE, GEN7_VC1_B_PICTURE},
    {GEN7_VC1_B_PICTURE, GEN7_VC1_BI_PICTURE},
    {GEN7_VC1_BI_PICTURE, GEN7_VC1_B_PICTURE},
    {GEN7_VC1_BI_PICTURE, GEN7_VC1_BI_PICTURE}
};
1259
1260 static void
1261 gen8_mfd_free_vc1_surface(void **data)
1262 {
1263     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1264
1265     if (!gen7_vc1_surface)
1266         return;
1267
1268     dri_bo_unreference(gen7_vc1_surface->dmv_top);
1269     dri_bo_unreference(gen7_vc1_surface->dmv_bottom);
1270     free(gen7_vc1_surface);
1271     *data = NULL;
1272 }
1273
1274 static void
1275 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1276                           VAPictureParameterBufferVC1 *pic_param,
1277                           struct object_surface *obj_surface)
1278 {
1279     struct i965_driver_data *i965 = i965_driver_data(ctx);
1280     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1281     int height_in_mbs;
1282     int picture_type;
1283     int is_first_field = 1;
1284
1285     if (!pic_param->sequence_fields.bits.interlace ||
1286         (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
1287         picture_type = pic_param->picture_fields.bits.picture_type;
1288     } else {/* Field-Interlace */
1289         is_first_field = pic_param->picture_fields.bits.is_first_field;
1290         picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
1291     }
1292
1293     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1294
1295     if (!gen7_vc1_surface) {
1296         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1297
1298         if (!gen7_vc1_surface)
1299             return;
1300
1301         assert((obj_surface->size & 0x3f) == 0);
1302         obj_surface->private_data = gen7_vc1_surface;
1303     }
1304
1305     if (!pic_param->sequence_fields.bits.interlace ||
1306         pic_param->picture_fields.bits.frame_coding_mode < 2 || /* Progressive or Frame-Interlace */
1307         is_first_field) {
1308         gen7_vc1_surface->picture_type_top = 0;
1309         gen7_vc1_surface->picture_type_bottom = 0;
1310         gen7_vc1_surface->intensity_compensation_top = 0;
1311         gen7_vc1_surface->intensity_compensation_bottom = 0;
1312         gen7_vc1_surface->luma_scale_top[0] = 0;
1313         gen7_vc1_surface->luma_scale_top[1] = 0;
1314         gen7_vc1_surface->luma_scale_bottom[0] = 0;
1315         gen7_vc1_surface->luma_scale_bottom[1] = 0;
1316         gen7_vc1_surface->luma_shift_top[0] = 0;
1317         gen7_vc1_surface->luma_shift_top[1] = 0;
1318         gen7_vc1_surface->luma_shift_bottom[0] = 0;
1319         gen7_vc1_surface->luma_shift_bottom[1] = 0;
1320     }
1321
1322     if (!pic_param->sequence_fields.bits.interlace ||
1323         pic_param->picture_fields.bits.frame_coding_mode < 2) { /* Progressive or Frame-Interlace */
1324         gen7_vc1_surface->picture_type_top = picture_type;
1325         gen7_vc1_surface->picture_type_bottom = picture_type;
1326     } else if (pic_param->picture_fields.bits.top_field_first ^ is_first_field)
1327         gen7_vc1_surface->picture_type_bottom = picture_type;
1328     else
1329         gen7_vc1_surface->picture_type_top = picture_type;
1330
1331     /*
1332      * The Direct MV buffer is scalable with frame height, but
1333      * does not scale with frame width as the hardware assumes
1334      * that frame width is fixed at 128 MBs.
1335      */
1336
1337     if (gen7_vc1_surface->dmv_top == NULL) {
1338         height_in_mbs = ALIGN(obj_surface->orig_height, 16) / 16;
1339         gen7_vc1_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
1340                                              "direct mv w/r buffer",
1341                                              128 * height_in_mbs * 64,
1342                                              0x1000);
1343     }
1344
1345     if (pic_param->sequence_fields.bits.interlace &&
1346         gen7_vc1_surface->dmv_bottom == NULL) {
1347         height_in_mbs = ALIGN(obj_surface->orig_height, 32) / 32;
1348         gen7_vc1_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
1349                                              "direct mv w/r buffer",
1350                                              128 * height_in_mbs * 64,
1351                                              0x1000);
1352     }
1353 }
1354
1355 static void
1356 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1357                          struct decode_state *decode_state,
1358                          struct gen7_mfd_context *gen7_mfd_context)
1359 {
1360     VAPictureParameterBufferVC1 *pic_param;
1361     struct i965_driver_data *i965 = i965_driver_data(ctx);
1362     struct object_surface *obj_surface;
1363     struct gen7_vc1_surface *gen7_vc1_current_surface;
1364     struct gen7_vc1_surface *gen7_vc1_forward_surface;
1365     dri_bo *bo;
1366     int width_in_mbs;
1367     int picture_type;
1368     int is_first_field = 1;
1369     int i;
1370
1371     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1372     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1373     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1374
1375     if (!pic_param->sequence_fields.bits.interlace ||
1376         (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
1377         picture_type = pic_param->picture_fields.bits.picture_type;
1378     } else {/* Field-Interlace */
1379         is_first_field = pic_param->picture_fields.bits.is_first_field;
1380         picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
1381     }
1382
1383     /* Current decoded picture */
1384     obj_surface = decode_state->render_object;
1385     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1386     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1387
1388     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1389     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1390     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1391
1392     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1393     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1394     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1395
1396     if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1397         gen7_mfd_context->post_deblocking_output.valid = 0;
1398         gen7_mfd_context->pre_deblocking_output.valid = 1;
1399     } else {
1400         gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1401         gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1402     }
1403
1404     intel_update_vc1_frame_store_index(ctx,
1405                                        decode_state,
1406                                        pic_param,
1407                                        gen7_mfd_context->reference_surface);
1408
1409     if (picture_type == GEN7_VC1_P_PICTURE) {
1410         obj_surface = decode_state->reference_objects[0];
1411         gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);
1412         if (pic_param->forward_reference_picture != VA_INVALID_ID &&
1413             obj_surface)
1414             gen7_vc1_forward_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
1415         else
1416             gen7_vc1_forward_surface = NULL;
1417
1418         if (!pic_param->sequence_fields.bits.interlace ||
1419             pic_param->picture_fields.bits.frame_coding_mode == 0) { /* Progressive */
1420             if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1421                 if (gen7_vc1_forward_surface) {
1422                     gen7_vc1_forward_surface->intensity_compensation_top = 1;
1423                     gen7_vc1_forward_surface->intensity_compensation_bottom = 1;
1424                     gen7_vc1_forward_surface->luma_scale_top[0] = pic_param->luma_scale;
1425                     gen7_vc1_forward_surface->luma_scale_bottom[0] = pic_param->luma_scale;
1426                     gen7_vc1_forward_surface->luma_shift_top[0] = pic_param->luma_shift;
1427                     gen7_vc1_forward_surface->luma_shift_bottom[0] = pic_param->luma_shift;
1428                 }
1429             }
1430         } else if (pic_param->sequence_fields.bits.interlace &&
1431             pic_param->picture_fields.bits.frame_coding_mode == 1) { /* Frame-Interlace */
1432             if (pic_param->picture_fields.bits.intensity_compensation) {
1433                 if (gen7_vc1_forward_surface) {
1434                     gen7_vc1_forward_surface->intensity_compensation_top = 1;
1435                     gen7_vc1_forward_surface->intensity_compensation_bottom = 1;
1436                     gen7_vc1_forward_surface->luma_scale_top[0] = pic_param->luma_scale;
1437                     gen7_vc1_forward_surface->luma_scale_bottom[0] = pic_param->luma_scale;
1438                     gen7_vc1_forward_surface->luma_shift_top[0] = pic_param->luma_shift;
1439                     gen7_vc1_forward_surface->luma_shift_bottom[0] = pic_param->luma_shift;
1440                 }
1441             }
1442         } else if (pic_param->sequence_fields.bits.interlace &&
1443                    pic_param->picture_fields.bits.frame_coding_mode == 2) { /* Field-Interlace */
1444             if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1445                 if (pic_param->intensity_compensation_field == 1 || /* Top field */
1446                     pic_param->intensity_compensation_field == 0) { /* Both fields */
1447                     if (is_first_field) {
1448                         if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1449                              (pic_param->reference_fields.bits.reference_field_pic_indicator ==
1450                              pic_param->picture_fields.bits.top_field_first)) ||
1451                             pic_param->reference_fields.bits.num_reference_pictures) {
1452                             if (gen7_vc1_forward_surface) {
1453                                 i = gen7_vc1_forward_surface->intensity_compensation_top++;
1454                                 gen7_vc1_forward_surface->luma_scale_top[i] = pic_param->luma_scale;
1455                                 gen7_vc1_forward_surface->luma_shift_top[i] = pic_param->luma_shift;
1456                             }
1457                         }
1458                     } else { /* Second field */
1459                         if (pic_param->picture_fields.bits.top_field_first) {
1460                             if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1461                                  !pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1462                                 pic_param->reference_fields.bits.num_reference_pictures) {
1463                                 i = gen7_vc1_current_surface->intensity_compensation_top++;
1464                                 gen7_vc1_current_surface->luma_scale_top[i] = pic_param->luma_scale;
1465                                 gen7_vc1_current_surface->luma_shift_top[i] = pic_param->luma_shift;
1466                             }
1467                         } else {
1468                             if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1469                                  pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1470                                 pic_param->reference_fields.bits.num_reference_pictures) {
1471                                 if (gen7_vc1_forward_surface) {
1472                                     i = gen7_vc1_forward_surface->intensity_compensation_top++;
1473                                     gen7_vc1_forward_surface->luma_scale_top[i] = pic_param->luma_scale;
1474                                     gen7_vc1_forward_surface->luma_shift_top[i] = pic_param->luma_shift;
1475                                 }
1476                             }
1477                         }
1478                     }
1479                 }
1480                 if (pic_param->intensity_compensation_field == 2 || /* Bottom field */
1481                     pic_param->intensity_compensation_field == 0) { /* Both fields */
1482                     if (is_first_field) {
1483                         if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1484                              (pic_param->reference_fields.bits.reference_field_pic_indicator ^
1485                               pic_param->picture_fields.bits.top_field_first)) ||
1486                             pic_param->reference_fields.bits.num_reference_pictures) {
1487                             if (gen7_vc1_forward_surface) {
1488                                 i = gen7_vc1_forward_surface->intensity_compensation_bottom++;
1489                                 if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
1490                                     gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale;
1491                                     gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift;
1492                                 } else { /* Both fields */
1493                                     gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
1494                                     gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
1495                                 }
1496                             }
1497                         }
1498                     } else { /* Second field */
1499                         if (pic_param->picture_fields.bits.top_field_first) {
1500                             if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1501                                  pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1502                                 pic_param->reference_fields.bits.num_reference_pictures) {
1503                                 if (gen7_vc1_forward_surface) {
1504                                     i = gen7_vc1_forward_surface->intensity_compensation_bottom++;
1505                                     if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
1506                                         gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale;
1507                                         gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift;
1508                                     } else { /* Both fields */
1509                                         gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
1510                                         gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
1511                                     }
1512                                 }
1513                             }
1514                         } else {
1515                            if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1516                                  !pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1517                                 pic_param->reference_fields.bits.num_reference_pictures) {
1518                                 i = gen7_vc1_current_surface->intensity_compensation_bottom++;
1519                                if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
1520                                    gen7_vc1_current_surface->luma_scale_bottom[i] = pic_param->luma_scale;
1521                                    gen7_vc1_current_surface->luma_shift_bottom[i] = pic_param->luma_shift;
1522                                 } else { /* Both fields */
1523                                     gen7_vc1_current_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
1524                                     gen7_vc1_current_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
1525                                 }
1526                             }
1527                         }
1528                     }
1529                 }
1530             }
1531         }
1532     }
1533
1534     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1535     bo = dri_bo_alloc(i965->intel.bufmgr,
1536                       "intra row store",
1537                       width_in_mbs * 64,
1538                       0x1000);
1539     assert(bo);
1540     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1541     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1542
1543     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1544     bo = dri_bo_alloc(i965->intel.bufmgr,
1545                       "deblocking filter row store",
1546                       width_in_mbs * 7 * 64,
1547                       0x1000);
1548     assert(bo);
1549     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1550     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1551
1552     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1553     bo = dri_bo_alloc(i965->intel.bufmgr,
1554                       "bsd mpc row store",
1555                       width_in_mbs * 96,
1556                       0x1000);
1557     assert(bo);
1558     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1559     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1560
1561     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1562
1563     if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
1564         gen7_mfd_context->bitplane_read_buffer.valid = 1;
1565     else
1566         gen7_mfd_context->bitplane_read_buffer.valid = !!(pic_param->bitplane_present.value & 0x7f);
1567     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1568
1569     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1570         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1571         int height_in_mbs;
1572         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1573         int src_w, src_h;
1574         uint8_t *src = NULL, *dst = NULL;
1575
1576         if (!pic_param->sequence_fields.bits.interlace ||
1577             (pic_param->picture_fields.bits.frame_coding_mode < 2)) /* Progressive or Frame-Interlace */
1578             height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1579         else /* Field-Interlace */
1580             height_in_mbs = ALIGN(pic_param->coded_height, 32) / 32;
1581
1582         bo = dri_bo_alloc(i965->intel.bufmgr,
1583                           "VC-1 Bitplane",
1584                           bitplane_width * height_in_mbs,
1585                           0x1000);
1586         assert(bo);
1587         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1588
1589         dri_bo_map(bo, True);
1590         assert(bo->virtual);
1591         dst = bo->virtual;
1592
1593         if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1594             for (src_h = 0; src_h < height_in_mbs; src_h++) {
1595                 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1596                     int dst_index;
1597                     uint8_t src_value = 0x2;
1598
1599                     dst_index = src_w / 2;
1600                     dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1601                 }
1602
1603                 if (src_w & 1)
1604                     dst[src_w / 2] >>= 4;
1605
1606                 dst += bitplane_width;
1607             }
1608         } else {
1609             assert(decode_state->bit_plane->buffer);
1610             src = decode_state->bit_plane->buffer;
1611
1612             for (src_h = 0; src_h < height_in_mbs; src_h++) {
1613                 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1614                     int src_index, dst_index;
1615                     int src_shift;
1616                     uint8_t src_value;
1617
1618                     src_index = (src_h * width_in_mbs + src_w) / 2;
1619                     src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1620                     src_value = ((src[src_index] >> src_shift) & 0xf);
1621
1622                     dst_index = src_w / 2;
1623                     dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1624                 }
1625
1626                 if (src_w & 1)
1627                     dst[src_w / 2] >>= 4;
1628
1629                 dst += bitplane_width;
1630             }
1631         }
1632
1633         dri_bo_unmap(bo);
1634     } else
1635         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1636 }
1637
/*
 * Program the per-picture VC-1 decode state: derive all picture-level
 * parameters from the VA-API VC-1 picture parameter buffer and emit a
 * single 6-dword MFD_VC1_LONG_PIC_STATE command into the BCS batch.
 *
 * Covers quantizer/DQUANT configuration, unified MV mode, reference
 * field polarity, B-picture scale factor and reference distances,
 * Main-profile range reduction, overlap smoothing, sub-pel
 * interpolation mode, and the raw-coded bitplane flags.
 *
 * @ctx               VA driver context (unused here beyond the signature)
 * @decode_state      current frame's parameter buffers and surfaces
 * @gen7_mfd_context  MFD pipeline context owning the batch buffer
 */
static void
gen8_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    struct gen7_vc1_surface *gen7_vc1_current_surface;
    struct gen7_vc1_surface *gen7_vc1_reference_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode = 0;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int frfd = 0;       /* forward reference frame distance */
    int brfd = 0;       /* backward reference frame distance */
    int fcm = 0;        /* frame coding mode as encoded for the hardware */
    int picture_type;
    int ptype;
    int overlap = 0;
    int interpolation_mode = 0;
    int height_in_mbs;
    int is_first_field = 1;
    int loopfilter = 0;
    int bitplane_present;
    int range_reduction = 0;
    int range_reduction_scale = 0;
    int forward_mb = 0, mv_type_mb = 0, skip_mb = 0, direct_mb = 0;
    int overflags = 0, ac_pred = 0, field_tx = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /*
     * Resolve the effective picture type and macroblock height.
     * For field-interlaced pictures the VA picture_type encodes both
     * fields, so it is mapped through fptype_to_picture_type using the
     * field position, and the height is computed in 32-pixel units
     * (presumably because each field covers half the frame height —
     * matches the same computation in the surface-init path above).
     */
    if (!pic_param->sequence_fields.bits.interlace ||
        (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
        picture_type = pic_param->picture_fields.bits.picture_type;
        height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
    } else {/* Field-Interlace */
        is_first_field = pic_param->picture_fields.bits.is_first_field;
        picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
        height_in_mbs = ALIGN(pic_param->coded_height, 32) / 32;
    }

    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /*
     * Translate the VC-1 DQUANT syntax elements into the hardware's
     * alternative-quantizer configuration (alt_pquant_config) and the
     * edge mask selecting which picture edges use the alternative
     * quantizer (alt_pquant_edge_mask, one bit per edge).
     */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        /* DQUANT == 2: all four edges use the alternative quantizer */
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3:
                /* macroblock-level DQUANT: binary or full per-MB selection */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;

            case 0:
                /* all four edges */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1:
                /* double edge: DQDBEDGE selects the pair of edges */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2:
                /* single edge selected by DQSBEDGE */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /*
     * Main-profile range reduction: record the current frame's
     * range-reduction state on its private surface, then enable the
     * hardware range-reduction step whenever the current frame and its
     * forward reference disagree (range_reduction_scale picks the
     * direction of the conversion).  Skipped pictures inherit the
     * reference's state since they carry no range_reduction_frame flag
     * of their own.
     */
    if (pic_param->sequence_fields.bits.profile == 1 && /* Main Profile */
        pic_param->sequence_fields.bits.rangered) {
        obj_surface = decode_state->reference_objects[0];

        gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);

        if (pic_param->forward_reference_picture != VA_INVALID_ID &&
            obj_surface)
            gen7_vc1_reference_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
        else
            gen7_vc1_reference_surface = NULL;

        if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
            if (gen7_vc1_reference_surface)
                gen7_vc1_current_surface->range_reduction_frame = gen7_vc1_reference_surface->range_reduction_frame;
            else
                gen7_vc1_current_surface->range_reduction_frame = 0;
        else
            gen7_vc1_current_surface->range_reduction_frame = pic_param->range_reduction_frame;

        if (gen7_vc1_reference_surface) {
            if (gen7_vc1_current_surface->range_reduction_frame &&
                !gen7_vc1_reference_surface->range_reduction_frame) {
                range_reduction = 1;
                range_reduction_scale = 0;
            } else if (!gen7_vc1_current_surface->range_reduction_frame &&
                       gen7_vc1_reference_surface->range_reduction_frame) {
                range_reduction = 1;
                range_reduction_scale = 1;
            }
        }
    }

    /*
     * Unified MV mode (P/B pictures, not frame-interlace).  When the
     * stream signals intensity compensation, the real motion-vector
     * mode is carried in mv_mode2 instead of mv_mode.
     */
    if ((!pic_param->sequence_fields.bits.interlace ||
         pic_param->picture_fields.bits.frame_coding_mode != 1) && /* Progressive or Field-Interlace */
        (picture_type == GEN7_VC1_P_PICTURE ||
         picture_type == GEN7_VC1_B_PICTURE)) {
        if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
            assert(pic_param->mv_fields.bits.mv_mode2 < 4);
            unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
        } else {
            assert(pic_param->mv_fields.bits.mv_mode < 4);
            unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
        }
    }

    /*
     * One-reference field P pictures: derive the polarity (top/bottom)
     * of the single reference field from the reference-field indicator,
     * the field position, and top_field_first.
     */
    if (pic_param->sequence_fields.bits.interlace &&
        pic_param->picture_fields.bits.frame_coding_mode == 2 && /* Field-Interlace */
        picture_type == GEN7_VC1_P_PICTURE &&
        !pic_param->reference_fields.bits.num_reference_pictures) {
        if (pic_param->reference_fields.bits.reference_field_pic_indicator == 0) {
            ref_field_pic_polarity = is_first_field ?
                                        pic_param->picture_fields.bits.top_field_first :
                                        !pic_param->picture_fields.bits.top_field_first;
        } else {
            ref_field_pic_polarity = is_first_field ?
                                        !pic_param->picture_fields.bits.top_field_first :
                                        pic_param->picture_fields.bits.top_field_first;
        }
    }

    /* B-picture temporal scale factor, looked up from BFRACTION
       (table index is only valid below 21, hence the bounds check). */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    /*
     * Skipped pictures are decoded as P pictures with an all-"skipped"
     * bitplane (built by the surface-init path); all raw-coding flags
     * and the loop filter stay disabled for them.
     */
    if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
        ptype = GEN7_VC1_P_PICTURE;
        bitplane_present = 1;
    } else {
        ptype = pic_param->picture_fields.bits.picture_type;
        bitplane_present = !!(pic_param->bitplane_present.value & 0x7f);
        forward_mb = pic_param->raw_coding.flags.forward_mb;
        mv_type_mb = pic_param->raw_coding.flags.mv_type_mb;
        skip_mb = pic_param->raw_coding.flags.skip_mb;
        direct_mb = pic_param->raw_coding.flags.direct_mb;
        overflags = pic_param->raw_coding.flags.overflags;
        ac_pred = pic_param->raw_coding.flags.ac_pred;
        field_tx = pic_param->raw_coding.flags.field_tx;
        loopfilter = pic_param->entrypoint_fields.bits.loopfilter;
    }

    /* Luma AC coding set: index 2 for intra (I/BI) pictures, index 1
       otherwise. */
    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;

        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        /* NOTE: this deliberately patches the caller-supplied pic_param
           in place so the fixed-up values are emitted in DW6 below. */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /*
     * Direct-MV surface validity for B pictures: the stored motion
     * vectors are only usable when the co-located field/frame of the
     * backward reference was a P picture.  For field pictures the
     * relevant field (top vs bottom) depends on top_field_first and
     * the current field position.
     */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        obj_surface = decode_state->reference_objects[1];

        if (pic_param->backward_reference_picture != VA_INVALID_ID &&
            obj_surface)
            gen7_vc1_reference_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
        else
            gen7_vc1_reference_surface = NULL;

        if (gen7_vc1_reference_surface) {
            if (pic_param->sequence_fields.bits.interlace &&
                pic_param->picture_fields.bits.frame_coding_mode == 2 && /* Field-Interlace */
                pic_param->picture_fields.bits.top_field_first ^ is_first_field) {
                if (gen7_vc1_reference_surface->picture_type_bottom == GEN7_VC1_P_PICTURE)
                    dmv_surface_valid = 1;
            } else if (gen7_vc1_reference_surface->picture_type_top == GEN7_VC1_P_PICTURE)
                dmv_surface_valid = 1;
        }
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /*
     * Hardware frame coding mode: 0 = progressive, 1 = frame-interlace,
     * 2/3 = field pictures with top/bottom field first respectively
     * (the VA frame_coding_mode only distinguishes "field", value 2).
     */
    if (pic_param->sequence_fields.bits.interlace) {
        if (pic_param->picture_fields.bits.frame_coding_mode < 2)
            fcm = pic_param->picture_fields.bits.frame_coding_mode;
        else if (!pic_param->picture_fields.bits.top_field_first)
            fcm = 3; /* Field with bottom field first */
        else
            fcm = 2; /* Field with top field first */
    }

    /*
     * Reference distances for field pictures.  I/P fields latch the
     * stream's reference distance on the first field and reuse it for
     * the second; B fields derive the forward distance by scaling the
     * backward reference's distance with the BFRACTION scale factor
     * (frfd), and the backward distance as the remainder (brfd),
     * clamped at zero.
     */
    if (pic_param->sequence_fields.bits.interlace &&
        pic_param->picture_fields.bits.frame_coding_mode == 2) { /* Field-Interlace */
        if (picture_type == GEN7_VC1_I_PICTURE ||
             picture_type == GEN7_VC1_P_PICTURE) {
            gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);

            if (is_first_field)
                gen7_vc1_current_surface->reference_distance = pic_param->reference_fields.bits.reference_distance;

            frfd = gen7_vc1_current_surface->reference_distance;
        } else if (picture_type == GEN7_VC1_B_PICTURE) {
            obj_surface = decode_state->reference_objects[1];

            if (pic_param->backward_reference_picture != VA_INVALID_ID &&
                obj_surface)
                gen7_vc1_reference_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
            else
                gen7_vc1_reference_surface = NULL;

            if (gen7_vc1_reference_surface) {
                frfd = (scale_factor * gen7_vc1_reference_surface->reference_distance) >> 8;

                brfd = gen7_vc1_reference_surface->reference_distance - frfd - 1;
                if (brfd < 0)
                    brfd = 0;
            }
        }
    }

    /*
     * Overlap smoothing enable.  Advanced profile: always for P at
     * PQUANT >= 9, and for I/BI either at PQUANT >= 9 or when
     * conditional overlap is signalled.  Simple/Main profile: PQUANT
     * >= 9 for any non-B picture.
     */
    if (pic_param->sequence_fields.bits.overlap) {
        if (pic_param->sequence_fields.bits.profile == 3) { /* Advanced Profile */
            if (picture_type == GEN7_VC1_P_PICTURE &&
                pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
                overlap = 1;
            }
            if (picture_type == GEN7_VC1_I_PICTURE ||
                picture_type == GEN7_VC1_BI_PICTURE) {
                if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
                    overlap = 1;
                } else if (pic_param->conditional_overlap_flag == 1 || /* all block boundaries */
                           pic_param->conditional_overlap_flag == 2) { /* coded by OVERFLAGSMB bitplane */
                    overlap = 1;
                }
            }
        } else {
            if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
                picture_type != GEN7_VC1_B_PICTURE) {
                overlap = 1;
            }
        }
    }

    /* Sub-pel interpolation: bit 3 selects half-pel bilinear (directly
       or via the intensity-compensation mv_mode2), bit 0 is FASTUVMC. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 8 | pic_param->fast_uvmc_flag;
    else
        interpolation_mode = 0 | pic_param->fast_uvmc_flag;

    /* Emit the 6-dword MFD_VC1_LONG_PIC_STATE command with all the
       values derived above packed into their hardware bitfields. */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    /* DW1: picture size in macroblocks, minus one */
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    /* DW2: bitplane pitch, DMV validity, quantizer/rounding/syncmarker,
       interpolation, range reduction, loop filter, overlap, field
       position and profile flags */
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  range_reduction_scale << 7 |
                  range_reduction << 6 |
                  loopfilter << 5 |
                  overlap << 4 |
                  !is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0); /* Advanced Profile */
    /* DW3: conditional overlap, picture type, frame coding mode,
       alternative/base quantizer and BFRACTION scale factor */
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  ptype << 26 |
                  fcm << 24 |
                  alt_pq << 16 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
                  scale_factor << 0);
    /* DW4: motion-vector configuration, reference field setup,
       reference distances, extended MV ranges and DQUANT config */
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  brfd << 20 |
                  frfd << 16 |
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    /* DW5: bitplane presence plus raw-coding flags, VLC table selectors
       and transform configuration */
    OUT_BCS_BATCH(batch,
                  bitplane_present << 31 |
                  forward_mb << 30 |
                  mv_type_mb << 29 |
                  skip_mb << 28 |
                  direct_mb << 27 |
                  overflags << 26 |
                  ac_pred << 25 |
                  field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
}
1989
/*
 * Emit MFX_VC1_PRED_PIPE_STATE (6 dwords).
 *
 * Programs the VC-1 intensity-compensation state (LUMSCALE/LUMSHIFT pairs)
 * gathered from the reference surfaces, plus the reference-frame
 * replication mode for interlaced coding.
 *
 * reference_surface[] indices as used here:
 *   [0] forward top field, [1] backward top field,
 *   [2] forward bottom field, [3] backward bottom field.
 */
static void
gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct gen7_vc1_surface *gen7_vc1_top_surface;
    struct gen7_vc1_surface *gen7_vc1_bottom_surface;
    int picture_type;
    int is_first_field = 1;
    /* 2-bit masks: bit 0 = top-field IC present, bit 1 = bottom-field IC present */
    int intensitycomp_single_fwd = 0;
    int intensitycomp_single_bwd = 0;
    int intensitycomp_double_fwd = 0;
    /* LUMSCALE/LUMSHIFT values: suffix 1 = top field, 2 = bottom field;
     * "double" = second compensation set applied to the same reference */
    int lumscale1_single_fwd = 0;
    int lumscale2_single_fwd = 0;
    int lumshift1_single_fwd = 0;
    int lumshift2_single_fwd = 0;
    int lumscale1_single_bwd = 0;
    int lumscale2_single_bwd = 0;
    int lumshift1_single_bwd = 0;
    int lumshift2_single_bwd = 0;
    int lumscale1_double_fwd = 0;
    int lumscale2_double_fwd = 0;
    int lumshift1_double_fwd = 0;
    int lumshift2_double_fwd = 0;
    int replication_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    if (!pic_param->sequence_fields.bits.interlace ||
        (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
        picture_type = pic_param->picture_fields.bits.picture_type;
    } else {/* Field-Interlace */
        /* For field pictures the VA picture_type encodes the field pair
         * (FPTYPE); look up the per-field type for the field being decoded. */
        is_first_field = pic_param->picture_fields.bits.is_first_field;
        picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
    }

    /* Forward (past) reference intensity compensation for P and B pictures. */
    if (picture_type == GEN7_VC1_P_PICTURE ||
        picture_type == GEN7_VC1_B_PICTURE) {
        if (gen7_mfd_context->reference_surface[0].surface_id != VA_INVALID_ID)
            gen7_vc1_top_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[0].obj_surface->private_data);
        else
            gen7_vc1_top_surface = NULL;

        if (gen7_vc1_top_surface) {
            intensitycomp_single_fwd = !!gen7_vc1_top_surface->intensity_compensation_top;
            lumscale1_single_fwd = gen7_vc1_top_surface->luma_scale_top[0];
            lumshift1_single_fwd = gen7_vc1_top_surface->luma_shift_top[0];
            /* value 2 means two IC sets were signalled for this field */
            if (gen7_vc1_top_surface->intensity_compensation_top == 2) {
                intensitycomp_double_fwd = 1;
                lumscale1_double_fwd = gen7_vc1_top_surface->luma_scale_top[1];
                lumshift1_double_fwd = gen7_vc1_top_surface->luma_shift_top[1];
            }
        }

        if (pic_param->sequence_fields.bits.interlace &&
            pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
            if (gen7_mfd_context->reference_surface[2].surface_id != VA_INVALID_ID)
                gen7_vc1_bottom_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[2].obj_surface->private_data);
            else
                gen7_vc1_bottom_surface = NULL;

            if (gen7_vc1_bottom_surface) {
                intensitycomp_single_fwd |= !!gen7_vc1_bottom_surface->intensity_compensation_bottom << 1;
                lumscale2_single_fwd = gen7_vc1_bottom_surface->luma_scale_bottom[0];
                lumshift2_single_fwd = gen7_vc1_bottom_surface->luma_shift_bottom[0];
                if (gen7_vc1_bottom_surface->intensity_compensation_bottom == 2) {
                    intensitycomp_double_fwd |= 2;
                    lumscale2_double_fwd = gen7_vc1_bottom_surface->luma_scale_bottom[1];
                    lumshift2_double_fwd = gen7_vc1_bottom_surface->luma_shift_bottom[1];
                }
            }
        }
    }

    /* Backward (future) reference intensity compensation for B pictures.
     * Note: only the "single" set is collected here; no double_bwd state
     * is programmed (DW5 below is written as zero). */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        if (gen7_mfd_context->reference_surface[1].surface_id != VA_INVALID_ID)
            gen7_vc1_top_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[1].obj_surface->private_data);
        else
            gen7_vc1_top_surface = NULL;

        if (gen7_vc1_top_surface) {
            intensitycomp_single_bwd = !!gen7_vc1_top_surface->intensity_compensation_top;
            lumscale1_single_bwd = gen7_vc1_top_surface->luma_scale_top[0];
            lumshift1_single_bwd = gen7_vc1_top_surface->luma_shift_top[0];
        }

        if (pic_param->sequence_fields.bits.interlace &&
            pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
            if (gen7_mfd_context->reference_surface[3].surface_id != VA_INVALID_ID)
                gen7_vc1_bottom_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[3].obj_surface->private_data);
            else
                gen7_vc1_bottom_surface = NULL;

            if (gen7_vc1_bottom_surface) {
                intensitycomp_single_bwd |= !!gen7_vc1_bottom_surface->intensity_compensation_bottom << 1;
                lumscale2_single_bwd = gen7_vc1_bottom_surface->luma_scale_bottom[0];
                lumshift2_single_bwd = gen7_vc1_bottom_surface->luma_shift_bottom[0];
            }
        }
    }

    if (pic_param->sequence_fields.bits.interlace &&
        pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
        /* Replication-mode nibble: 0x5 for P (forward only), 0xf for B
         * (forward and backward) — NOTE(review): per MFX command layout,
         * confirm against the PRM. */
        if (picture_type == GEN7_VC1_P_PICTURE)
            replication_mode = 0x5;
        else if (picture_type == GEN7_VC1_B_PICTURE)
            replication_mode = 0xf;
    }

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  intensitycomp_double_fwd << 14 |
                  0 << 12 |
                  intensitycomp_single_fwd << 10 |
                  intensitycomp_single_bwd << 8 |
                  replication_mode << 4 |
                  0);
    OUT_BCS_BATCH(batch,
                  lumshift2_single_fwd << 24 |
                  lumshift1_single_fwd << 16 |
                  lumscale2_single_fwd << 8 |
                  lumscale1_single_fwd << 0);
    OUT_BCS_BATCH(batch,
                  lumshift2_double_fwd << 24 |
                  lumshift1_double_fwd << 16 |
                  lumscale2_double_fwd << 8 |
                  lumscale1_double_fwd << 0);
    OUT_BCS_BATCH(batch,
                  lumshift2_single_bwd << 24 |
                  lumshift1_single_bwd << 16 |
                  lumscale2_single_bwd << 8 |
                  lumscale1_single_bwd << 0);
    /* DW5: backward "double" IC set — always zero (not supported here). */
    OUT_BCS_BATCH(batch,
                  0 << 24 |
                  0 << 16 |
                  0 << 8 |
                  0 << 0);
    ADVANCE_BCS_BATCH(batch);
}
2133
/*
 * Emit MFX_VC1_DIRECTMODE_STATE (7 dwords).
 *
 * Programs the direct-mode motion-vector (DMV) buffers: P and skipped
 * pictures write their MVs into the current render surface's DMV buffer,
 * while B pictures read the co-located MVs from the backward (future)
 * reference surface.  For field pictures the top/bottom DMV buffer is
 * chosen from top_field_first XOR is_first_field.
 */
static void
gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAPictureParameterBufferVC1 *pic_param;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
    int picture_type;
    int is_first_field = 1;

    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    if (!pic_param->sequence_fields.bits.interlace ||
        (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
        picture_type = pic_param->picture_fields.bits.picture_type;
    } else {/* Field-Interlace */
        /* Field pictures: derive the per-field type from the field-pair type. */
        is_first_field = pic_param->picture_fields.bits.is_first_field;
        picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
    }

    /* P/skipped: write MVs of the current picture for later direct-mode use. */
    if (picture_type == GEN7_VC1_P_PICTURE ||
        picture_type == GEN7_VC1_SKIPPED_PICTURE) {
        obj_surface = decode_state->render_object;

        if (pic_param->sequence_fields.bits.interlace &&
            (pic_param->picture_fields.bits.frame_coding_mode == 2) && /* Field-Interlace */
            (pic_param->picture_fields.bits.top_field_first ^ is_first_field))
            dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_bottom;
        else
            dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_top;
    }

    /* B: read co-located MVs from the backward reference, if present. */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        obj_surface = decode_state->reference_objects[1];
        if (pic_param->backward_reference_picture != VA_INVALID_ID &&
            obj_surface &&
            obj_surface->private_data) {

            if (pic_param->sequence_fields.bits.interlace &&
                (pic_param->picture_fields.bits.frame_coding_mode == 2) && /* Field-Interlace */
                (pic_param->picture_fields.bits.top_field_first ^ is_first_field))
                dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_bottom;
            else
                dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_top;
        }
    }

    BEGIN_BCS_BATCH(batch, 7);
    OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));

    /* 64-bit write address (two dwords) or zeroes when absent. */
    if (dmv_write_buffer)
        OUT_BCS_RELOC64(batch, dmv_write_buffer,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* 64-bit read address (two dwords) or zeroes when absent. */
    if (dmv_read_buffer)
        OUT_BCS_RELOC64(batch, dmv_read_buffer,
                        I915_GEM_DOMAIN_INSTRUCTION, 0,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    ADVANCE_BCS_BATCH(batch);
}
2211
/*
 * Convert the bit offset of the first macroblock within a VC-1 slice,
 * accounting for emulation-prevention bytes in the Advanced Profile.
 *
 * For Advanced Profile (profile == 3) the slice header may contain
 * 00 00 03 escape sequences; each one seen before the macroblock data
 * consumes an extra physical byte, so the logical bit offset must be
 * mapped to a physical one.  A trailing escape that straddles the end
 * of the header has its 0x03 byte cleared in place in @buf.
 * Simple/Main Profile streams are returned unchanged.
 */
static int
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    const int header_bytes = in_slice_data_bit_offset / 8;
    int src = 0;        /* logical byte position within the header */
    int dst = 0;        /* physical byte position within buf */

    if (profile != 3 || header_bytes == 0) /* Simple or Main Profile */
        return in_slice_data_bit_offset;

    /* Walk the header, skipping over 00 00 03 xx (xx < 4) escapes. */
    while (src < header_bytes - 1) {
        if (buf[dst] == 0 && buf[dst + 1] == 0 &&
            buf[dst + 2] == 3 && buf[dst + 3] < 4) {
            src += 2;
            dst += 3;
        } else {
            src += 1;
            dst += 1;
        }
    }

    if (src == header_bytes - 1) {
        /* Escape starting at the last header byte: strip its 0x03 in place. */
        if (buf[dst] == 0 && buf[dst + 1] == 0 &&
            buf[dst + 2] == 3 && buf[dst + 3] < 4) {
            buf[dst + 2] = 0;
            dst++;
        }
        dst++;
    }

    return 8 * dst + in_slice_data_bit_offset % 8;
}
2239
/*
 * Emit MFD_VC1_BSD_OBJECT (5 dwords) for one VC-1 slice.
 *
 * Maps the slice data BO to compute the physical macroblock bit offset
 * (emulation-prevention bytes are stripped in place for Advanced
 * Profile), then programs the slice data size/offset and the vertical
 * macroblock-row range covered by this slice.
 */
static void
gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVC1 *pic_param,
                        VASliceParameterBufferVC1 *slice_param,
                        VASliceParameterBufferVC1 *next_slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int next_slice_start_vert_pos;
    int macroblock_offset;
    uint8_t *slice_data = NULL;

    /* CPU map needed to inspect (and possibly patch) the slice header. */
    dri_bo_map(slice_data_bo, True);
    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
    macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
                                                               slice_param->macroblock_offset,
                                                               pic_param->sequence_fields.bits.profile);
    dri_bo_unmap(slice_data_bo);

    /* End row: next slice's start, or the picture height in MB rows
     * (16-pixel rows; 32 for field pictures, since each field is half). */
    if (next_slice_param)
        next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
    else if (!pic_param->sequence_fields.bits.interlace ||
             pic_param->picture_fields.bits.frame_coding_mode < 2) /* Progressive or Frame-Interlace */
        next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
    else /* Field-Interlace */
        next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 32) / 32;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
    /* Remaining data length after the byte-aligned part of the header. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_vertical_position << 16 |
                  next_slice_start_vert_pos << 0);
    /* Sub-byte bit offset of the first macroblock. */
    OUT_BCS_BATCH(batch,
                  (macroblock_offset & 0x7));
    ADVANCE_BCS_BATCH(batch);
}
2281
/*
 * Decode one VC-1 picture: emit the full MFX state setup followed by one
 * BSD object per slice, then flush the batch.
 *
 * Slice parameter buffers may be grouped; the next slice (within the
 * group, or the first of the next group) is passed along so each BSD
 * object knows where the following slice starts.
 */
static void
gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);

        /* First slice of the next group, or NULL for the last group. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            /* Only whole-slice data buffers are supported. */
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
2334
/*
 * Prepare the decoder context for a JPEG picture.
 *
 * Derives the render-target fourcc/subsampling from the frame's component
 * sampling factors, (re)allocates the surface BO accordingly, and resets
 * the scratch-buffer slots: JPEG uses only the pre-deblocking output.
 */
static void
gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface;
    VAPictureParameterBufferJPEGBaseline *pic_param;
    int subsampling = SUBSAMPLE_YUV420;
    int fourcc = VA_FOURCC_IMC3;

    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    if (pic_param->num_components == 1) {
        /* Grayscale */
        subsampling = SUBSAMPLE_YUV400;
        fourcc = VA_FOURCC_Y800;
    } else if (pic_param->num_components == 3) {
        /* Classify by the ratio of luma-to-chroma sampling factors. */
        int h1 = pic_param->components[0].h_sampling_factor;
        int h2 = pic_param->components[1].h_sampling_factor;
        int h3 = pic_param->components[2].h_sampling_factor;
        int v1 = pic_param->components[0].v_sampling_factor;
        int v2 = pic_param->components[1].v_sampling_factor;
        int v3 = pic_param->components[2].v_sampling_factor;

        if (h1 == 2 * h2 && h2 == h3 &&
            v1 == 2 * v2 && v2 == v3) {
            subsampling = SUBSAMPLE_YUV420;
            fourcc = VA_FOURCC_IMC3;
        } else if (h1 == 2 * h2  && h2 == h3 &&
                   v1 == v2 && v2 == v3) {
            subsampling = SUBSAMPLE_YUV422H;
            fourcc = VA_FOURCC_422H;
        } else if (h1 == h2 && h2 == h3 &&
                   v1 == v2  && v2 == v3) {
            subsampling = SUBSAMPLE_YUV444;
            fourcc = VA_FOURCC_444P;
        } else if (h1 == 4 * h2 && h2 ==  h3 &&
                   v1 == v2 && v2 == v3) {
            subsampling = SUBSAMPLE_YUV411;
            fourcc = VA_FOURCC_411P;
        } else if (h1 == h2 && h2 == h3 &&
                   v1 == 2 * v2 && v2 == v3) {
            subsampling = SUBSAMPLE_YUV422V;
            fourcc = VA_FOURCC_422V;
        } else
            assert(0); /* unsupported sampling-factor combination */
    } else {
        assert(0); /* only 1 or 3 components supported */
    }

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);

    /* JPEG output goes through the pre-deblocking path only. */
    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = 1;

    gen7_mfd_context->post_deblocking_output.bo = NULL;
    gen7_mfd_context->post_deblocking_output.valid = 0;

    gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.bo = NULL;
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
2411
/* Map VA rotation indices (0/90/180/270) to MFX_JPEG_PIC_STATE values. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
2418
2419 static void
2420 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
2421                         struct decode_state *decode_state,
2422                         struct gen7_mfd_context *gen7_mfd_context)
2423 {
2424     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2425     VAPictureParameterBufferJPEGBaseline *pic_param;
2426     int chroma_type = GEN7_YUV420;
2427     int frame_width_in_blks;
2428     int frame_height_in_blks;
2429
2430     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2431     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2432
2433     if (pic_param->num_components == 1)
2434         chroma_type = GEN7_YUV400;
2435     else if (pic_param->num_components == 3) {
2436         int h1 = pic_param->components[0].h_sampling_factor;
2437         int h2 = pic_param->components[1].h_sampling_factor;
2438         int h3 = pic_param->components[2].h_sampling_factor;
2439         int v1 = pic_param->components[0].v_sampling_factor;
2440         int v2 = pic_param->components[1].v_sampling_factor;
2441         int v3 = pic_param->components[2].v_sampling_factor;
2442
2443         if (h1 == 2 * h2 && h2 == h3 &&
2444             v1 == 2 * v2 && v2 == v3)
2445             chroma_type = GEN7_YUV420;
2446         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2447                  v1 == 1 && v2 == 1 && v3 == 1)
2448             chroma_type = GEN7_YUV422H_2Y;
2449         else if (h1 == h2 && h2 == h3 &&
2450                  v1 == v2 && v2 == v3)
2451             chroma_type = GEN7_YUV444;
2452         else if (h1 == 4 * h2 && h2 == h3 &&
2453                  v1 == v2 && v2 == v3)
2454             chroma_type = GEN7_YUV411;
2455         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2456                  v1 == 2 && v2 == 1 && v3 == 1)
2457             chroma_type = GEN7_YUV422V_2Y;
2458         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2459                  v1 == 2 && v2 == 2 && v3 == 2)
2460             chroma_type = GEN7_YUV422H_4Y;
2461         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2462                  v1 == 2 && v2 == 1 && v3 == 1)
2463             chroma_type = GEN7_YUV422V_4Y;
2464         else
2465             assert(0);
2466     }
2467
2468     if (chroma_type == GEN7_YUV400 ||
2469         chroma_type == GEN7_YUV444 ||
2470         chroma_type == GEN7_YUV422V_2Y) {
2471         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2472         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2473     } else if (chroma_type == GEN7_YUV411) {
2474         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2475         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2476     } else {
2477         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2478         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2479     }
2480
2481     BEGIN_BCS_BATCH(batch, 3);
2482     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2483     OUT_BCS_BATCH(batch,
2484                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2485                   (chroma_type << 0));
2486     OUT_BCS_BATCH(batch,
2487                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2488                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2489     ADVANCE_BCS_BATCH(batch);
2490 }
2491
/* Huffman table slot per index: 0 = luma (Y), 1 = chroma (UV). */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
2496
/*
 * Emit MFX_JPEG_HUFF_TABLE_STATE for up to @num_tables Huffman tables
 * (Y and UV) that the app flagged for loading.
 *
 * Payload is 12 + 12 + 16 + 164 = 204 bytes = 51 dwords, plus the 2-dword
 * header = 53 dwords per table.  NOTE(review): only 12 of the 16 DC
 * code-length counts and 164 (162 padded) AC values are sent — assumed
 * to match the MFX command layout; confirm against the PRM.
 */
static void
gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct gen7_mfd_context *gen7_mfd_context,
                               int num_tables)
{
    VAHuffmanTableBufferJPEGBaseline *huffman_table;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int index;

    /* Nothing to program if the app supplied no Huffman table buffer. */
    if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
        return;

    huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;

    for (index = 0; index < num_tables; index++) {
        int id = va_to_gen7_jpeg_hufftable[index];
        /* Skip tables the app did not mark as (re)loaded. */
        if (!huffman_table->load_huffman_table[index])
            continue;
        BEGIN_BCS_BATCH(batch, 53);
        OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
        OUT_BCS_BATCH(batch, id);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
        ADVANCE_BCS_BATCH(batch);
    }
}
2526
/* Map JPEG component id (1-based: Y, Cb, Cr, alpha) to MFX QM table id;
 * index 0 is unused. */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
2534
/*
 * Load the JPEG quantization matrices selected by each picture component.
 *
 * VA-API supplies quantiser tables in zig-zag order; the hardware wants
 * raster order, so each table is de-zigzagged before being emitted via
 * gen8_mfd_qm_state().
 */
static void
gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferJPEGBaseline *pic_param;
    VAIQMatrixBufferJPEGBaseline *iq_matrix;
    int index;

    /* Nothing to program if the app supplied no IQ matrix buffer. */
    if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
        return;

    iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    assert(pic_param->num_components <= 3);

    for (index = 0; index < pic_param->num_components; index++) {
        /* Normalize component ids so the first component maps to 1 (Y). */
        int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
        int qm_type;
        unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
        unsigned char raster_qm[64];
        int j;

        if (id > 4 || id < 1)
            continue;

        /* Skip tables the app did not mark as (re)loaded. */
        if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
            continue;

        qm_type = va_to_gen7_jpeg_qm[id];

        /* Convert zig-zag scan order to raster order. */
        for (j = 0; j < 64; j++)
            raster_qm[zigzag_direct[j]] = qm[j];

        gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
    }
}
2573
/*
 * Emit MFD_JPEG_BSD_OBJECT (6 dwords) for one JPEG scan.
 *
 * Builds a Y/U/V scan-component bitmask from the scan's component
 * selectors (normalized against the frame's first component id) and
 * programs the scan data location, MCU position/count, and restart
 * interval.
 */
static void
gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
                         VAPictureParameterBufferJPEGBaseline *pic_param,
                         VASliceParameterBufferJPEGBaseline *slice_param,
                         VASliceParameterBufferJPEGBaseline *next_slice_param,
                         dri_bo *slice_data_bo,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int scan_component_mask = 0;
    int i;

    assert(slice_param->num_components > 0);
    assert(slice_param->num_components < 4);
    assert(slice_param->num_components <= pic_param->num_components);

    for (i = 0; i < slice_param->num_components; i++) {
        /* Normalized id: 1 = Y, 2 = U(Cb), 3 = V(Cr). */
        switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
        case 1:
            scan_component_mask |= (1 << 0);
            break;
        case 2:
            scan_component_mask |= (1 << 1);
            break;
        case 3:
            scan_component_mask |= (1 << 2);
            break;
        default:
            assert(0);
            break;
        }
    }

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size);
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  slice_param->slice_horizontal_position << 16 |
                  slice_param->slice_vertical_position << 0);
    OUT_BCS_BATCH(batch,
                  ((slice_param->num_components != 1) << 30) |  /* interleaved */
                  (scan_component_mask << 27) |                 /* scan components */
                  (0 << 26) |   /* disable interrupt allowed */
                  (slice_param->num_mcus << 0));                /* MCU count */
    OUT_BCS_BATCH(batch,
                  (slice_param->restart_interval << 0));    /* RestartInterval */
    ADVANCE_BCS_BATCH(batch);
}
2625
2626 /* Workaround for JPEG decoding on Ivybridge */
2627 #ifdef JPEG_WA
2628
/*
 * Pre-canned 16x16 clip used by the JPEG decode workaround: its bitstream
 * is decoded (as AVC, see gen8_jpeg_wa_pipe_mode_select) before the real
 * JPEG frame.  NOTE(review): the data[] bytes and the bit offset / QP
 * values are opaque encoded-stream constants — assumed correct as
 * shipped; confirm against the workaround documentation.
 */
static struct {
    int width;              /* clip width in pixels */
    int height;             /* clip height in pixels */
    unsigned char data[32]; /* encoded bitstream bytes */
    int data_size;          /* valid bytes in data[] */
    int data_bit_offset;    /* bit offset of the first macroblock */
    int qp;                 /* quantization parameter for the clip */
} gen7_jpeg_wa_clip = {
    16,
    16,
    {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    14,
    40,
    28,
};
2647
/*
 * Allocate the resources for the JPEG decode workaround: a small NV12
 * scratch surface matching the canned clip's dimensions, and (once) a
 * BO holding the clip's bitstream bytes.
 */
static void
gen8_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAStatus status;
    struct object_surface *obj_surface;

    /* Release any scratch surface left over from a previous run. */
    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
                             1);

    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 1,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);

    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen7_mfd_context->jpeg_wa_surface_object = obj_surface;

    /* Upload the canned bitstream once; the BO is reused afterwards. */
    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
                                                               "JPEG WA data",
                                                               0x1000,
                                                               0x1000);
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       0,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
    }
}
2685
/*
 * Emit MFX_PIPE_MODE_SELECT (5 dwords) for the JPEG workaround pass.
 *
 * Selects AVC VLD decode with pre-deblocking output only — the canned
 * workaround clip is decoded as AVC before the actual JPEG frame.
 */
static void
gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9)  | /* Post Deblocking Output */
                  (1 << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
2713
/*
 * Emit MFX_SURFACE_STATE (6 dwords) describing the workaround scratch
 * surface: tiled NV12 (planar 4:2:0, interleaved chroma), with the
 * dimensions, pitch, and Cb plane offset taken from the surface object.
 */
static void
gen8_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
2743
2744 static void
2745 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2746                                  struct gen7_mfd_context *gen7_mfd_context)
2747 {
2748     struct i965_driver_data *i965 = i965_driver_data(ctx);
2749     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2750     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2751     dri_bo *intra_bo;
2752     int i;
2753
2754     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2755                             "intra row store",
2756                             128 * 64,
2757                             0x1000);
2758
2759     BEGIN_BCS_BATCH(batch, 61);
2760     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2761     OUT_BCS_RELOC64(batch,
2762                     obj_surface->bo,
2763                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2764                     0);
2765     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2766
2767
2768     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2769     OUT_BCS_BATCH(batch, 0);
2770     OUT_BCS_BATCH(batch, 0);
2771
2772     /* uncompressed-video & stream out 7-12 */
2773     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2774     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2775     OUT_BCS_BATCH(batch, 0);
2776     OUT_BCS_BATCH(batch, 0);
2777     OUT_BCS_BATCH(batch, 0);
2778     OUT_BCS_BATCH(batch, 0);
2779
2780     /* the DW 13-15 is for intra row store scratch */
2781     OUT_BCS_RELOC64(batch,
2782                     intra_bo,
2783                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2784                     0);
2785
2786     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2787
2788     /* the DW 16-18 is for deblocking filter */
2789     OUT_BCS_BATCH(batch, 0);
2790     OUT_BCS_BATCH(batch, 0);
2791     OUT_BCS_BATCH(batch, 0);
2792
2793     /* DW 19..50 */
2794     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2795         OUT_BCS_BATCH(batch, 0);
2796         OUT_BCS_BATCH(batch, 0);
2797     }
2798     OUT_BCS_BATCH(batch, 0);
2799
2800     /* the DW52-54 is for mb status address */
2801     OUT_BCS_BATCH(batch, 0);
2802     OUT_BCS_BATCH(batch, 0);
2803     OUT_BCS_BATCH(batch, 0);
2804     /* the DW56-60 is for ILDB & second ILDB address */
2805     OUT_BCS_BATCH(batch, 0);
2806     OUT_BCS_BATCH(batch, 0);
2807     OUT_BCS_BATCH(batch, 0);
2808     OUT_BCS_BATCH(batch, 0);
2809     OUT_BCS_BATCH(batch, 0);
2810     OUT_BCS_BATCH(batch, 0);
2811
2812     ADVANCE_BCS_BATCH(batch);
2813
2814     dri_bo_unreference(intra_bo);
2815 }
2816
2817 static void
2818 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2819                                      struct gen7_mfd_context *gen7_mfd_context)
2820 {
2821     struct i965_driver_data *i965 = i965_driver_data(ctx);
2822     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2823     dri_bo *bsd_mpc_bo, *mpr_bo;
2824
2825     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2826                               "bsd mpc row store",
2827                               11520, /* 1.5 * 120 * 64 */
2828                               0x1000);
2829
2830     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2831                           "mpr row store",
2832                           7680, /* 1. 0 * 120 * 64 */
2833                           0x1000);
2834
2835     BEGIN_BCS_BATCH(batch, 10);
2836     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2837
2838     OUT_BCS_RELOC64(batch,
2839                     bsd_mpc_bo,
2840                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2841                     0);
2842
2843     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2844
2845     OUT_BCS_RELOC64(batch,
2846                     mpr_bo,
2847                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2848                     0);
2849     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2850
2851     OUT_BCS_BATCH(batch, 0);
2852     OUT_BCS_BATCH(batch, 0);
2853     OUT_BCS_BATCH(batch, 0);
2854
2855     ADVANCE_BCS_BATCH(batch);
2856
2857     dri_bo_unreference(bsd_mpc_bo);
2858     dri_bo_unreference(mpr_bo);
2859 }
2860
/*
 * Intentionally a no-op: no MFX_QM_STATE commands are emitted for the
 * JPEG workaround decode.  NOTE(review): presumably the hardware
 * default quantization matrices are sufficient for the canned clip —
 * confirm against the other gen JPEG-WA implementations.
 */
static void
gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{

}
2867
/*
 * Emit MFX_AVC_IMG_STATE for the JPEG workaround decode: a minimal
 * 1x1-macroblock frame, progressive (img_struct = 0), 4:2:0, CABAC,
 * with MBAFF disabled.
 */
static void
gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct = 0;          /* progressive frame */
    int mbaff_frame_flag = 0;    /* no macroblock-adaptive frame/field */
    unsigned int width_in_mbs = 1, height_in_mbs = 1; /* single-MB picture */

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    /* DW1: total macroblock count */
    OUT_BCS_BATCH(batch,
                  width_in_mbs * height_in_mbs);
    /* DW2: picture dimensions in MBs, each minus one */
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 24) |
                  (0 << 16) |
                  (0 << 14) |
                  (0 << 13) |
                  (0 << 12) | /* differ from GEN6 */
                  (0 << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (0 << 6) |
                  (0 << 5) |
                  (0 << 4) |
                  (0 << 3) |
                  (1 << 2) |
                  (mbaff_frame_flag << 1) |
                  (0 << 0));
    /* DW5-15: unused for the workaround, all zero */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2915
/*
 * Emit MFX_AVC_DIRECTMODE_STATE for the JPEG workaround decode.  The
 * workaround needs no direct-mode data, so every reference surface
 * address and POC entry is programmed as zero.
 */
static void
gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
                                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0); /* top */
        OUT_BCS_BATCH(batch, 0); /* bottom */
    }

    OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field */
    OUT_BCS_BATCH(batch, 0); /* top */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC List */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
2950
2951 static void
2952 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2953                                      struct gen7_mfd_context *gen7_mfd_context)
2954 {
2955     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2956
2957     BEGIN_BCS_BATCH(batch, 11);
2958     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2959     OUT_BCS_RELOC64(batch,
2960                     gen7_mfd_context->jpeg_wa_slice_data_bo,
2961                     I915_GEM_DOMAIN_INSTRUCTION, 0,
2962                     0);
2963     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2964     OUT_BCS_BATCH(batch, 0);
2965     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2966     OUT_BCS_BATCH(batch, 0);
2967     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2968     OUT_BCS_BATCH(batch, 0);
2969     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2970     OUT_BCS_BATCH(batch, 0);
2971     ADVANCE_BCS_BATCH(batch);
2972 }
2973
/*
 * Emit MFD_AVC_BSD_OBJECT to kick off decoding of the canned workaround
 * clip (gen7_jpeg_wa_clip).  The slice is marked as the last one, and
 * the clip's data_bit_offset is split into its byte and bit components
 * as the command layout requires.
 */
static void
gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) | /* byte offset of the slice data */
                  (0 << 5)  |
                  (0 << 4)  |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7)); /* remaining bit offset */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
3000
/*
 * Emit MFX_AVC_SLICE_STATE for the JPEG workaround decode: a single
 * intra (I) slice starting at MB 0, with deblocking disabled and no
 * reference lists, flagged as the last slice of the picture.
 */
static void
gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0; /* intra slice: no references */
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    /* DW4-5: current and next slice MB positions */
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
3040
/*
 * Run the full JPEG hardware-workaround sequence: a tiny canned AVC
 * intra decode emitted into the batch before the real JPEG decode.
 * The emission order below follows the MFX command-ordering
 * requirements and must not be rearranged.
 */
static void
gen8_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen8_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
3060
3061 #endif
3062
3063 void
3064 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
3065                              struct decode_state *decode_state,
3066                              struct gen7_mfd_context *gen7_mfd_context)
3067 {
3068     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3069     VAPictureParameterBufferJPEGBaseline *pic_param;
3070     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
3071     dri_bo *slice_data_bo;
3072     int i, j, max_selector = 0;
3073
3074     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3075     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
3076
3077     /* Currently only support Baseline DCT */
3078     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
3079     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3080 #ifdef JPEG_WA
3081     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
3082 #endif
3083     intel_batchbuffer_emit_mi_flush(batch);
3084     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3085     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3086     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3087     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
3088     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
3089
3090     for (j = 0; j < decode_state->num_slice_params; j++) {
3091         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3092         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3093         slice_data_bo = decode_state->slice_datas[j]->bo;
3094         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3095
3096         if (j == decode_state->num_slice_params - 1)
3097             next_slice_group_param = NULL;
3098         else
3099             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3100
3101         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3102             int component;
3103
3104             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3105
3106             if (i < decode_state->slice_params[j]->num_elements - 1)
3107                 next_slice_param = slice_param + 1;
3108             else
3109                 next_slice_param = next_slice_group_param;
3110
3111             for (component = 0; component < slice_param->num_components; component++) {
3112                 if (max_selector < slice_param->components[component].dc_table_selector)
3113                     max_selector = slice_param->components[component].dc_table_selector;
3114
3115                 if (max_selector < slice_param->components[component].ac_table_selector)
3116                     max_selector = slice_param->components[component].ac_table_selector;
3117             }
3118
3119             slice_param++;
3120         }
3121     }
3122
3123     assert(max_selector < 2);
3124     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
3125
3126     for (j = 0; j < decode_state->num_slice_params; j++) {
3127         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3128         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3129         slice_data_bo = decode_state->slice_datas[j]->bo;
3130         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3131
3132         if (j == decode_state->num_slice_params - 1)
3133             next_slice_group_param = NULL;
3134         else
3135             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3136
3137         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3138             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3139
3140             if (i < decode_state->slice_params[j]->num_elements - 1)
3141                 next_slice_param = slice_param + 1;
3142             else
3143                 next_slice_param = next_slice_group_param;
3144
3145             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
3146             slice_param++;
3147         }
3148     }
3149
3150     intel_batchbuffer_end_atomic(batch);
3151     intel_batchbuffer_flush(batch);
3152 }
3153
/* VP8 DC dequantization lookup table, indexed by the clipped
 * quantization index (0..127); used by gen8_mfd_vp8_pic_state()
 * (cf. RFC 6386, dc_qlookup). */
static const int vp8_dc_qlookup[128] = {
    4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
    18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
    29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
    44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
    59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
    75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
    91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
};
3164
/* VP8 AC dequantization lookup table, indexed by the clipped
 * quantization index (0..127); used by gen8_mfd_vp8_pic_state()
 * (cf. RFC 6386, ac_qlookup). */
static const int vp8_ac_qlookup[128] = {
    4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
    20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
    36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
    52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
    78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
};
3175
/* Clamp a VP8 quantization index to the valid lookup-table range
 * [0, 127] so it can safely index vp8_dc_qlookup / vp8_ac_qlookup. */
static inline unsigned int vp8_clip_quantization_index(int index)
{
    if (index < 0)
        return 0;

    return index > 127 ? 127 : (unsigned int)index;
}
3185
/*
 * Per-frame setup for VP8 decoding: refreshes the reference frame
 * store, binds the render target as decode destination, selects
 * pre- vs post-deblocking output according to loop_filter_disable,
 * and (re)allocates the row-store scratch buffers sized from the
 * frame width in macroblocks.
 */
static void
gen8_mfd_vp8_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;
    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    int width_in_mbs = (pic_param->frame_width + 15) / 16;
    int height_in_mbs = (pic_param->frame_height + 15) / 16;

    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    intel_update_vp8_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* Exactly one of post-/pre-deblocking output is marked valid,
     * depending on whether the in-loop filter runs for this frame. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;

    intel_ensure_vp8_segmentation_buffer(ctx,
                                         &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);

    /* The same as AVC */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* VP8 has no bitplane data */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
3262
/*
 * Emit MFX_VP8_PIC_STATE (38 DWORDs): frame geometry, loop-filter and
 * segmentation control, per-segment dequantization values derived from
 * the DC/AC lookup tables, probability data, and the segmentation id
 * stream address.
 */
static void
gen8_mfd_vp8_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
    VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
    dri_bo *probs_bo = decode_state->probability_data->bo;
    int i, j, log2num;
    unsigned int quantization_value[4][6];

    /* There is no safe way to error out if the segmentation buffer
       could not be allocated. So, instead of aborting, simply decode
       something even if the result may look totally inaccurate */
    const unsigned int enable_segmentation =
        pic_param->pic_fields.bits.segmentation_enabled &&
        gen7_mfd_context->segmentation_buffer.valid;

    /* number of token partitions is a power of two in [1..8];
     * NOTE(review): num_of_partitions == 1 would make this log2(0) —
     * gen8_mfd_vp8_bsd_object asserts >= 2, presumably guaranteeing
     * that cannot happen here; confirm. */
    log2num = (int)log2(slice_param->num_of_partitions - 1);

    BEGIN_BCS_BATCH(batch, 38);
    OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
    /* DW1: frame size in macroblocks, each dimension minus one */
    OUT_BCS_BATCH(batch,
                  (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
                  (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
    /* DW2: partition count, loop filter and segmentation control bits */
    OUT_BCS_BATCH(batch,
                  log2num << 24 |
                  pic_param->pic_fields.bits.sharpness_level << 16 |
                  pic_param->pic_fields.bits.sign_bias_alternate << 13 |
                  pic_param->pic_fields.bits.sign_bias_golden << 12 |
                  pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
                  pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
                  (enable_segmentation &&
                   pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
                  pic_param->pic_fields.bits.segmentation_enabled << 8 |
                  (enable_segmentation &&
                   !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
                  (enable_segmentation &&
                   pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
                  (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
                  pic_param->pic_fields.bits.filter_type << 4 |
                  (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
                  !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */

    OUT_BCS_BATCH(batch,
                  pic_param->loop_filter_level[3] << 24 |
                  pic_param->loop_filter_level[2] << 16 |
                  pic_param->loop_filter_level[1] <<  8 |
                  pic_param->loop_filter_level[0] <<  0);

    /* Quantizer Value for 4 segments, DW4-DW15 */
    for (i = 0; i < 4; i++) {
        quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
        quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
        quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /*y2dc*/
        /* 101581>>16 is equivalent to 155/100 */
        quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /*y2ac*/
        quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
        quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/

        /* clamp: y2ac >= 8, uvdc <= 132 */
        quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
        quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);

        OUT_BCS_BATCH(batch,
                      quantization_value[i][0] << 16 | /* Y1AC */
                      quantization_value[i][1] <<  0); /* Y1DC */
        OUT_BCS_BATCH(batch,
                      quantization_value[i][5] << 16 | /* UVAC */
                      quantization_value[i][4] <<  0); /* UVDC */
        OUT_BCS_BATCH(batch,
                      quantization_value[i][3] << 16 | /* Y2AC */
                      quantization_value[i][2] <<  0); /* Y2DC */
    }

    /* CoeffProbability table for non-key frame, DW16-DW18 */
    if (probs_bo) {
        OUT_BCS_RELOC64(batch, probs_bo,
                        0, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
        OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    } else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch,
                  pic_param->mb_segment_tree_probs[2] << 16 |
                  pic_param->mb_segment_tree_probs[1] <<  8 |
                  pic_param->mb_segment_tree_probs[0] <<  0);

    OUT_BCS_BATCH(batch,
                  pic_param->prob_skip_false << 24 |
                  pic_param->prob_intra      << 16 |
                  pic_param->prob_last       <<  8 |
                  pic_param->prob_gf         <<  0);

    OUT_BCS_BATCH(batch,
                  pic_param->y_mode_probs[3] << 24 |
                  pic_param->y_mode_probs[2] << 16 |
                  pic_param->y_mode_probs[1] <<  8 |
                  pic_param->y_mode_probs[0] <<  0);

    OUT_BCS_BATCH(batch,
                  pic_param->uv_mode_probs[2] << 16 |
                  pic_param->uv_mode_probs[1] <<  8 |
                  pic_param->uv_mode_probs[0] <<  0);

    /* MV update value, DW23-DW32 */
    for (i = 0; i < 2; i++) {
        for (j = 0; j < 20; j += 4) {
            OUT_BCS_BATCH(batch,
                          (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
                          pic_param->mv_probs[i][j + 2] << 16 |
                          pic_param->mv_probs[i][j + 1] <<  8 |
                          pic_param->mv_probs[i][j + 0] <<  0);
        }
    }

    /* loop-filter deltas, packed as 7-bit fields */
    OUT_BCS_BATCH(batch,
                  (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
                  (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
                  (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
                  (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);

    OUT_BCS_BATCH(batch,
                  (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
                  (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
                  (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
                  (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);

    /* segmentation id stream base address, DW35-DW37 */
    if (enable_segmentation) {
        OUT_BCS_RELOC64(batch, gen7_mfd_context->segmentation_buffer.bo,
                        0, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
        OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    } else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    ADVANCE_BCS_BATCH(batch);
}
3410
/*
 * Emit MFD_VP8_BSD_OBJECT (22 DWORDs) for the single VP8 slice:
 * programs the partition-0 bool-coder state (remaining entropy bits,
 * range, value) and the size/offset of up to 8 token partitions.
 * Unused partition slots are programmed as zero.
 */
static void
gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVP8 *pic_param,
                        VASliceParameterBufferVP8 *slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i, log2num;
    /* byte offset of the first token partition: slice data start plus
     * the (rounded-up) size of the frame header */
    unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
    unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
    unsigned int partition_size_0 = slice_param->partition_size[0];

    assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
    /* count == 0 means the current byte is fully consumed: advance to
     * the next byte and shrink partition 0 accordingly */
    if (used_bits == 8) {
        used_bits = 0;
        offset += 1;
        partition_size_0 -= 1;
    }

    assert(slice_param->num_of_partitions >= 2);
    assert(slice_param->num_of_partitions <= 9);

    /* number of token partitions = 2^log2num */
    log2num = (int)log2(slice_param->num_of_partitions - 1);

    BEGIN_BCS_BATCH(batch, 22);
    OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
    OUT_BCS_BATCH(batch,
                  used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
                  pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
                  log2num << 4 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
                  0);

    OUT_BCS_BATCH(batch, partition_size_0 + 1);
    OUT_BCS_BATCH(batch, offset);
    //partition sizes in bytes are present after the above first partition when there are more than one token partition
    offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
    for (i = 1; i < 9; i++) {
        if (i < slice_param->num_of_partitions) {
            OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
            OUT_BCS_BATCH(batch, offset);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }

        /* NOTE(review): partition_size[i] is read for i beyond
         * num_of_partitions as well; the resulting offset is never
         * emitted for those slots, so it is harmless — confirm the
         * VA-API array is always 9 entries. */
        offset += slice_param->partition_size[i];
    }

    OUT_BCS_BATCH(batch, 0); /* concealment method */

    ADVANCE_BCS_BATCH(batch);
}
3467
/*
 * Decode one VP8 frame.
 *
 * Validates that the decode_state carries exactly one slice parameter
 * buffer, one slice data buffer and the coefficient probability data
 * (VP8 is one slice per frame), then emits the full MFX command sequence:
 * pipe mode select, surface/buffer state, picture state and finally the
 * BSD object that triggers the actual bitstream decode.  The command
 * order below matches the hardware's required MFX programming sequence.
 */
void
gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVP8 *pic_param;
    VASliceParameterBufferVP8 *slice_param;
    dri_bo *slice_data_bo;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;

    /* one slice per frame */
    if (decode_state->num_slice_params != 1 ||
        (!decode_state->slice_params ||
         !decode_state->slice_params[0] ||
         (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
        (!decode_state->slice_datas ||
         !decode_state->slice_datas[0] ||
         !decode_state->slice_datas[0]->bo) ||
        !decode_state->probability_data) {
        WARN_ONCE("Wrong parameters for VP8 decoding\n");

        return;
    }

    slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
    slice_data_bo = decode_state->slice_datas[0]->bo;

    /* Allocate/refresh per-frame resources (render targets, scratch BOs). */
    gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
3511
3512 static VAStatus
3513 gen8_mfd_decode_picture(VADriverContextP ctx,
3514                         VAProfile profile,
3515                         union codec_state *codec_state,
3516                         struct hw_context *hw_context)
3517
3518 {
3519     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3520     struct decode_state *decode_state = &codec_state->decode;
3521     VAStatus vaStatus;
3522
3523     assert(gen7_mfd_context);
3524
3525     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3526
3527     if (vaStatus != VA_STATUS_SUCCESS)
3528         goto out;
3529
3530     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3531
3532     switch (profile) {
3533     case VAProfileMPEG2Simple:
3534     case VAProfileMPEG2Main:
3535         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3536         break;
3537
3538     case VAProfileH264ConstrainedBaseline:
3539     case VAProfileH264Main:
3540     case VAProfileH264High:
3541     case VAProfileH264StereoHigh:
3542     case VAProfileH264MultiviewHigh:
3543         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3544         break;
3545
3546     case VAProfileVC1Simple:
3547     case VAProfileVC1Main:
3548     case VAProfileVC1Advanced:
3549         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3550         break;
3551
3552     case VAProfileJPEGBaseline:
3553         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3554         break;
3555
3556     case VAProfileVP8Version0_3:
3557         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3558         break;
3559
3560     default:
3561         assert(0);
3562         break;
3563     }
3564
3565     vaStatus = VA_STATUS_SUCCESS;
3566
3567 out:
3568     return vaStatus;
3569 }
3570
3571 static void
3572 gen8_mfd_context_destroy(void *hw_context)
3573 {
3574     VADriverContextP ctx;
3575     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3576
3577     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3578
3579     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3580     gen7_mfd_context->post_deblocking_output.bo = NULL;
3581
3582     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3583     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3584
3585     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3586     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3587
3588     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3589     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3590
3591     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3592     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3593
3594     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3595     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3596
3597     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3598     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3599
3600     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3601     gen7_mfd_context->segmentation_buffer.bo = NULL;
3602
3603     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3604
3605     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3606         i965_DestroySurfaces(ctx,
3607                              &gen7_mfd_context->jpeg_wa_surface_id,
3608                              1);
3609         gen7_mfd_context->jpeg_wa_surface_object = NULL;
3610     }
3611
3612     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3613     free(gen7_mfd_context);
3614 }
3615
3616 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3617                                         struct gen7_mfd_context *gen7_mfd_context)
3618 {
3619     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3620     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3621     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3622     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3623 }
3624
3625 struct hw_context *
3626 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3627 {
3628     struct intel_driver_data *intel = intel_driver_data(ctx);
3629     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3630     int i;
3631
3632     if (!gen7_mfd_context)
3633         return NULL;
3634
3635     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3636     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3637     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3638
3639     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3640         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3641         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3642     }
3643
3644     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3645     gen7_mfd_context->segmentation_buffer.valid = 0;
3646
3647     switch (obj_config->profile) {
3648     case VAProfileMPEG2Simple:
3649     case VAProfileMPEG2Main:
3650         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3651         break;
3652
3653     case VAProfileH264ConstrainedBaseline:
3654     case VAProfileH264Main:
3655     case VAProfileH264High:
3656     case VAProfileH264StereoHigh:
3657     case VAProfileH264MultiviewHigh:
3658         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3659         break;
3660     default:
3661         break;
3662     }
3663
3664     gen7_mfd_context->driver_context = ctx;
3665     return (struct hw_context *)gen7_mfd_context;
3666 }