src/gen8_mfd.c (android-x86/hardware-intel-common-vaapi.git, commit d5c76a18bf408a5a621e704da00780683bae56e1)

/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Xiang Haihao <haihao.xiang@intel.com>
 *    Zhao  Yakui  <yakui.zhao@intel.com>
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>
#include <va/va_dec_jpeg.h>
#include <va/va_dec_vp8.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_decoder_utils.h"

#include "gen7_mfd.h"
#include "intel_media.h"

#define B0_STEP_REV     2
#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)

static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};

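/*
 * Lazily attach a GenAvcSurface to the decode surface and make sure its
 * direct-MV (DMV) buffer exists.  The buffer is sized at 128 bytes per
 * macroblock of the full frame, so both fields of a field pair share a
 * single allocation (see the "whole frame" note below).
 */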
static void
gen8_mfd_init_avc_surface(VADriverContextP ctx,
                          VAPictureParameterBufferH264 *pic_param,
                          struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
    int width_in_mbs, height_in_mbs;

    obj_surface->free_private_data = gen_free_avc_surface;
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    if (!gen7_avc_surface) {
        gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);

        if (!gen7_avc_surface)
            return;

        gen7_avc_surface->base.frame_store_id = -1;
        assert((obj_surface->size & 0x3f) == 0);
        obj_surface->private_data = gen7_avc_surface;
    }

    /* DMV buffers now relate to the whole frame, irrespective of
       field coding modes */
    if (gen7_avc_surface->dmv_top == NULL) {
        gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
                                                 "direct mv w/r buffer",
                                                 width_in_mbs * height_in_mbs * 128,
                                                 0x1000);
        assert(gen7_avc_surface->dmv_top);
    }
}

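/*
 * MFX_PIPE_MODE_SELECT (5 dwords): put the MFX engine into VLD decode
 * mode for the selected codec and enable the pre-/post-deblocking output
 * paths that the per-codec init code marked as valid.  All error
 * termination bits are left disabled here.
 */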
static void
gen8_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1 ||
           standard_select == MFX_FORMAT_JPEG ||
           standard_select == MFX_FORMAT_VP8);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}

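/*
 * MFX_SURFACE_STATE: describe the destination picture.  The surface is a
 * Y-major tiled planar 4:2:0 buffer (or monochrome for Y800), with chroma
 * interleaved for the video codecs; only the JPEG path programs a separate
 * Cr offset.
 */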
static void
gen8_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface = decode_state->render_object;
    unsigned int y_cb_offset;
    unsigned int y_cr_offset;
    unsigned int surface_format;

    assert(obj_surface);

    y_cb_offset = obj_surface->y_cb_offset;
    y_cr_offset = obj_surface->y_cr_offset;

    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
                     MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 18) |
                  ((obj_surface->orig_width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (surface_format << 28) | /* 420 planar YUV surface */
                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  ((standard_select == MFX_FORMAT_JPEG ? y_cr_offset : 0) << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
}

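/*
 * MFX_PIPE_BUF_ADDR_STATE (61 dwords): every buffer is emitted as a 64-bit
 * address pair (a reloc or two zero dwords) followed by a MOCS dword.
 * After the deblocking outputs and row-store scratch buffers, DW 19..50
 * hold the addresses of up to 16 reference pictures.
 */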
static void
gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* Pre-deblock 1-3 */
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->pre_deblocking_output.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Post-deblocking 4-6 */
    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->post_deblocking_output.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* intra row-store scratch 13-15 */
    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* deblocking-filter-row-store 16-18 */
    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 19..50 */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->bo) {
            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

            OUT_BCS_RELOC64(batch, obj_surface->bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }

    }

    /* reference property 51 */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Macroblock status & ILDB 52-57 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the second Macroblock status 58-60 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}

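/*
 * MFX_IND_OBJ_BASE_ADDR_STATE: point the bitstream fetch at the slice
 * data BO.  The MV, IT-COFF, IT-DBLK and PAK-BSE sections only matter for
 * IT mode or encoding, so they are programmed as zeros for VLD decode.
 */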
static void
gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    BEGIN_BCS_BATCH(batch, 26);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* MFX In BS 1-5 */
    OUT_BCS_RELOC64(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* Upper bound 4-5 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX indirect MV 6-10 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX IT_COFF 11-15 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX IT_DBLK 16-20 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX PAK_BSE object for encoder 21-25 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}

static void
gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* MPR Row Store Scratch buffer 4-6 */
    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Bitplane 7-9 */
    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                        I915_GEM_DOMAIN_INSTRUCTION, 0,
                        0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    ADVANCE_BCS_BATCH(batch);
}

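/*
 * MFX_QM_STATE: upload one quantizer matrix.  The payload is always
 * 16 dwords (64 bytes); callers passing a shorter matrix (e.g. the
 * 3 * 16 byte AVC 4x4 lists) only fill the front of the scratch buffer.
 */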
static void
gen8_mfd_qm_state(VADriverContextP ctx,
                  int qm_type,
                  unsigned char *qm,
                  int qm_length,
                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int qm_buffer[16];

    assert(qm_length <= 16 * 4);
    memcpy(qm_buffer, qm, qm_length);

    BEGIN_BCS_BATCH(batch, 18);
    OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
    OUT_BCS_BATCH(batch, qm_type << 0);
    intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
    ADVANCE_BCS_BATCH(batch);
}

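/*
 * MFX_AVC_IMG_STATE: per-picture AVC parameters.  img_struct encodes
 * frame (0), top field (1) or bottom field (3); the asserts spell out the
 * driver's assumptions (4:2:0 or monochrome only, field/MBAFF flags
 * consistent with the sequence flags).
 */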
static void
gen8_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 17);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
    OUT_BCS_BATCH(batch,
                  (width_in_mbs * height_in_mbs - 1));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}

static void
gen8_mfd_avc_qm_state(VADriverContextP ctx,
                      struct decode_state *decode_state,
                      struct gen7_mfd_context *gen7_mfd_context)
{
    VAIQMatrixBufferH264 *iq_matrix;
    VAPictureParameterBufferH264 *pic_param;

    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
        iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
    else
        iq_matrix = &gen7_mfd_context->iq_matrix.h264;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
    gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
        gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
        gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
    }
}

static inline void
gen8_mfd_avc_picid_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
                               gen7_mfd_context->reference_surface);
}

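/*
 * MFX_AVC_DIRECTMODE_STATE (71 dwords): the DMV buffer address of each of
 * the 16 reference surfaces and of the current picture, followed by the
 * top/bottom POC list for the references and for CurrPic.
 */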
static void
gen8_mfd_avc_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen7_avc_surface;
    VAPictureH264 *va_pic;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->private_data) {

            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
            gen7_avc_surface = obj_surface->private_data;

            OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the current decoding frame/field */
    va_pic = &pic_param->CurrPic;
    obj_surface = decode_state->render_object;
    assert(obj_surface->bo && obj_surface->private_data);
    gen7_avc_surface = obj_surface->private_data;

    OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* POC List */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

        if (obj_surface) {
            const VAPictureH264 * const va_pic = avc_find_picture(
                                                     obj_surface->base.id, pic_param->ReferenceFrames,
                                                     ARRAY_ELEMS(pic_param->ReferenceFrames));

            assert(va_pic != NULL);
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}

static void
gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
                                 VAPictureParameterBufferH264 *pic_param,
                                 VASliceParameterBufferH264 *next_slice_param,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
}

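/*
 * MFX_AVC_SLICE_STATE: normalized slice type, active reference counts,
 * QP and deblocking controls, and the macroblock coordinates of this
 * slice and of the next one (or the picture end when this is the last
 * slice, which also sets the "last slice" bit).
 */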
static void
gen8_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    first_mb_in_slice = slice_param->first_mb_in_slice;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    if (mbaff_picture)
        slice_ver_pos = slice_ver_pos << 1;
    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;

        if (mbaff_picture)
            next_slice_ver_pos = next_slice_ver_pos << 1;
    } else {
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}

static inline void
gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           VASliceParameterBufferH264 *slice_param,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    gen6_send_avc_ref_idx_state(
        gen7_mfd_context->base.batch,
        slice_param,
        gen7_mfd_context->reference_surface
    );
}

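/*
 * MFX_AVC_WEIGHTOFFSET_STATE: sent once (L0) for explicit weighted
 * prediction on P/SP slices and twice (L0 then L1) for B slices with
 * weighted_bipred_idc == 1.  Each table packs 32 entries of luma
 * weight/offset plus Cb and Cr weight/offset pairs into a 96-dword
 * payload.
 */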
static void
gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
                                VAPictureParameterBufferH264 *pic_param,
                                VASliceParameterBufferH264 *slice_param,
                                struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i, j, num_weight_offset_table = 0;
    short weightoffsets[32 * 6];

    if ((slice_param->slice_type == SLICE_TYPE_P ||
         slice_param->slice_type == SLICE_TYPE_SP) &&
        (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
        num_weight_offset_table = 1;
    }

    if ((slice_param->slice_type == SLICE_TYPE_B) &&
        (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
        num_weight_offset_table = 2;
    }

    for (i = 0; i < num_weight_offset_table; i++) {
        BEGIN_BCS_BATCH(batch, 98);
        OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
        OUT_BCS_BATCH(batch, i);

        if (i == 0) {
            for (j = 0; j < 32; j++) {
                weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
                weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
                weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
                weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
                weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
                weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
            }
        } else {
            for (j = 0; j < 32; j++) {
                weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
                weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
                weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
                weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
                weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
                weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
            }
        }

        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
        ADVANCE_BCS_BATCH(batch);
    }
}

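/*
 * MFD_AVC_BSD_OBJECT: kick off decoding of one slice.  The first-MB bit
 * offset is recomputed from the bitstream (CABAC vs. CAVLC) and split
 * into a byte offset plus a bit remainder, and the LastSlice flag is set
 * when no further slice follows in this picture.
 */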
static void
gen8_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
                                                            slice_param,
                                                            pic_param->pic_fields.bits.entropy_coding_mode_flag);

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_data_size));
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) |
                  (1 << 7)  |
                  (0 << 5)  |
                  (0 << 4)  |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}

static inline void
gen8_mfd_avc_context_init(
    VADriverContextP         ctx,
    struct gen7_mfd_context *gen7_mfd_context
)
{
    /* Initialize flat scaling lists */
    avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
}

static void
gen8_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    unsigned int width_in_mbs, height_in_mbs;

    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
                                       gen7_mfd_context->reference_surface);
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    if (pic_param->pic_fields.bits.reference_pic_flag)
        obj_surface->flags |= SURFACE_REFERENCED;
    else
        obj_surface->flags &= ~SURFACE_REFERENCED;

    avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
    gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}

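/*
 * Top-level AVC picture decode: after per-picture init, one atomic batch
 * is built with MI_FLUSH and the common pipe/surface/buffer/QM/PICID/IMG
 * state, then per slice-data buffer the indirect object base address,
 * and per slice the direct-mode, ref-idx, weight-offset and slice state
 * followed by the BSD object that starts the actual decode.
 */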
static void
gen8_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);

    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        if (j == 0 && slice_param->first_mb_in_slice)
            gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}

static void
gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferMPEG2 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    unsigned int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;

    mpeg2_set_reference_surfaces(
        ctx,
        gen7_mfd_context->reference_surface,
        decode_state,
        pic_param
    );

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    gen7_mfd_context->post_deblocking_output.valid = 0;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}

static void
gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int slice_concealment_disable_bit = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    slice_concealment_disable_bit = 1;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    OUT_BCS_BATCH(batch,
                  (slice_concealment_disable_bit << 31) |
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}

static void
gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
    int i, j;

    /* Update internal QM state */
    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
        VAIQMatrixBufferMPEG2 * const iq_matrix =
            (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;

        if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
            iq_matrix->load_intra_quantiser_matrix) {
            gen_iq_matrix->load_intra_quantiser_matrix =
                iq_matrix->load_intra_quantiser_matrix;
            if (iq_matrix->load_intra_quantiser_matrix) {
                for (j = 0; j < 64; j++)
                    gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
                        iq_matrix->intra_quantiser_matrix[j];
            }
        }

        if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
            iq_matrix->load_non_intra_quantiser_matrix) {
            gen_iq_matrix->load_non_intra_quantiser_matrix =
                iq_matrix->load_non_intra_quantiser_matrix;
            if (iq_matrix->load_non_intra_quantiser_matrix) {
                for (j = 0; j < 64; j++)
                    gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
                        iq_matrix->non_intra_quantiser_matrix[j];
            }
        }
    }

    /* Commit QM state to HW */
    for (i = 0; i < 2; i++) {
        unsigned char *qm = NULL;
        int qm_type;

        if (i == 0) {
            if (gen_iq_matrix->load_intra_quantiser_matrix) {
                qm = gen_iq_matrix->intra_quantiser_matrix;
                qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
            }
        } else {
            if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
                qm = gen_iq_matrix->non_intra_quantiser_matrix;
                qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
            }
        }

        if (!qm)
            continue;

        gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
    }
}

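/*
 * MFD_MPEG2_BSD_OBJECT: one command per MPEG-2 slice.  The macroblock
 * count is the distance from this slice's start position to the next
 * slice's (or to the end of the picture), with vertical positions halved
 * when the field-picture workaround is in effect.
 */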
1123 static void
1124 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1125                           VAPictureParameterBufferMPEG2 *pic_param,
1126                           VASliceParameterBufferMPEG2 *slice_param,
1127                           VASliceParameterBufferMPEG2 *next_slice_param,
1128                           struct gen7_mfd_context *gen7_mfd_context)
1129 {
1130     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1131     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1132     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1133
1134     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1135         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1136         is_field_pic = 1;
1137     is_field_pic_wa = is_field_pic &&
1138                       gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1139
1140     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1141     hpos0 = slice_param->slice_horizontal_position;
1142
1143     if (next_slice_param == NULL) {
1144         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1145         hpos1 = 0;
1146     } else {
1147         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1148         hpos1 = next_slice_param->slice_horizontal_position;
1149     }
1150
1151     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1152
1153     BEGIN_BCS_BATCH(batch, 5);
1154     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1155     OUT_BCS_BATCH(batch,
1156                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1157     OUT_BCS_BATCH(batch,
1158                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1159     OUT_BCS_BATCH(batch,
1160                   hpos0 << 24 |
1161                   vpos0 << 16 |
1162                   mb_count << 8 |
1163                   (next_slice_param == NULL) << 5 |
1164                   (next_slice_param == NULL) << 3 |
1165                   (slice_param->macroblock_offset & 0x7));
1166     OUT_BCS_BATCH(batch,
1167                   (slice_param->quantiser_scale_code << 24) |
1168                   (vpos1 << 8 | hpos1));
1169     ADVANCE_BCS_BATCH(batch);
1170 }
1171
1172 static void
1173 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1174                               struct decode_state *decode_state,
1175                               struct gen7_mfd_context *gen7_mfd_context)
1176 {
1177     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1178     VAPictureParameterBufferMPEG2 *pic_param;
1179     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1180     dri_bo *slice_data_bo;
1181     int i, j;
1182
1183     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1184     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1185
1186     gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1187     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1188     intel_batchbuffer_emit_mi_flush(batch);
1189     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1190     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1191     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1192     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1193     gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1194     gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1195
1196     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1197         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1198             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1199
1200     for (j = 0; j < decode_state->num_slice_params; j++) {
1201         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1202         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1203         slice_data_bo = decode_state->slice_datas[j]->bo;
1204         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1205
1206         if (j == decode_state->num_slice_params - 1)
1207             next_slice_group_param = NULL;
1208         else
1209             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1210
1211         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1212             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1213
1214             if (i < decode_state->slice_params[j]->num_elements - 1)
1215                 next_slice_param = slice_param + 1;
1216             else
1217                 next_slice_param = next_slice_group_param;
1218
1219             gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1220             slice_param++;
1221         }
1222     }
1223
1224     intel_batchbuffer_end_atomic(batch);
1225     intel_batchbuffer_flush(batch);
1226 }
1227
1228 static const int va_to_gen7_vc1_mv[4] = {
1229     1, /* 1-MV */
1230     2, /* 1-MV half-pel */
1231     3, /* 1-MV half-pef bilinear */
1232     0, /* Mixed MV */
1233 };
1234
1235 static const int b_picture_scale_factor[21] = {
1236     128, 85,  170, 64,  192,
1237     51,  102, 153, 204, 43,
1238     215, 37,  74,  111, 148,
1239     185, 222, 32,  96,  160,
1240     224,
1241 };
1242
1243 static const int va_to_gen7_vc1_condover[3] = {
1244     0,
1245     2,
1246     3
1247 };
1248
1249 static const int va_to_gen7_vc1_profile[4] = {
1250     GEN7_VC1_SIMPLE_PROFILE,
1251     GEN7_VC1_MAIN_PROFILE,
1252     GEN7_VC1_RESERVED_PROFILE,
1253     GEN7_VC1_ADVANCED_PROFILE
1254 };
1255
1256 static void
1257 gen8_mfd_free_vc1_surface(void **data)
1258 {
1259     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1260
1261     if (!gen7_vc1_surface)
1262         return;
1263
1264     dri_bo_unreference(gen7_vc1_surface->dmv);
1265     free(gen7_vc1_surface);
1266     *data = NULL;
1267 }
1268
1269 static void
1270 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1271                           VAPictureParameterBufferVC1 *pic_param,
1272                           struct object_surface *obj_surface)
1273 {
1274     struct i965_driver_data *i965 = i965_driver_data(ctx);
1275     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1276     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1277
1278     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1279
1280     if (!gen7_vc1_surface) {
1281         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1282
1283         if (!gen7_vc1_surface)
1284             return;
1285
1286         assert((obj_surface->size & 0x3f) == 0);
1287         obj_surface->private_data = gen7_vc1_surface;
1288     }
1289
1290     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1291     gen7_vc1_surface->intensity_compensation = 0;
1292     gen7_vc1_surface->luma_scale = 0;
1293     gen7_vc1_surface->luma_shift = 0;
1294
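         /*
          * Direct-mode MV read/write buffer: written while decoding this
          * picture and read back when a later B picture uses it as a
          * reference; allocated once per surface and kept until the
          * surface is destroyed.
          */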
1295     if (gen7_vc1_surface->dmv == NULL) {
1296         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1297                                              "direct mv w/r buffer",
1298                                              128 * height_in_mbs * 64,
1299                                              0x1000);
1300     }
1301 }
1302
1303 static void
1304 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1305                          struct decode_state *decode_state,
1306                          struct gen7_mfd_context *gen7_mfd_context)
1307 {
1308     VAPictureParameterBufferVC1 *pic_param;
1309     struct i965_driver_data *i965 = i965_driver_data(ctx);
1310     struct object_surface *obj_surface;
1311     dri_bo *bo;
1312     int width_in_mbs;
1313     int picture_type;
1314     int intensity_compensation;
1315
1316     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1317     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1318     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1319     picture_type = pic_param->picture_fields.bits.picture_type;
1320     intensity_compensation = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1321
1322     intel_update_vc1_frame_store_index(ctx,
1323                                        decode_state,
1324                                        pic_param,
1325                                        gen7_mfd_context->reference_surface);
1326
1327     /* Forward reference picture */
1328     obj_surface = decode_state->reference_objects[0];
1329     if (pic_param->forward_reference_picture != VA_INVALID_ID &&
1330         obj_surface &&
1331         obj_surface->private_data) {
1332         if (picture_type == 1 && intensity_compensation) { /* P picture */
1333             struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1334
1335             gen7_vc1_surface->intensity_compensation = intensity_compensation;
1336             gen7_vc1_surface->luma_scale = pic_param->luma_scale;
1337             gen7_vc1_surface->luma_shift = pic_param->luma_shift;
1338         }
1339     }
1340
1341     /* Current decoded picture */
1342     obj_surface = decode_state->render_object;
1343     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1344     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1345
1346     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1347     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1348     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1349
1350     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1351     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1352     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1353
1354     if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1355         gen7_mfd_context->post_deblocking_output.valid = 0;
1356         gen7_mfd_context->pre_deblocking_output.valid = 1;
1357     } else {
1358         gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1359         gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1360     }
1361
1362     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1363     bo = dri_bo_alloc(i965->intel.bufmgr,
1364                       "intra row store",
1365                       width_in_mbs * 64,
1366                       0x1000);
1367     assert(bo);
1368     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1369     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1370
1371     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1372     bo = dri_bo_alloc(i965->intel.bufmgr,
1373                       "deblocking filter row store",
1374                       width_in_mbs * 7 * 64,
1375                       0x1000);
1376     assert(bo);
1377     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1378     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1379
1380     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1381     bo = dri_bo_alloc(i965->intel.bufmgr,
1382                       "bsd mpc row store",
1383                       width_in_mbs * 96,
1384                       0x1000);
1385     assert(bo);
1386     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1387     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1388
1389     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1390
1391     if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
1392         gen7_mfd_context->bitplane_read_buffer.valid = 1;
1393     else
1394         gen7_mfd_context->bitplane_read_buffer.valid = !!(pic_param->bitplane_present.value & 0x7f);
1395     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1396
1397     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1398         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1399         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1400         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1401         int src_w, src_h;
1402         uint8_t *src = NULL, *dst = NULL;
1403
1404         bo = dri_bo_alloc(i965->intel.bufmgr,
1405                           "VC-1 Bitplane",
1406                           bitplane_width * height_in_mbs,
1407                           0x1000);
1408         assert(bo);
1409         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1410
1411         dri_bo_map(bo, True);
1412         assert(bo->virtual);
1413         dst = bo->virtual;
1414
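             /*
              * Pack two macroblocks per byte, one row of macroblocks per
              * bitplane_width bytes: values are shifted in from the high
              * nibble, and the trailing ">>= 4" realigns the last byte of a
              * row when width_in_mbs is odd.  Skipped pictures carry no
              * bitplane data, so every macroblock gets the fixed value 0x2.
              */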
1415         if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1416             for (src_h = 0; src_h < height_in_mbs; src_h++) {
1417                 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1418                     int dst_index;
1419                     uint8_t src_value = 0x2;
1420
1421                     dst_index = src_w / 2;
1422                     dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1423                 }
1424
1425                 if (src_w & 1)
1426                     dst[src_w / 2] >>= 4;
1427
1428                 dst += bitplane_width;
1429             }
1430         } else {
1431             assert(decode_state->bit_plane->buffer);
1432             src = decode_state->bit_plane->buffer;
1433
1434             for (src_h = 0; src_h < height_in_mbs; src_h++) {
1435                 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1436                     int src_index, dst_index;
1437                     int src_shift;
1438                     uint8_t src_value;
1439
1440                     src_index = (src_h * width_in_mbs + src_w) / 2;
1441                     src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1442                     src_value = ((src[src_index] >> src_shift) & 0xf);
1443
1444                     dst_index = src_w / 2;
1445                     dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1446                 }
1447
1448                 if (src_w & 1)
1449                     dst[src_w / 2] >>= 4;
1450
1451                 dst += bitplane_width;
1452             }
1453         }
1454
1455         dri_bo_unmap(bo);
1456     } else
1457         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1458 }
1459
1460 static void
1461 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1462                        struct decode_state *decode_state,
1463                        struct gen7_mfd_context *gen7_mfd_context)
1464 {
1465     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1466     VAPictureParameterBufferVC1 *pic_param;
1467     struct object_surface *obj_surface;
1468     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1469     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1470     int unified_mv_mode;
1471     int ref_field_pic_polarity = 0;
1472     int scale_factor = 0;
1473     int trans_ac_y = 0;
1474     int dmv_surface_valid = 0;
1475     int brfd = 0;
1476     int fcm = 0;
1477     int picture_type;
1478     int ptype;
1479     int profile;
1480     int overlap = 0;
1481     int interpolation_mode = 0;
1482     int loopfilter = 0;
1483
1484     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1485     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1486
1487     picture_type = pic_param->picture_fields.bits.picture_type;
1488
1489     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1490     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1491     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1492     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1493     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1494     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1495     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1496     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1497
1498     if (dquant == 0) {
1499         alt_pquant_config = 0;
1500         alt_pquant_edge_mask = 0;
1501     } else if (dquant == 2) {
1502         alt_pquant_config = 1;
1503         alt_pquant_edge_mask = 0xf;
1504     } else {
1505         assert(dquant == 1);
1506         if (dquantfrm == 0) {
1507             alt_pquant_config = 0;
1508             alt_pquant_edge_mask = 0;
1509             alt_pq = 0;
1510         } else {
1511             assert(dquantfrm == 1);
1512             alt_pquant_config = 1;
1513
1514             switch (dqprofile) {
1515             case 3:
1516                 if (dqbilevel == 0) {
1517                     alt_pquant_config = 2;
1518                     alt_pquant_edge_mask = 0;
1519                 } else {
1520                     assert(dqbilevel == 1);
1521                     alt_pquant_config = 3;
1522                     alt_pquant_edge_mask = 0;
1523                 }
1524                 break;
1525
1526             case 0:
1527                 alt_pquant_edge_mask = 0xf;
1528                 break;
1529
1530             case 1:
1531                 if (dqdbedge == 3)
1532                     alt_pquant_edge_mask = 0x9;
1533                 else
1534                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1535
1536                 break;
1537
1538             case 2:
1539                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1540                 break;
1541
1542             default:
1543                 assert(0);
1544             }
1545         }
1546     }
1547
1548     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1549         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1550         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1551     } else {
1552         assert(pic_param->mv_fields.bits.mv_mode < 4);
1553         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1554     }
1555
1556     if (pic_param->sequence_fields.bits.interlace == 1 &&
1557         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1558         /* FIXME: calculate reference field picture polarity */
1559         assert(0);
1560         ref_field_pic_polarity = 0;
1561     }
1562
1563     if (pic_param->b_picture_fraction < 21)
1564         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1565
1566     if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
1567         ptype = GEN7_VC1_P_PICTURE;
1568     else {
1569         ptype = pic_param->picture_fields.bits.picture_type;
1570         loopfilter = pic_param->entrypoint_fields.bits.loopfilter;
1571     }
1572
1573     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I or BI picture */
1574         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1575     else {
1576         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1577
1578         /*
1579          * 8.3.6.2.1 Transform Type Selection
1580          * If variable-sized transform coding is not enabled,
1581          * then the 8x8 transform shall be used for all blocks.
1582          * it is also MFX_VC1_PIC_STATE requirement.
1583          * It is also an MFX_VC1_PIC_STATE requirement.
1584         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1585             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1586             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1587         }
1588     }
1589
1590     if (picture_type == GEN7_VC1_B_PICTURE) {
1591         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1592
1593         obj_surface = decode_state->reference_objects[1];
1594
1595         if (obj_surface)
1596             gen7_vc1_surface = obj_surface->private_data;
1597
1598         if (!gen7_vc1_surface ||
1599             (gen7_vc1_surface->picture_type == GEN7_VC1_I_PICTURE ||
1600              gen7_vc1_surface->picture_type == GEN7_VC1_BI_PICTURE))
1601             dmv_surface_valid = 0;
1602         else
1603             dmv_surface_valid = 1;
1604     }
1605
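         /*
          * Frame coding mode for the pic state: progressive (0) and
          * frame-interlace (1) pass through, field-interlace becomes 2 or 3
          * depending on top_field_first.
          */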
1606     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1607
1608     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1609         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1610     else {
1611         if (pic_param->picture_fields.bits.top_field_first)
1612             fcm = 2;
1613         else
1614             fcm = 3;
1615     }
1616
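         /*
          * Backward reference frame distance for B pictures: the reference
          * distance minus its BFRACTION-scaled portion minus one, clamped
          * at zero.
          */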
1617     if (picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1618         brfd = pic_param->reference_fields.bits.reference_distance;
1619         brfd = (scale_factor * brfd) >> 8;
1620         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1621
1622         if (brfd < 0)
1623             brfd = 0;
1624     }
1625
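         /*
          * Overlap smoothing.  Advanced profile: P pictures need PQUANT >= 9,
          * I/BI pictures need PQUANT >= 9 or a conditional overlap flag of
          * 1/2 (all boundaries / OVERFLAGS bitplane).  Simple/main profile:
          * any non-B picture with PQUANT >= 9.
          */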
1626     if (pic_param->sequence_fields.bits.overlap) {
1627         if (profile == GEN7_VC1_ADVANCED_PROFILE) {
1628             if (picture_type == GEN7_VC1_P_PICTURE &&
1629                 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1630                 overlap = 1;
1631             }
1632             if (picture_type == GEN7_VC1_I_PICTURE ||
1633                 picture_type == GEN7_VC1_BI_PICTURE) {
1634                 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1635                     overlap = 1;
1636                 } else if (pic_param->conditional_overlap_flag == 1 || /* all block boundaries */
1637                            pic_param->conditional_overlap_flag == 2) { /* coded by OVERFLAGSMB bitplane */
1638                     overlap = 1;
1639                 }
1640             }
1641         } else {
1642             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1643                 picture_type != GEN7_VC1_B_PICTURE) {
1644                 overlap = 1;
1645             }
1646         }
1647     }
1648
1649     assert(pic_param->conditional_overlap_flag < 3);
1650     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1651
1652     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1653         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1654          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1655         interpolation_mode = 9; /* Half-pel bilinear */
1656     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1657              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1658               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1659         interpolation_mode = 1; /* Half-pel bicubic */
1660     else
1661         interpolation_mode = 0; /* Quarter-pel bicubic */
1662
1663     BEGIN_BCS_BATCH(batch, 6);
1664     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1665     OUT_BCS_BATCH(batch,
1666                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1667                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1668     OUT_BCS_BATCH(batch,
1669                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1670                   dmv_surface_valid << 15 |
1671                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1672                   pic_param->rounding_control << 13 |
1673                   pic_param->sequence_fields.bits.syncmarker << 12 |
1674                   interpolation_mode << 8 |
1675                   0 << 7 | /* FIXME: scale up or down ??? */
1676                   pic_param->range_reduction_frame << 6 |
1677                   loopfilter << 5 |
1678                   overlap << 4 |
1679                   !pic_param->picture_fields.bits.is_first_field << 3 |
1680                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1681     OUT_BCS_BATCH(batch,
1682                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1683                   ptype << 26 |
1684                   fcm << 24 |
1685                   alt_pq << 16 |
1686                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1687                   scale_factor << 0);
1688     OUT_BCS_BATCH(batch,
1689                   unified_mv_mode << 28 |
1690                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1691                   pic_param->fast_uvmc_flag << 26 |
1692                   ref_field_pic_polarity << 25 |
1693                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1694                   pic_param->reference_fields.bits.reference_distance << 20 |
1695                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1696                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1697                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1698                   alt_pquant_edge_mask << 4 |
1699                   alt_pquant_config << 2 |
1700                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1701                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1702     OUT_BCS_BATCH(batch,
1703                   !!(pic_param->bitplane_present.value & 0x7f) << 31 |
1704                   pic_param->raw_coding.flags.forward_mb << 30 |
1705                   pic_param->raw_coding.flags.mv_type_mb << 29 |
1706                   pic_param->raw_coding.flags.skip_mb << 28 |
1707                   pic_param->raw_coding.flags.direct_mb << 27 |
1708                   pic_param->raw_coding.flags.overflags << 26 |
1709                   pic_param->raw_coding.flags.ac_pred << 25 |
1710                   pic_param->raw_coding.flags.field_tx << 24 |
1711                   pic_param->mv_fields.bits.mv_table << 20 |
1712                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1713                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1714                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1715                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1716                   pic_param->mb_mode_table << 8 |
1717                   trans_ac_y << 6 |
1718                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1719                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1720                   pic_param->cbp_table << 0);
1721     ADVANCE_BCS_BATCH(batch);
1722 }
1723
1724 static void
1725 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1726                              struct decode_state *decode_state,
1727                              struct gen7_mfd_context *gen7_mfd_context)
1728 {
1729     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1730     VAPictureParameterBufferVC1 *pic_param;
1731     int picture_type;
1732     int intensitycomp_single_fwd = 0;
1733     int luma_scale1 = 0;
1734     int luma_shift1 = 0;
1735
1736     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1737     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1738     picture_type = pic_param->picture_fields.bits.picture_type;
1739
1740     if (gen7_mfd_context->reference_surface[0].surface_id != VA_INVALID_ID) {
1741         if (picture_type == 1 || picture_type == 2) { /* P/B picture */
1742             struct gen7_vc1_surface *gen7_vc1_surface = gen7_mfd_context->reference_surface[0].obj_surface->private_data;
1743             if (gen7_vc1_surface) {
1744                 intensitycomp_single_fwd = gen7_vc1_surface->intensity_compensation;
1745                 luma_scale1 = gen7_vc1_surface->luma_scale;
1746                 luma_shift1 = gen7_vc1_surface->luma_shift;
1747             }
1748         }
1749     }
1750
1751     BEGIN_BCS_BATCH(batch, 6);
1752     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1753     OUT_BCS_BATCH(batch,
1754                   0 << 14 | /* FIXME: double ??? */
1755                   0 << 12 |
1756                   intensitycomp_single_fwd << 10 |
1757                   0 << 8 |
1758                   0 << 4 | /* FIXME: interlace mode */
1759                   0);
1760     OUT_BCS_BATCH(batch,
1761                   luma_shift1 << 16 |
1762                   luma_scale1 << 0);
1763     OUT_BCS_BATCH(batch, 0);
1764     OUT_BCS_BATCH(batch, 0);
1765     OUT_BCS_BATCH(batch, 0);
1766     ADVANCE_BCS_BATCH(batch);
1767 }
1768
1769 static void
1770 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1771                               struct decode_state *decode_state,
1772                               struct gen7_mfd_context *gen7_mfd_context)
1773 {
1774     struct i965_driver_data *i965 = i965_driver_data(ctx);
1775     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1776     struct object_surface *obj_surface;
1777     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1778
1779     obj_surface = decode_state->render_object;
1780
1781     if (obj_surface && obj_surface->private_data) {
1782         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1783     }
1784
1785     obj_surface = decode_state->reference_objects[1];
1786
1787     if (obj_surface && obj_surface->private_data) {
1788         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1789     }
1790
1791     BEGIN_BCS_BATCH(batch, 7);
1792     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1793
1794     if (dmv_write_buffer)
1795         OUT_BCS_RELOC64(batch, dmv_write_buffer,
1796                         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1797                         0);
1798     else {
1799         OUT_BCS_BATCH(batch, 0);
1800         OUT_BCS_BATCH(batch, 0);
1801     }
1802
1803     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
1804
1805     if (dmv_read_buffer)
1806         OUT_BCS_RELOC64(batch, dmv_read_buffer,
1807                         I915_GEM_DOMAIN_INSTRUCTION, 0,
1808                         0);
1809     else {
1810         OUT_BCS_BATCH(batch, 0);
1811         OUT_BCS_BATCH(batch, 0);
1812     }
1813
1814     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
1815
1816     ADVANCE_BCS_BATCH(batch);
1817 }
1818
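     /*
      * For the advanced profile (profile == 3) the slice data still contains
      * start-code emulation prevention sequences (00 00 03 0x).  The incoming
      * macroblock bit offset apparently does not count the inserted 0x03
      * bytes, so walk the slice header and grow the offset accordingly;
      * other profiles pass the offset through unchanged.
      */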
1819 static int
1820 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1821 {
1822     int out_slice_data_bit_offset;
1823     int slice_header_size = in_slice_data_bit_offset / 8;
1824     int i, j;
1825
1826     if (profile != 3)
1827         out_slice_data_bit_offset = in_slice_data_bit_offset;
1828     else {
1829         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1830             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1831                 if (i < slice_header_size - 1)
1832                     i++, j += 2;
1833                 else {
1834                     buf[j + 2] = buf[j + 1];
1835                     j++;
1836                 }
1837             }
1838         }
1839
1840         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1841     }
1842
1843     return out_slice_data_bit_offset;
1844 }
1845
1846 static void
1847 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1848                         VAPictureParameterBufferVC1 *pic_param,
1849                         VASliceParameterBufferVC1 *slice_param,
1850                         VASliceParameterBufferVC1 *next_slice_param,
1851                         dri_bo *slice_data_bo,
1852                         struct gen7_mfd_context *gen7_mfd_context)
1853 {
1854     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1855     int next_slice_start_vert_pos;
1856     int macroblock_offset;
1857     uint8_t *slice_data = NULL;
1858
1859     dri_bo_map(slice_data_bo, True);
1860     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1861     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1862                                                                slice_param->macroblock_offset,
1863                                                                pic_param->sequence_fields.bits.profile);
1864     dri_bo_unmap(slice_data_bo);
1865
1866     if (next_slice_param)
1867         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1868     else
1869         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1870
1871     BEGIN_BCS_BATCH(batch, 5);
1872     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1873     OUT_BCS_BATCH(batch,
1874                   slice_param->slice_data_size - (macroblock_offset >> 3));
1875     OUT_BCS_BATCH(batch,
1876                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1877     OUT_BCS_BATCH(batch,
1878                   slice_param->slice_vertical_position << 16 |
1879                   next_slice_start_vert_pos << 0);
1880     OUT_BCS_BATCH(batch,
1881                   (macroblock_offset & 0x7));
1882     ADVANCE_BCS_BATCH(batch);
1883 }
1884
1885 static void
1886 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1887                             struct decode_state *decode_state,
1888                             struct gen7_mfd_context *gen7_mfd_context)
1889 {
1890     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1891     VAPictureParameterBufferVC1 *pic_param;
1892     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1893     dri_bo *slice_data_bo;
1894     int i, j;
1895
1896     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1897     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1898
1899     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1900     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1901     intel_batchbuffer_emit_mi_flush(batch);
1902     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1903     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1904     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1905     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1906     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1907     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1908     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1909
1910     for (j = 0; j < decode_state->num_slice_params; j++) {
1911         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1912         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1913         slice_data_bo = decode_state->slice_datas[j]->bo;
1914         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1915
1916         if (j == decode_state->num_slice_params - 1)
1917             next_slice_group_param = NULL;
1918         else
1919             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1920
1921         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1922             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1923
1924             if (i < decode_state->slice_params[j]->num_elements - 1)
1925                 next_slice_param = slice_param + 1;
1926             else
1927                 next_slice_param = next_slice_group_param;
1928
1929             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1930             slice_param++;
1931         }
1932     }
1933
1934     intel_batchbuffer_end_atomic(batch);
1935     intel_batchbuffer_flush(batch);
1936 }
1937
1938 static void
1939 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1940                           struct decode_state *decode_state,
1941                           struct gen7_mfd_context *gen7_mfd_context)
1942 {
1943     struct object_surface *obj_surface;
1944     VAPictureParameterBufferJPEGBaseline *pic_param;
1945     int subsampling = SUBSAMPLE_YUV420;
1946     int fourcc = VA_FOURCC_IMC3;
1947
1948     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1949
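         /*
          * Derive the render target layout from the JPEG component sampling
          * factors: single-component scans decode to Y800, three-component
          * scans map to the matching planar fourcc (IMC3, 422H, 444P, 411P
          * or 422V).
          */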
1950     if (pic_param->num_components == 1) {
1951         subsampling = SUBSAMPLE_YUV400;
1952         fourcc = VA_FOURCC_Y800;
1953     } else if (pic_param->num_components == 3) {
1954         int h1 = pic_param->components[0].h_sampling_factor;
1955         int h2 = pic_param->components[1].h_sampling_factor;
1956         int h3 = pic_param->components[2].h_sampling_factor;
1957         int v1 = pic_param->components[0].v_sampling_factor;
1958         int v2 = pic_param->components[1].v_sampling_factor;
1959         int v3 = pic_param->components[2].v_sampling_factor;
1960
1961         if (h1 == 2 * h2 && h2 == h3 &&
1962             v1 == 2 * v2 && v2 == v3) {
1963             subsampling = SUBSAMPLE_YUV420;
1964             fourcc = VA_FOURCC_IMC3;
1965         } else if (h1 == 2 * h2  && h2 == h3 &&
1966                    v1 == v2 && v2 == v3) {
1967             subsampling = SUBSAMPLE_YUV422H;
1968             fourcc = VA_FOURCC_422H;
1969         } else if (h1 == h2 && h2 == h3 &&
1970                    v1 == v2  && v2 == v3) {
1971             subsampling = SUBSAMPLE_YUV444;
1972             fourcc = VA_FOURCC_444P;
1973         } else if (h1 == 4 * h2 && h2 ==  h3 &&
1974                    v1 == v2 && v2 == v3) {
1975             subsampling = SUBSAMPLE_YUV411;
1976             fourcc = VA_FOURCC_411P;
1977         } else if (h1 == h2 && h2 == h3 &&
1978                    v1 == 2 * v2 && v2 == v3) {
1979             subsampling = SUBSAMPLE_YUV422V;
1980             fourcc = VA_FOURCC_422V;
1981         } else
1982             assert(0);
1983     } else {
1984         assert(0);
1985     }
1986
1987     /* Current decoded picture */
1988     obj_surface = decode_state->render_object;
1989     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1990
1991     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1992     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1993     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1994     gen7_mfd_context->pre_deblocking_output.valid = 1;
1995
1996     gen7_mfd_context->post_deblocking_output.bo = NULL;
1997     gen7_mfd_context->post_deblocking_output.valid = 0;
1998
1999     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2000     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
2001
2002     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2003     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
2004
2005     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2006     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
2007
2008     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2009     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
2010
2011     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2012     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2013 }
2014
2015 static const int va_to_gen7_jpeg_rotation[4] = {
2016     GEN7_JPEG_ROTATION_0,
2017     GEN7_JPEG_ROTATION_90,
2018     GEN7_JPEG_ROTATION_180,
2019     GEN7_JPEG_ROTATION_270
2020 };
2021
2022 static void
2023 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
2024                         struct decode_state *decode_state,
2025                         struct gen7_mfd_context *gen7_mfd_context)
2026 {
2027     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2028     VAPictureParameterBufferJPEGBaseline *pic_param;
2029     int chroma_type = GEN7_YUV420;
2030     int frame_width_in_blks;
2031     int frame_height_in_blks;
2032
2033     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2034     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2035
2036     if (pic_param->num_components == 1)
2037         chroma_type = GEN7_YUV400;
2038     else if (pic_param->num_components == 3) {
2039         int h1 = pic_param->components[0].h_sampling_factor;
2040         int h2 = pic_param->components[1].h_sampling_factor;
2041         int h3 = pic_param->components[2].h_sampling_factor;
2042         int v1 = pic_param->components[0].v_sampling_factor;
2043         int v2 = pic_param->components[1].v_sampling_factor;
2044         int v3 = pic_param->components[2].v_sampling_factor;
2045
2046         if (h1 == 2 * h2 && h2 == h3 &&
2047             v1 == 2 * v2 && v2 == v3)
2048             chroma_type = GEN7_YUV420;
2049         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2050                  v1 == 1 && v2 == 1 && v3 == 1)
2051             chroma_type = GEN7_YUV422H_2Y;
2052         else if (h1 == h2 && h2 == h3 &&
2053                  v1 == v2 && v2 == v3)
2054             chroma_type = GEN7_YUV444;
2055         else if (h1 == 4 * h2 && h2 == h3 &&
2056                  v1 == v2 && v2 == v3)
2057             chroma_type = GEN7_YUV411;
2058         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2059                  v1 == 2 && v2 == 1 && v3 == 1)
2060             chroma_type = GEN7_YUV422V_2Y;
2061         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2062                  v1 == 2 && v2 == 2 && v3 == 2)
2063             chroma_type = GEN7_YUV422H_4Y;
2064         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2065                  v1 == 2 && v2 == 1 && v3 == 1)
2066             chroma_type = GEN7_YUV422V_4Y;
2067         else
2068             assert(0);
2069     }
2070
2071     if (chroma_type == GEN7_YUV400 ||
2072         chroma_type == GEN7_YUV444 ||
2073         chroma_type == GEN7_YUV422V_2Y) {
2074         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2075         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2076     } else if (chroma_type == GEN7_YUV411) {
2077         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2078         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2079     } else {
2080         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2081         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2082     }
2083
2084     BEGIN_BCS_BATCH(batch, 3);
2085     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2086     OUT_BCS_BATCH(batch,
2087                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2088                   (chroma_type << 0));
2089     OUT_BCS_BATCH(batch,
2090                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2091                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2092     ADVANCE_BCS_BATCH(batch);
2093 }
2094
2095 static const int va_to_gen7_jpeg_hufftable[2] = {
2096     MFX_HUFFTABLE_ID_Y,
2097     MFX_HUFFTABLE_ID_UV
2098 };
2099
2100 static void
2101 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2102                                struct decode_state *decode_state,
2103                                struct gen7_mfd_context *gen7_mfd_context,
2104                                int num_tables)
2105 {
2106     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2107     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2108     int index;
2109
2110     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2111         return;
2112
2113     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2114
2115     for (index = 0; index < num_tables; index++) {
2116         int id = va_to_gen7_jpeg_hufftable[index];
2117         if (!huffman_table->load_huffman_table[index])
2118             continue;
2119         BEGIN_BCS_BATCH(batch, 53);
2120         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2121         OUT_BCS_BATCH(batch, id);
2122         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2123         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2124         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2125         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2126         ADVANCE_BCS_BATCH(batch);
2127     }
2128 }
2129
2130 static const int va_to_gen7_jpeg_qm[5] = {
2131     -1,
2132     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2133     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2134     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2135     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2136 };
2137
2138 static void
2139 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2140                        struct decode_state *decode_state,
2141                        struct gen7_mfd_context *gen7_mfd_context)
2142 {
2143     VAPictureParameterBufferJPEGBaseline *pic_param;
2144     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2145     int index;
2146
2147     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2148         return;
2149
2150     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2151     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2152
2153     assert(pic_param->num_components <= 3);
2154
2155     for (index = 0; index < pic_param->num_components; index++) {
2156         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2157         int qm_type;
2158         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2159         unsigned char raster_qm[64];
2160         int j;
2161
2162         if (id > 4 || id < 1)
2163             continue;
2164
2165         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2166             continue;
2167
2168         qm_type = va_to_gen7_jpeg_qm[id];
2169
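             /*
              * The quantiser tables arrive in zig-zag scan order; the MFX QM
              * state expects raster order, so de-zigzag before uploading.
              */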
2170         for (j = 0; j < 64; j++)
2171             raster_qm[zigzag_direct[j]] = qm[j];
2172
2173         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2174     }
2175 }
2176
2177 static void
2178 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2179                          VAPictureParameterBufferJPEGBaseline *pic_param,
2180                          VASliceParameterBufferJPEGBaseline *slice_param,
2181                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2182                          dri_bo *slice_data_bo,
2183                          struct gen7_mfd_context *gen7_mfd_context)
2184 {
2185     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2186     int scan_component_mask = 0;
2187     int i;
2188
2189     assert(slice_param->num_components > 0);
2190     assert(slice_param->num_components < 4);
2191     assert(slice_param->num_components <= pic_param->num_components);
2192
2193     for (i = 0; i < slice_param->num_components; i++) {
2194         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2195         case 1:
2196             scan_component_mask |= (1 << 0);
2197             break;
2198         case 2:
2199             scan_component_mask |= (1 << 1);
2200             break;
2201         case 3:
2202             scan_component_mask |= (1 << 2);
2203             break;
2204         default:
2205             assert(0);
2206             break;
2207         }
2208     }
2209
2210     BEGIN_BCS_BATCH(batch, 6);
2211     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2212     OUT_BCS_BATCH(batch,
2213                   slice_param->slice_data_size);
2214     OUT_BCS_BATCH(batch,
2215                   slice_param->slice_data_offset);
2216     OUT_BCS_BATCH(batch,
2217                   slice_param->slice_horizontal_position << 16 |
2218                   slice_param->slice_vertical_position << 0);
2219     OUT_BCS_BATCH(batch,
2220                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2221                   (scan_component_mask << 27) |                 /* scan components */
2222                   (0 << 26) |   /* disable interrupt allowed */
2223                   (slice_param->num_mcus << 0));                /* MCU count */
2224     OUT_BCS_BATCH(batch,
2225                   (slice_param->restart_interval << 0));    /* RestartInterval */
2226     ADVANCE_BCS_BATCH(batch);
2227 }
2228
2229 /* Workaround for JPEG decoding on Ivybridge */
2230 #ifdef JPEG_WA
2231
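     /*
      * Pre-encoded 16x16 AVC intra clip used by the workaround below; it is
      * decoded through the AVC path before the actual JPEG picture,
      * apparently to put the MFX engine into a known state.
      */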
2232 static struct {
2233     int width;
2234     int height;
2235     unsigned char data[32];
2236     int data_size;
2237     int data_bit_offset;
2238     int qp;
2239 } gen7_jpeg_wa_clip = {
2240     16,
2241     16,
2242     {
2243         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2244         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2245     },
2246     14,
2247     40,
2248     28,
2249 };
2250
2251 static void
2252 gen8_jpeg_wa_init(VADriverContextP ctx,
2253                   struct gen7_mfd_context *gen7_mfd_context)
2254 {
2255     struct i965_driver_data *i965 = i965_driver_data(ctx);
2256     VAStatus status;
2257     struct object_surface *obj_surface;
2258
2259     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2260         i965_DestroySurfaces(ctx,
2261                              &gen7_mfd_context->jpeg_wa_surface_id,
2262                              1);
2263
2264     status = i965_CreateSurfaces(ctx,
2265                                  gen7_jpeg_wa_clip.width,
2266                                  gen7_jpeg_wa_clip.height,
2267                                  VA_RT_FORMAT_YUV420,
2268                                  1,
2269                                  &gen7_mfd_context->jpeg_wa_surface_id);
2270     assert(status == VA_STATUS_SUCCESS);
2271
2272     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2273     assert(obj_surface);
2274     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2275     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2276
2277     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2278         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2279                                                                "JPEG WA data",
2280                                                                0x1000,
2281                                                                0x1000);
2282         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2283                        0,
2284                        gen7_jpeg_wa_clip.data_size,
2285                        gen7_jpeg_wa_clip.data);
2286     }
2287 }
2288
2289 static void
2290 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2291                               struct gen7_mfd_context *gen7_mfd_context)
2292 {
2293     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2294
2295     BEGIN_BCS_BATCH(batch, 5);
2296     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2297     OUT_BCS_BATCH(batch,
2298                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2299                   (MFD_MODE_VLD << 15) | /* VLD mode */
2300                   (0 << 10) | /* disable Stream-Out */
2301                   (0 << 9)  | /* Post Deblocking Output */
2302                   (1 << 8)  | /* Pre Deblocking Output */
2303                   (0 << 5)  | /* not in stitch mode */
2304                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2305                   (MFX_FORMAT_AVC << 0));
2306     OUT_BCS_BATCH(batch,
2307                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2308                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2309                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2310                   (0 << 1)  |
2311                   (0 << 0));
2312     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2313     OUT_BCS_BATCH(batch, 0); /* reserved */
2314     ADVANCE_BCS_BATCH(batch);
2315 }
2316
2317 static void
2318 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2319                            struct gen7_mfd_context *gen7_mfd_context)
2320 {
2321     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2322     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2323
2324     BEGIN_BCS_BATCH(batch, 6);
2325     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2326     OUT_BCS_BATCH(batch, 0);
2327     OUT_BCS_BATCH(batch,
2328                   ((obj_surface->orig_width - 1) << 18) |
2329                   ((obj_surface->orig_height - 1) << 4));
2330     OUT_BCS_BATCH(batch,
2331                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2332                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2333                   (0 << 22) | /* surface object control state, ignored */
2334                   ((obj_surface->width - 1) << 3) | /* pitch */
2335                   (0 << 2)  | /* must be 0 */
2336                   (1 << 1)  | /* must be tiled */
2337                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2338     OUT_BCS_BATCH(batch,
2339                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2340                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2341     OUT_BCS_BATCH(batch,
2342                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2343                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2344     ADVANCE_BCS_BATCH(batch);
2345 }
2346
2347 static void
2348 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2349                                  struct gen7_mfd_context *gen7_mfd_context)
2350 {
2351     struct i965_driver_data *i965 = i965_driver_data(ctx);
2352     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2353     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2354     dri_bo *intra_bo;
2355     int i;
2356
2357     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2358                             "intra row store",
2359                             128 * 64,
2360                             0x1000);
2361
2362     BEGIN_BCS_BATCH(batch, 61);
2363     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2364     OUT_BCS_RELOC64(batch,
2365                     obj_surface->bo,
2366                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2367                     0);
2368     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2369
2370
2371     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2372     OUT_BCS_BATCH(batch, 0);
2373     OUT_BCS_BATCH(batch, 0);
2374
2375     /* uncompressed-video & stream out 7-12 */
2376     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2377     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2378     OUT_BCS_BATCH(batch, 0);
2379     OUT_BCS_BATCH(batch, 0);
2380     OUT_BCS_BATCH(batch, 0);
2381     OUT_BCS_BATCH(batch, 0);
2382
2383     /* the DW 13-15 is for intra row store scratch */
2384     OUT_BCS_RELOC64(batch,
2385                     intra_bo,
2386                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2387                     0);
2388
2389     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2390
2391     /* the DW 16-18 is for deblocking filter */
2392     OUT_BCS_BATCH(batch, 0);
2393     OUT_BCS_BATCH(batch, 0);
2394     OUT_BCS_BATCH(batch, 0);
2395
2396     /* DW 19..50 */
2397     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2398         OUT_BCS_BATCH(batch, 0);
2399         OUT_BCS_BATCH(batch, 0);
2400     }
2401     OUT_BCS_BATCH(batch, 0);
2402
2403     /* the DW52-54 is for mb status address */
2404     OUT_BCS_BATCH(batch, 0);
2405     OUT_BCS_BATCH(batch, 0);
2406     OUT_BCS_BATCH(batch, 0);
2407     /* the DW56-60 is for ILDB & second ILDB address */
2408     OUT_BCS_BATCH(batch, 0);
2409     OUT_BCS_BATCH(batch, 0);
2410     OUT_BCS_BATCH(batch, 0);
2411     OUT_BCS_BATCH(batch, 0);
2412     OUT_BCS_BATCH(batch, 0);
2413     OUT_BCS_BATCH(batch, 0);
2414
2415     ADVANCE_BCS_BATCH(batch);
2416
2417     dri_bo_unreference(intra_bo);
2418 }
2419
2420 static void
2421 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2422                                      struct gen7_mfd_context *gen7_mfd_context)
2423 {
2424     struct i965_driver_data *i965 = i965_driver_data(ctx);
2425     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2426     dri_bo *bsd_mpc_bo, *mpr_bo;
2427
2428     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2429                               "bsd mpc row store",
2430                               11520, /* 1.5 * 120 * 64 */
2431                               0x1000);
2432
2433     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2434                           "mpr row store",
2435                           7680, /* 1.0 * 120 * 64 */
2436                           0x1000);
2437
2438     BEGIN_BCS_BATCH(batch, 10);
2439     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2440
2441     OUT_BCS_RELOC64(batch,
2442                     bsd_mpc_bo,
2443                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2444                     0);
2445
2446     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2447
2448     OUT_BCS_RELOC64(batch,
2449                     mpr_bo,
2450                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2451                     0);
2452     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2453
2454     OUT_BCS_BATCH(batch, 0);
2455     OUT_BCS_BATCH(batch, 0);
2456     OUT_BCS_BATCH(batch, 0);
2457
2458     ADVANCE_BCS_BATCH(batch);
2459
2460     dri_bo_unreference(bsd_mpc_bo);
2461     dri_bo_unreference(mpr_bo);
2462 }
2463
2464 static void
2465 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2466                           struct gen7_mfd_context *gen7_mfd_context)
2467 {
2468
2469 }
2470
2471 static void
2472 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2473                            struct gen7_mfd_context *gen7_mfd_context)
2474 {
2475     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2476     int img_struct = 0;
2477     int mbaff_frame_flag = 0;
2478     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2479
2480     BEGIN_BCS_BATCH(batch, 16);
2481     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2482     OUT_BCS_BATCH(batch,
2483                   width_in_mbs * height_in_mbs);
2484     OUT_BCS_BATCH(batch,
2485                   ((height_in_mbs - 1) << 16) |
2486                   ((width_in_mbs - 1) << 0));
2487     OUT_BCS_BATCH(batch,
2488                   (0 << 24) |
2489                   (0 << 16) |
2490                   (0 << 14) |
2491                   (0 << 13) |
2492                   (0 << 12) | /* differ from GEN6 */
2493                   (0 << 10) |
2494                   (img_struct << 8));
2495     OUT_BCS_BATCH(batch,
2496                   (1 << 10) | /* 4:2:0 */
2497                   (1 << 7) |  /* CABAC */
2498                   (0 << 6) |
2499                   (0 << 5) |
2500                   (0 << 4) |
2501                   (0 << 3) |
2502                   (1 << 2) |
2503                   (mbaff_frame_flag << 1) |
2504                   (0 << 0));
2505     OUT_BCS_BATCH(batch, 0);
2506     OUT_BCS_BATCH(batch, 0);
2507     OUT_BCS_BATCH(batch, 0);
2508     OUT_BCS_BATCH(batch, 0);
2509     OUT_BCS_BATCH(batch, 0);
2510     OUT_BCS_BATCH(batch, 0);
2511     OUT_BCS_BATCH(batch, 0);
2512     OUT_BCS_BATCH(batch, 0);
2513     OUT_BCS_BATCH(batch, 0);
2514     OUT_BCS_BATCH(batch, 0);
2515     OUT_BCS_BATCH(batch, 0);
2516     ADVANCE_BCS_BATCH(batch);
2517 }
2518
2519 static void
2520 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2521                                   struct gen7_mfd_context *gen7_mfd_context)
2522 {
2523     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2524     int i;
2525
2526     BEGIN_BCS_BATCH(batch, 71);
2527     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2528
2529     /* reference surfaces 0..15 */
2530     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2531         OUT_BCS_BATCH(batch, 0); /* top */
2532         OUT_BCS_BATCH(batch, 0); /* bottom */
2533     }
2534
2535     OUT_BCS_BATCH(batch, 0);
2536
2537     /* the current decoding frame/field */
2538     OUT_BCS_BATCH(batch, 0); /* top */
2539     OUT_BCS_BATCH(batch, 0);
2540     OUT_BCS_BATCH(batch, 0);
2541
2542     /* POC List */
2543     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2544         OUT_BCS_BATCH(batch, 0);
2545         OUT_BCS_BATCH(batch, 0);
2546     }
2547
2548     OUT_BCS_BATCH(batch, 0);
2549     OUT_BCS_BATCH(batch, 0);
2550
2551     ADVANCE_BCS_BATCH(batch);
2552 }
2553
2554 static void
2555 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2556                                      struct gen7_mfd_context *gen7_mfd_context)
2557 {
2558     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
         struct i965_driver_data *i965 = i965_driver_data(ctx);
2559
2560     BEGIN_BCS_BATCH(batch, 11);
2561     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2562     OUT_BCS_RELOC64(batch,
2563                     gen7_mfd_context->jpeg_wa_slice_data_bo,
2564                     I915_GEM_DOMAIN_INSTRUCTION, 0,
2565                     0);
2566     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2567     OUT_BCS_BATCH(batch, 0);
2568     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2569     OUT_BCS_BATCH(batch, 0);
2570     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2571     OUT_BCS_BATCH(batch, 0);
2572     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2573     OUT_BCS_BATCH(batch, 0);
2574     ADVANCE_BCS_BATCH(batch);
2575 }
2576
2577 static void
2578 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2579                             struct gen7_mfd_context *gen7_mfd_context)
2580 {
2581     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2582
2583     /* the input bitstream format on GEN7 differs from GEN6 */
2584     BEGIN_BCS_BATCH(batch, 6);
2585     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2586     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2587     OUT_BCS_BATCH(batch, 0);
2588     OUT_BCS_BATCH(batch,
2589                   (0 << 31) |
2590                   (0 << 14) |
2591                   (0 << 12) |
2592                   (0 << 10) |
2593                   (0 << 8));
2594     OUT_BCS_BATCH(batch,
2595                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2596                   (0 << 5)  |
2597                   (0 << 4)  |
2598                   (1 << 3) | /* LastSlice Flag */
2599                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2600     OUT_BCS_BATCH(batch, 0);
2601     ADVANCE_BCS_BATCH(batch);
2602 }
2603
2604 static void
2605 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2606                              struct gen7_mfd_context *gen7_mfd_context)
2607 {
2608     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2609     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2610     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2611     int first_mb_in_slice = 0;
2612     int slice_type = SLICE_TYPE_I;
2613
2614     BEGIN_BCS_BATCH(batch, 11);
2615     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2616     OUT_BCS_BATCH(batch, slice_type);
2617     OUT_BCS_BATCH(batch,
2618                   (num_ref_idx_l1 << 24) |
2619                   (num_ref_idx_l0 << 16) |
2620                   (0 << 8) |
2621                   (0 << 0));
2622     OUT_BCS_BATCH(batch,
2623                   (0 << 29) |
2624                   (1 << 27) |   /* disable Deblocking */
2625                   (0 << 24) |
2626                   (gen7_jpeg_wa_clip.qp << 16) |
2627                   (0 << 8) |
2628                   (0 << 0));
2629     OUT_BCS_BATCH(batch,
2630                   (slice_ver_pos << 24) |
2631                   (slice_hor_pos << 16) |
2632                   (first_mb_in_slice << 0));
2633     OUT_BCS_BATCH(batch,
2634                   (next_slice_ver_pos << 16) |
2635                   (next_slice_hor_pos << 0));
2636     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2637     OUT_BCS_BATCH(batch, 0);
2638     OUT_BCS_BATCH(batch, 0);
2639     OUT_BCS_BATCH(batch, 0);
2640     OUT_BCS_BATCH(batch, 0);
2641     ADVANCE_BCS_BATCH(batch);
2642 }
2643
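/*
 * JPEG workaround: decode the small hard-coded AVC clip (gen7_jpeg_wa_clip)
 * on the MFX pipeline before the real JPEG decode is programmed, presumably
 * to bring the engine into a state where JPEG decoding works reliably.
 */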
2644 static void
2645 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2646                  struct gen7_mfd_context *gen7_mfd_context)
2647 {
2648     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2649     gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2650     intel_batchbuffer_emit_mi_flush(batch);
2651     gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2652     gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2653     gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2654     gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2655     gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2656     gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2657     gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2658
2659     gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2660     gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2661     gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2662 }
2663
2664 #endif
2665
2666 void
2667 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2668                              struct decode_state *decode_state,
2669                              struct gen7_mfd_context *gen7_mfd_context)
2670 {
2671     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2672     VAPictureParameterBufferJPEGBaseline *pic_param;
2673     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2674     dri_bo *slice_data_bo;
2675     int i, j, max_selector = 0;
2676
2677     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2678     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2679
2680     /* Currently only Baseline DCT is supported */
2681     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2682     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2683 #ifdef JPEG_WA
2684     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2685 #endif
2686     intel_batchbuffer_emit_mi_flush(batch);
2687     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2688     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2689     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2690     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2691     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2692
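    /* First pass over the slice parameters: only collect the largest DC/AC
     * Huffman table selector.  The second pass below emits the BSD objects. */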
2693     for (j = 0; j < decode_state->num_slice_params; j++) {
2694         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2695         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2696         slice_data_bo = decode_state->slice_datas[j]->bo;
2697         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2698
2699         if (j == decode_state->num_slice_params - 1)
2700             next_slice_group_param = NULL;
2701         else
2702             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2703
2704         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2705             int component;
2706
2707             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2708
2709             if (i < decode_state->slice_params[j]->num_elements - 1)
2710                 next_slice_param = slice_param + 1;
2711             else
2712                 next_slice_param = next_slice_group_param;
2713
2714             for (component = 0; component < slice_param->num_components; component++) {
2715                 if (max_selector < slice_param->components[component].dc_table_selector)
2716                     max_selector = slice_param->components[component].dc_table_selector;
2717
2718                 if (max_selector < slice_param->components[component].ac_table_selector)
2719                     max_selector = slice_param->components[component].ac_table_selector;
2720             }
2721
2722             slice_param++;
2723         }
2724     }
2725
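    /* Baseline JPEG allows at most two Huffman tables per class, so the
     * selectors must be 0 or 1 and (max_selector + 1) tables are loaded. */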
2726     assert(max_selector < 2);
2727     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2728
2729     for (j = 0; j < decode_state->num_slice_params; j++) {
2730         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2731         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2732         slice_data_bo = decode_state->slice_datas[j]->bo;
2733         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2734
2735         if (j == decode_state->num_slice_params - 1)
2736             next_slice_group_param = NULL;
2737         else
2738             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2739
2740         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2741             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2742
2743             if (i < decode_state->slice_params[j]->num_elements - 1)
2744                 next_slice_param = slice_param + 1;
2745             else
2746                 next_slice_param = next_slice_group_param;
2747
2748             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2749             slice_param++;
2750         }
2751     }
2752
2753     intel_batchbuffer_end_atomic(batch);
2754     intel_batchbuffer_flush(batch);
2755 }
2756
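/* VP8 DC/AC dequantization lookup tables (RFC 6386, section 14.1), indexed by
 * the clipped quantization index in [0, 127]. */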
2757 static const int vp8_dc_qlookup[128] = {
2758     4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
2759     18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
2760     29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
2761     44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
2762     59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
2763     75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
2764     91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2765     122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
2766 };
2767
2768 static const int vp8_ac_qlookup[128] = {
2769     4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
2770     20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
2771     36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
2772     52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
2773     78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
2774     110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2775     155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2776     213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2777 };
2778
2779 static inline unsigned int vp8_clip_quantization_index(int index)
2780 {
2781     if (index > 127)
2782         return 127;
2783     else if (index < 0)
2784         return 0;
2785
2786     return index;
2787 }
2788
2789 static void
2790 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2791                          struct decode_state *decode_state,
2792                          struct gen7_mfd_context *gen7_mfd_context)
2793 {
2794     struct object_surface *obj_surface;
2795     struct i965_driver_data *i965 = i965_driver_data(ctx);
2796     dri_bo *bo;
2797     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2798     int width_in_mbs = (pic_param->frame_width + 15) / 16;
2799     int height_in_mbs = (pic_param->frame_height + 15) / 16;
2800
2801     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2802     assert(height_in_mbs > 0 && height_in_mbs <= 256);
2803
2804     intel_update_vp8_frame_store_index(ctx,
2805                                        decode_state,
2806                                        pic_param,
2807                                        gen7_mfd_context->reference_surface);
2808
2809     /* Current decoded picture */
2810     obj_surface = decode_state->render_object;
2811     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2812
2813     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2814     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2815     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2816     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2817
2818     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2819     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2820     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2821     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2822
2823     intel_ensure_vp8_segmentation_buffer(ctx,
2824                                          &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2825
2826     /* The same as AVC */
2827     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2828     bo = dri_bo_alloc(i965->intel.bufmgr,
2829                       "intra row store",
2830                       width_in_mbs * 64,
2831                       0x1000);
2832     assert(bo);
2833     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2834     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2835
2836     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2837     bo = dri_bo_alloc(i965->intel.bufmgr,
2838                       "deblocking filter row store",
2839                       width_in_mbs * 64 * 4,
2840                       0x1000);
2841     assert(bo);
2842     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2843     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2844
2845     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2846     bo = dri_bo_alloc(i965->intel.bufmgr,
2847                       "bsd mpc row store",
2848                       width_in_mbs * 64 * 2,
2849                       0x1000);
2850     assert(bo);
2851     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2852     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2853
2854     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2855     bo = dri_bo_alloc(i965->intel.bufmgr,
2856                       "mpr row store",
2857                       width_in_mbs * 64 * 2,
2858                       0x1000);
2859     assert(bo);
2860     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2861     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2862
2863     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2864 }
2865
2866 static void
2867 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2868                        struct decode_state *decode_state,
2869                        struct gen7_mfd_context *gen7_mfd_context)
2870 {
2871     struct i965_driver_data *i965 = i965_driver_data(ctx);
2872     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2873     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2874     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2875     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2876     dri_bo *probs_bo = decode_state->probability_data->bo;
2877     int i, j, log2num;
2878     unsigned int quantization_value[4][6];
2879
2880     /* There is no safe way to error out if the segmentation buffer
2881        could not be allocated. So, instead of aborting, simply decode
2882        something even if the result may look totally inaccurate */
2883     const unsigned int enable_segmentation =
2884         pic_param->pic_fields.bits.segmentation_enabled &&
2885         gen7_mfd_context->segmentation_buffer.valid;
2886
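    /* num_of_partitions includes the control partition, so the hardware gets
     * log2 of the DCT token partition count (1, 2, 4 or 8 -> 0..3). */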
2887     log2num = (int)log2(slice_param->num_of_partitions - 1);
2888
2889     BEGIN_BCS_BATCH(batch, 38);
2890     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2891     OUT_BCS_BATCH(batch,
2892                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2893                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2894     OUT_BCS_BATCH(batch,
2895                   log2num << 24 |
2896                   pic_param->pic_fields.bits.sharpness_level << 16 |
2897                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2898                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2899                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2900                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2901                   (enable_segmentation &&
2902                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
2903                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2904                   (enable_segmentation &&
2905                    !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2906                   (enable_segmentation &&
2907                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2908                   (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicates an intra frame in the VP8 stream/spec (§9.1) */
2909                   pic_param->pic_fields.bits.filter_type << 4 |
2910                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2911                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2912
2913     OUT_BCS_BATCH(batch,
2914                   pic_param->loop_filter_level[3] << 24 |
2915                   pic_param->loop_filter_level[2] << 16 |
2916                   pic_param->loop_filter_level[1] <<  8 |
2917                   pic_param->loop_filter_level[0] <<  0);
2918
2919     /* Quantizer values for the 4 segments, DW4-DW15 */
2920     for (i = 0; i < 4; i++) {
2921         quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2922         quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2923         quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /*y2dc*/
2924         /* 101581>>16 is equivalent to 155/100 */
2925         quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /*y2ac*/
2926         quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2927         quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2928
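        /* Match the reference dequantizer: Y2AC has a floor of 8, UVDC a
         * ceiling of 132. */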
2929         quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2930         quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2931
2932         OUT_BCS_BATCH(batch,
2933                       quantization_value[i][0] << 16 | /* Y1AC */
2934                       quantization_value[i][1] <<  0); /* Y1DC */
2935         OUT_BCS_BATCH(batch,
2936                       quantization_value[i][5] << 16 | /* UVAC */
2937                       quantization_value[i][4] <<  0); /* UVDC */
2938         OUT_BCS_BATCH(batch,
2939                       quantization_value[i][3] << 16 | /* Y2AC */
2940                       quantization_value[i][2] <<  0); /* Y2DC */
2941     }
2942
2943     /* CoeffProbability table for non-key frame, DW16-DW18 */
2944     if (probs_bo) {
2945         OUT_BCS_RELOC64(batch, probs_bo,
2946                         0, I915_GEM_DOMAIN_INSTRUCTION,
2947                         0);
2948         OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2949     } else {
2950         OUT_BCS_BATCH(batch, 0);
2951         OUT_BCS_BATCH(batch, 0);
2952         OUT_BCS_BATCH(batch, 0);
2953     }
2954
2955     OUT_BCS_BATCH(batch,
2956                   pic_param->mb_segment_tree_probs[2] << 16 |
2957                   pic_param->mb_segment_tree_probs[1] <<  8 |
2958                   pic_param->mb_segment_tree_probs[0] <<  0);
2959
2960     OUT_BCS_BATCH(batch,
2961                   pic_param->prob_skip_false << 24 |
2962                   pic_param->prob_intra      << 16 |
2963                   pic_param->prob_last       <<  8 |
2964                   pic_param->prob_gf         <<  0);
2965
2966     OUT_BCS_BATCH(batch,
2967                   pic_param->y_mode_probs[3] << 24 |
2968                   pic_param->y_mode_probs[2] << 16 |
2969                   pic_param->y_mode_probs[1] <<  8 |
2970                   pic_param->y_mode_probs[0] <<  0);
2971
2972     OUT_BCS_BATCH(batch,
2973                   pic_param->uv_mode_probs[2] << 16 |
2974                   pic_param->uv_mode_probs[1] <<  8 |
2975                   pic_param->uv_mode_probs[0] <<  0);
2976
2977     /* MV update value, DW23-DW32 */
2978     for (i = 0; i < 2; i++) {
2979         for (j = 0; j < 20; j += 4) {
2980             OUT_BCS_BATCH(batch,
2981                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2982                           pic_param->mv_probs[i][j + 2] << 16 |
2983                           pic_param->mv_probs[i][j + 1] <<  8 |
2984                           pic_param->mv_probs[i][j + 0] <<  0);
2985         }
2986     }
2987
2988     OUT_BCS_BATCH(batch,
2989                   (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2990                   (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2991                   (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
2992                   (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);
2993
2994     OUT_BCS_BATCH(batch,
2995                   (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2996                   (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2997                   (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
2998                   (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);
2999
3000     /* segmentation id stream base address, DW35-DW37 */
3001     if (enable_segmentation) {
3002         OUT_BCS_RELOC64(batch, gen7_mfd_context->segmentation_buffer.bo,
3003                         0, I915_GEM_DOMAIN_INSTRUCTION,
3004                         0);
3005         OUT_BCS_BATCH(batch, i965->intel.mocs_state);
3006     } else {
3007         OUT_BCS_BATCH(batch, 0);
3008         OUT_BCS_BATCH(batch, 0);
3009         OUT_BCS_BATCH(batch, 0);
3010     }
3011     ADVANCE_BCS_BATCH(batch);
3012 }
3013
3014 static void
3015 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
3016                         VAPictureParameterBufferVP8 *pic_param,
3017                         VASliceParameterBufferVP8 *slice_param,
3018                         dri_bo *slice_data_bo,
3019                         struct gen7_mfd_context *gen7_mfd_context)
3020 {
3021     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3022     int i, log2num;
3023     unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
3024     unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
3025     unsigned int partition_size_0 = slice_param->partition_size[0];
3026
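    /* bool_coder_ctx.count holds how many bits of 'value' are still unread
     * (0..7); used_bits is therefore how much of the current byte the boolean
     * decoder has already consumed.  A fully consumed byte means the hardware
     * should restart at the next byte of partition 0. */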
3027     assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
3028     if (used_bits == 8) {
3029         used_bits = 0;
3030         offset += 1;
3031         partition_size_0 -= 1;
3032     }
3033
3034     assert(slice_param->num_of_partitions >= 2);
3035     assert(slice_param->num_of_partitions <= 9);
3036
3037     log2num = (int)log2(slice_param->num_of_partitions - 1);
3038
3039     BEGIN_BCS_BATCH(batch, 22);
3040     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
3041     OUT_BCS_BATCH(batch,
3042                   used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
3043                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
3044                   log2num << 4 |
3045                   (slice_param->macroblock_offset & 0x7));
3046     OUT_BCS_BATCH(batch,
3047                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
3048                   0);
3049
3050     OUT_BCS_BATCH(batch, partition_size_0 + 1);
3051     OUT_BCS_BATCH(batch, offset);
3052     // partition sizes (3 bytes each) follow the above first partition when there is more than one token partition
3053     offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
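    /* Size/offset pairs for up to 8 token partitions; unused slots are zeroed. */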
3054     for (i = 1; i < 9; i++) {
3055         if (i < slice_param->num_of_partitions) {
3056             OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
3057             OUT_BCS_BATCH(batch, offset);
3058         } else {
3059             OUT_BCS_BATCH(batch, 0);
3060             OUT_BCS_BATCH(batch, 0);
3061         }
3062
3063         offset += slice_param->partition_size[i];
3064     }
3065
3066     OUT_BCS_BATCH(batch, 0); /* concealment method */
3067
3068     ADVANCE_BCS_BATCH(batch);
3069 }
3070
3071 void
3072 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3073                             struct decode_state *decode_state,
3074                             struct gen7_mfd_context *gen7_mfd_context)
3075 {
3076     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3077     VAPictureParameterBufferVP8 *pic_param;
3078     VASliceParameterBufferVP8 *slice_param;
3079     dri_bo *slice_data_bo;
3080
3081     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3082     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3083
3084     /* one slice per frame */
3085     if (decode_state->num_slice_params != 1 ||
3086         (!decode_state->slice_params ||
3087          !decode_state->slice_params[0] ||
3088          (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3089         (!decode_state->slice_datas ||
3090          !decode_state->slice_datas[0] ||
3091          !decode_state->slice_datas[0]->bo) ||
3092         !decode_state->probability_data) {
3093         WARN_ONCE("Wrong parameters for VP8 decoding\n");
3094
3095         return;
3096     }
3097
3098     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3099     slice_data_bo = decode_state->slice_datas[0]->bo;
3100
3101     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3102     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3103     intel_batchbuffer_emit_mi_flush(batch);
3104     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3105     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3106     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3107     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3108     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3109     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3110     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3111     intel_batchbuffer_end_atomic(batch);
3112     intel_batchbuffer_flush(batch);
3113 }
3114
3115 static VAStatus
3116 gen8_mfd_decode_picture(VADriverContextP ctx,
3117                         VAProfile profile,
3118                         union codec_state *codec_state,
3119                         struct hw_context *hw_context)
3121 {
3122     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3123     struct decode_state *decode_state = &codec_state->decode;
3124     VAStatus vaStatus;
3125
3126     assert(gen7_mfd_context);
3127
3128     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3129
3130     if (vaStatus != VA_STATUS_SUCCESS)
3131         goto out;
3132
3133     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3134
3135     switch (profile) {
3136     case VAProfileMPEG2Simple:
3137     case VAProfileMPEG2Main:
3138         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3139         break;
3140
3141     case VAProfileH264ConstrainedBaseline:
3142     case VAProfileH264Main:
3143     case VAProfileH264High:
3144     case VAProfileH264StereoHigh:
3145     case VAProfileH264MultiviewHigh:
3146         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3147         break;
3148
3149     case VAProfileVC1Simple:
3150     case VAProfileVC1Main:
3151     case VAProfileVC1Advanced:
3152         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3153         break;
3154
3155     case VAProfileJPEGBaseline:
3156         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3157         break;
3158
3159     case VAProfileVP8Version0_3:
3160         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3161         break;
3162
3163     default:
3164         assert(0);
3165         break;
3166     }
3167
3168     vaStatus = VA_STATUS_SUCCESS;
3169
3170 out:
3171     return vaStatus;
3172 }
3173
3174 static void
3175 gen8_mfd_context_destroy(void *hw_context)
3176 {
3177     VADriverContextP ctx;
3178     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3179
3180     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3181
3182     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3183     gen7_mfd_context->post_deblocking_output.bo = NULL;
3184
3185     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3186     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3187
3188     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3189     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3190
3191     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3192     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3193
3194     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3195     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3196
3197     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3198     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3199
3200     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3201     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3202
3203     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3204     gen7_mfd_context->segmentation_buffer.bo = NULL;
3205
3206     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3207
3208     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3209         i965_DestroySurfaces(ctx,
3210                              &gen7_mfd_context->jpeg_wa_surface_id,
3211                              1);
3212         gen7_mfd_context->jpeg_wa_surface_object = NULL;
3213     }
3214
3215     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3216     free(gen7_mfd_context);
3217 }
3218
3219 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3220                                         struct gen7_mfd_context *gen7_mfd_context)
3221 {
3222     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3223     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3224     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3225     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3226 }
3227
3228 struct hw_context *
3229 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3230 {
3231     struct intel_driver_data *intel = intel_driver_data(ctx);
3232     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3233     int i;
3234
3235     if (!gen7_mfd_context)
3236         return NULL;
3237
3238     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3239     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3240     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3241
3242     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3243         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3244         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3245     }
3246
3247     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3248     gen7_mfd_context->segmentation_buffer.valid = 0;
3249
3250     switch (obj_config->profile) {
3251     case VAProfileMPEG2Simple:
3252     case VAProfileMPEG2Main:
3253         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3254         break;
3255
3256     case VAProfileH264ConstrainedBaseline:
3257     case VAProfileH264Main:
3258     case VAProfileH264High:
3259     case VAProfileH264StereoHigh:
3260     case VAProfileH264MultiviewHigh:
3261         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3262         break;
3263     default:
3264         break;
3265     }
3266
3267     gen7_mfd_context->driver_context = ctx;
3268     return (struct hw_context *)gen7_mfd_context;
3269 }