OSDN Git Service

Add vdenc common commands for CNL
[android-x86/hardware-intel-common-vaapi.git] / src / gen7_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include "sysdeps.h"
30
31 #include <va/va_dec_jpeg.h>
32
33 #include "intel_batchbuffer.h"
34 #include "intel_driver.h"
35 #include "i965_defines.h"
36 #include "i965_drv_video.h"
37 #include "i965_decoder_utils.h"
38
39 #include "gen7_mfd.h"
40 #include "intel_media.h"
41
/*
 * Zig-zag scan table for an 8x8 block: entry k holds the raster
 * (row-major) index of the k-th coefficient in zig-zag order.
 */
static const uint32_t zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
52
53 static void
54 gen7_mfd_init_avc_surface(VADriverContextP ctx,
55                           VAPictureParameterBufferH264 *pic_param,
56                           struct object_surface *obj_surface)
57 {
58     struct i965_driver_data *i965 = i965_driver_data(ctx);
59     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
60     int width_in_mbs, height_in_mbs;
61
62     obj_surface->free_private_data = gen_free_avc_surface;
63     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
64     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
65
66     if (!gen7_avc_surface) {
67         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
68         assert(gen7_avc_surface);
69         gen7_avc_surface->base.frame_store_id = -1;
70         assert((obj_surface->size & 0x3f) == 0);
71         obj_surface->private_data = gen7_avc_surface;
72     }
73
74     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
75                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
76
77     if (gen7_avc_surface->dmv_top == NULL) {
78         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
79                                                  "direct mv w/r buffer",
80                                                  width_in_mbs * (height_in_mbs + 1) * 64,
81                                                  0x1000);
82         assert(gen7_avc_surface->dmv_top);
83     }
84
85     if (gen7_avc_surface->dmv_bottom_flag &&
86         gen7_avc_surface->dmv_bottom == NULL) {
87         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
88                                                     "direct mv w/r buffer",
89                                                     width_in_mbs * (height_in_mbs + 1) * 64,
90                                                     0x1000);
91         assert(gen7_avc_surface->dmv_bottom);
92     }
93 }
94
95 static void
96 gen7_mfd_pipe_mode_select(VADriverContextP ctx,
97                           struct decode_state *decode_state,
98                           int standard_select,
99                           struct gen7_mfd_context *gen7_mfd_context)
100 {
101     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
102
103     assert(standard_select == MFX_FORMAT_MPEG2 ||
104            standard_select == MFX_FORMAT_AVC ||
105            standard_select == MFX_FORMAT_VC1 ||
106            standard_select == MFX_FORMAT_JPEG);
107
108     BEGIN_BCS_BATCH(batch, 5);
109     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
110     OUT_BCS_BATCH(batch,
111                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
112                   (MFD_MODE_VLD << 15) | /* VLD mode */
113                   (0 << 10) | /* disable Stream-Out */
114                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
115                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
116                   (0 << 5)  | /* not in stitch mode */
117                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
118                   (standard_select << 0));
119     OUT_BCS_BATCH(batch,
120                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
121                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
122                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
123                   (0 << 1)  |
124                   (0 << 0));
125     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
126     OUT_BCS_BATCH(batch, 0); /* reserved */
127     ADVANCE_BCS_BATCH(batch);
128 }
129
130 static void
131 gen7_mfd_surface_state(VADriverContextP ctx,
132                        struct decode_state *decode_state,
133                        int standard_select,
134                        struct gen7_mfd_context *gen7_mfd_context)
135 {
136     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
137     struct object_surface *obj_surface = decode_state->render_object;
138     unsigned int y_cb_offset;
139     unsigned int y_cr_offset;
140     unsigned int surface_format;
141
142     assert(obj_surface);
143
144     y_cb_offset = obj_surface->y_cb_offset;
145     y_cr_offset = obj_surface->y_cr_offset;
146
147     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
148                      MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
149
150     BEGIN_BCS_BATCH(batch, 6);
151     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
152     OUT_BCS_BATCH(batch, 0);
153     OUT_BCS_BATCH(batch,
154                   ((obj_surface->orig_height - 1) << 18) |
155                   ((obj_surface->orig_width - 1) << 4));
156     OUT_BCS_BATCH(batch,
157                   (surface_format << 28) | /* 420 planar YUV surface */
158                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
159                   (0 << 22) | /* surface object control state, ignored */
160                   ((obj_surface->width - 1) << 3) | /* pitch */
161                   (0 << 2)  | /* must be 0 */
162                   (1 << 1)  | /* must be tiled */
163                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
164     OUT_BCS_BATCH(batch,
165                   (0 << 16) | /* X offset for U(Cb), must be 0 */
166                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
167     OUT_BCS_BATCH(batch,
168                   (0 << 16) | /* X offset for V(Cr), must be 0 */
169                   ((standard_select == MFX_FORMAT_JPEG ? y_cr_offset : 0) << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
170     ADVANCE_BCS_BATCH(batch);
171 }
172
173 static void
174 gen7_mfd_pipe_buf_addr_state(VADriverContextP ctx,
175                              struct decode_state *decode_state,
176                              int standard_select,
177                              struct gen7_mfd_context *gen7_mfd_context)
178 {
179     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
180     int i;
181
182     BEGIN_BCS_BATCH(batch, 24);
183     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
184     if (gen7_mfd_context->pre_deblocking_output.valid)
185         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
186                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
187                       0);
188     else
189         OUT_BCS_BATCH(batch, 0);
190
191     if (gen7_mfd_context->post_deblocking_output.valid)
192         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
193                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
194                       0);
195     else
196         OUT_BCS_BATCH(batch, 0);
197
198     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
199     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
200
201     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
202         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
203                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
204                       0);
205     else
206         OUT_BCS_BATCH(batch, 0);
207
208     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
209         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
210                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
211                       0);
212     else
213         OUT_BCS_BATCH(batch, 0);
214
215     /* DW 7..22 */
216     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
217         struct object_surface *obj_surface;
218
219         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
220             gen7_mfd_context->reference_surface[i].obj_surface &&
221             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
222             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
223
224             OUT_BCS_RELOC(batch, obj_surface->bo,
225                           I915_GEM_DOMAIN_INSTRUCTION, 0,
226                           0);
227         } else {
228             OUT_BCS_BATCH(batch, 0);
229         }
230     }
231
232     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
233     ADVANCE_BCS_BATCH(batch);
234 }
235
236 static void
237 gen7_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
238                                  dri_bo *slice_data_bo,
239                                  int standard_select,
240                                  struct gen7_mfd_context *gen7_mfd_context)
241 {
242     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
243
244     BEGIN_BCS_BATCH(batch, 11);
245     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
246     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
247     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
248     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
249     OUT_BCS_BATCH(batch, 0);
250     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
251     OUT_BCS_BATCH(batch, 0);
252     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
253     OUT_BCS_BATCH(batch, 0);
254     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
255     OUT_BCS_BATCH(batch, 0);
256     ADVANCE_BCS_BATCH(batch);
257 }
258
259 static void
260 gen7_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
261                                  struct decode_state *decode_state,
262                                  int standard_select,
263                                  struct gen7_mfd_context *gen7_mfd_context)
264 {
265     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
266
267     BEGIN_BCS_BATCH(batch, 4);
268     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
269
270     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
271         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
272                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
273                       0);
274     else
275         OUT_BCS_BATCH(batch, 0);
276
277     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
278         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
279                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
280                       0);
281     else
282         OUT_BCS_BATCH(batch, 0);
283
284     if (gen7_mfd_context->bitplane_read_buffer.valid)
285         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
286                       I915_GEM_DOMAIN_INSTRUCTION, 0,
287                       0);
288     else
289         OUT_BCS_BATCH(batch, 0);
290
291     ADVANCE_BCS_BATCH(batch);
292 }
293
294 static void
295 gen7_mfd_qm_state(VADriverContextP ctx,
296                   int qm_type,
297                   unsigned char *qm,
298                   int qm_length,
299                   struct gen7_mfd_context *gen7_mfd_context)
300 {
301     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
302     unsigned int qm_buffer[16];
303
304     assert(qm_length <= 16 * 4);
305     memcpy(qm_buffer, qm, qm_length);
306
307     BEGIN_BCS_BATCH(batch, 18);
308     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
309     OUT_BCS_BATCH(batch, qm_type << 0);
310     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
311     ADVANCE_BCS_BATCH(batch);
312 }
313
314 static void
315 gen7_mfd_avc_img_state(VADriverContextP ctx,
316                        struct decode_state *decode_state,
317                        struct gen7_mfd_context *gen7_mfd_context)
318 {
319     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
320     int img_struct;
321     int mbaff_frame_flag;
322     unsigned int width_in_mbs, height_in_mbs;
323     VAPictureParameterBufferH264 *pic_param;
324
325     assert(decode_state->pic_param && decode_state->pic_param->buffer);
326     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
327
328     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
329         img_struct = 1;
330     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
331         img_struct = 3;
332     else
333         img_struct = 0;
334
335     if ((img_struct & 0x1) == 0x1) {
336         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
337     } else {
338         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
339     }
340
341     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
342         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
343         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
344     } else {
345         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
346     }
347
348     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
349                         !pic_param->pic_fields.bits.field_pic_flag);
350
351     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
352     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
353
354     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
355     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
356            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
357     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
358
359     BEGIN_BCS_BATCH(batch, 16);
360     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
361     OUT_BCS_BATCH(batch,
362                   (width_in_mbs * height_in_mbs - 1));
363     OUT_BCS_BATCH(batch,
364                   ((height_in_mbs - 1) << 16) |
365                   ((width_in_mbs - 1) << 0));
366     OUT_BCS_BATCH(batch,
367                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
368                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
369                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
370                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
371                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
372                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
373                   (img_struct << 8));
374     OUT_BCS_BATCH(batch,
375                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
376                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
377                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
378                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
379                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
380                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
381                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
382                   (mbaff_frame_flag << 1) |
383                   (pic_param->pic_fields.bits.field_pic_flag << 0));
384     OUT_BCS_BATCH(batch, 0);
385     OUT_BCS_BATCH(batch, 0);
386     OUT_BCS_BATCH(batch, 0);
387     OUT_BCS_BATCH(batch, 0);
388     OUT_BCS_BATCH(batch, 0);
389     OUT_BCS_BATCH(batch, 0);
390     OUT_BCS_BATCH(batch, 0);
391     OUT_BCS_BATCH(batch, 0);
392     OUT_BCS_BATCH(batch, 0);
393     OUT_BCS_BATCH(batch, 0);
394     OUT_BCS_BATCH(batch, 0);
395     ADVANCE_BCS_BATCH(batch);
396 }
397
398 static void
399 gen7_mfd_avc_qm_state(VADriverContextP ctx,
400                       struct decode_state *decode_state,
401                       struct gen7_mfd_context *gen7_mfd_context)
402 {
403     VAIQMatrixBufferH264 *iq_matrix;
404     VAPictureParameterBufferH264 *pic_param;
405
406     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
407         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
408     else
409         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
410
411     assert(decode_state->pic_param && decode_state->pic_param->buffer);
412     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
413
414     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
415     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
416
417     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
418         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
419         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
420     }
421 }
422
423 static void
424 gen7_mfd_avc_directmode_state(VADriverContextP ctx,
425                               struct decode_state *decode_state,
426                               VAPictureParameterBufferH264 *pic_param,
427                               VASliceParameterBufferH264 *slice_param,
428                               struct gen7_mfd_context *gen7_mfd_context)
429 {
430     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
431     struct object_surface *obj_surface;
432     GenAvcSurface *gen7_avc_surface;
433     VAPictureH264 *va_pic;
434     int i;
435
436     BEGIN_BCS_BATCH(batch, 69);
437     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
438
439     /* reference surfaces 0..15 */
440     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
441         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
442             gen7_mfd_context->reference_surface[i].obj_surface &&
443             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
444
445             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
446             gen7_avc_surface = obj_surface->private_data;
447             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
448                           I915_GEM_DOMAIN_INSTRUCTION, 0,
449                           0);
450
451             if (gen7_avc_surface->dmv_bottom_flag == 1)
452                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
453                               I915_GEM_DOMAIN_INSTRUCTION, 0,
454                               0);
455             else
456                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
457                               I915_GEM_DOMAIN_INSTRUCTION, 0,
458                               0);
459         } else {
460             OUT_BCS_BATCH(batch, 0);
461             OUT_BCS_BATCH(batch, 0);
462         }
463     }
464
465     /* the current decoding frame/field */
466     va_pic = &pic_param->CurrPic;
467     obj_surface = decode_state->render_object;
468     assert(obj_surface->bo && obj_surface->private_data);
469     gen7_avc_surface = obj_surface->private_data;
470
471     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
472                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
473                   0);
474
475     if (gen7_avc_surface->dmv_bottom_flag == 1)
476         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
477                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
478                       0);
479     else
480         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
481                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
482                       0);
483
484     /* POC List */
485     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
486         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
487
488         if (obj_surface) {
489             const VAPictureH264 * const va_pic = avc_find_picture(
490                                                      obj_surface->base.id, pic_param->ReferenceFrames,
491                                                      ARRAY_ELEMS(pic_param->ReferenceFrames));
492
493             assert(va_pic != NULL);
494             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
495             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
496         } else {
497             OUT_BCS_BATCH(batch, 0);
498             OUT_BCS_BATCH(batch, 0);
499         }
500     }
501
502     va_pic = &pic_param->CurrPic;
503     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
504     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
505
506     ADVANCE_BCS_BATCH(batch);
507 }
508
509 static void
510 gen7_mfd_avc_phantom_slice_first(VADriverContextP ctx,
511                                  VAPictureParameterBufferH264 *pic_param,
512                                  VASliceParameterBufferH264 *next_slice_param,
513                                  struct gen7_mfd_context *gen7_mfd_context)
514 {
515     gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
516 }
517
518 static void
519 gen7_mfd_avc_slice_state(VADriverContextP ctx,
520                          VAPictureParameterBufferH264 *pic_param,
521                          VASliceParameterBufferH264 *slice_param,
522                          VASliceParameterBufferH264 *next_slice_param,
523                          struct gen7_mfd_context *gen7_mfd_context)
524 {
525     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
526     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
527     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
528     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
529     int num_ref_idx_l0, num_ref_idx_l1;
530     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
531                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
532     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
533     int slice_type;
534
535     if (slice_param->slice_type == SLICE_TYPE_I ||
536         slice_param->slice_type == SLICE_TYPE_SI) {
537         slice_type = SLICE_TYPE_I;
538     } else if (slice_param->slice_type == SLICE_TYPE_P ||
539                slice_param->slice_type == SLICE_TYPE_SP) {
540         slice_type = SLICE_TYPE_P;
541     } else {
542         assert(slice_param->slice_type == SLICE_TYPE_B);
543         slice_type = SLICE_TYPE_B;
544     }
545
546     if (slice_type == SLICE_TYPE_I) {
547         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
548         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
549         num_ref_idx_l0 = 0;
550         num_ref_idx_l1 = 0;
551     } else if (slice_type == SLICE_TYPE_P) {
552         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
553         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
554         num_ref_idx_l1 = 0;
555     } else {
556         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
557         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
558     }
559
560     first_mb_in_slice = slice_param->first_mb_in_slice;
561     slice_hor_pos = first_mb_in_slice % width_in_mbs;
562     slice_ver_pos = first_mb_in_slice / width_in_mbs;
563
564     if (mbaff_picture)
565         slice_ver_pos = slice_ver_pos << 1;
566
567     if (next_slice_param) {
568         first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
569         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
570         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
571
572         if (mbaff_picture)
573             next_slice_ver_pos = next_slice_ver_pos << 1;
574     } else {
575         next_slice_hor_pos = 0;
576         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
577     }
578
579     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
580     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
581     OUT_BCS_BATCH(batch, slice_type);
582     OUT_BCS_BATCH(batch,
583                   (num_ref_idx_l1 << 24) |
584                   (num_ref_idx_l0 << 16) |
585                   (slice_param->chroma_log2_weight_denom << 8) |
586                   (slice_param->luma_log2_weight_denom << 0));
587     OUT_BCS_BATCH(batch,
588                   (slice_param->direct_spatial_mv_pred_flag << 29) |
589                   (slice_param->disable_deblocking_filter_idc << 27) |
590                   (slice_param->cabac_init_idc << 24) |
591                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
592                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
593                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
594     OUT_BCS_BATCH(batch,
595                   (slice_ver_pos << 24) |
596                   (slice_hor_pos << 16) |
597                   (first_mb_in_slice << 0));
598     OUT_BCS_BATCH(batch,
599                   (next_slice_ver_pos << 16) |
600                   (next_slice_hor_pos << 0));
601     OUT_BCS_BATCH(batch,
602                   (next_slice_param == NULL) << 19); /* last slice flag */
603     OUT_BCS_BATCH(batch, 0);
604     OUT_BCS_BATCH(batch, 0);
605     OUT_BCS_BATCH(batch, 0);
606     OUT_BCS_BATCH(batch, 0);
607     ADVANCE_BCS_BATCH(batch);
608 }
609
610 static inline void
611 gen7_mfd_avc_ref_idx_state(VADriverContextP ctx,
612                            VAPictureParameterBufferH264 *pic_param,
613                            VASliceParameterBufferH264 *slice_param,
614                            struct gen7_mfd_context *gen7_mfd_context)
615 {
616     gen6_send_avc_ref_idx_state(
617         gen7_mfd_context->base.batch,
618         slice_param,
619         gen7_mfd_context->reference_surface
620     );
621 }
622
623 static void
624 gen7_mfd_avc_weightoffset_state(VADriverContextP ctx,
625                                 VAPictureParameterBufferH264 *pic_param,
626                                 VASliceParameterBufferH264 *slice_param,
627                                 struct gen7_mfd_context *gen7_mfd_context)
628 {
629     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
630     int i, j, num_weight_offset_table = 0;
631     short weightoffsets[32 * 6];
632
633     if ((slice_param->slice_type == SLICE_TYPE_P ||
634          slice_param->slice_type == SLICE_TYPE_SP) &&
635         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
636         num_weight_offset_table = 1;
637     }
638
639     if ((slice_param->slice_type == SLICE_TYPE_B) &&
640         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
641         num_weight_offset_table = 2;
642     }
643
644     for (i = 0; i < num_weight_offset_table; i++) {
645         BEGIN_BCS_BATCH(batch, 98);
646         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
647         OUT_BCS_BATCH(batch, i);
648
649         if (i == 0) {
650             for (j = 0; j < 32; j++) {
651                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
652                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
653                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
654                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
655                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
656                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
657             }
658         } else {
659             for (j = 0; j < 32; j++) {
660                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
661                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
662                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
663                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
664                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
665                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
666             }
667         }
668
669         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
670         ADVANCE_BCS_BATCH(batch);
671     }
672 }
673
674 static void
675 gen7_mfd_avc_bsd_object(VADriverContextP ctx,
676                         VAPictureParameterBufferH264 *pic_param,
677                         VASliceParameterBufferH264 *slice_param,
678                         dri_bo *slice_data_bo,
679                         VASliceParameterBufferH264 *next_slice_param,
680                         struct gen7_mfd_context *gen7_mfd_context)
681 {
682     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
683     unsigned int slice_data_bit_offset;
684
685     slice_data_bit_offset = avc_get_first_mb_bit_offset(
686                                 slice_data_bo,
687                                 slice_param,
688                                 pic_param->pic_fields.bits.entropy_coding_mode_flag
689                             );
690
691     /* the input bitsteam format on GEN7 differs from GEN6 */
692     BEGIN_BCS_BATCH(batch, 6);
693     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
694     OUT_BCS_BATCH(batch,
695                   (slice_param->slice_data_size - slice_param->slice_data_offset));
696     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
697     OUT_BCS_BATCH(batch,
698                   (0 << 31) |
699                   (0 << 14) |
700                   (0 << 12) |
701                   (0 << 10) |
702                   (0 << 8));
703     OUT_BCS_BATCH(batch,
704                   ((slice_data_bit_offset >> 3) << 16) |
705                   (1 << 7)  |
706                   (0 << 5)  |
707                   (0 << 4)  |
708                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
709                   (slice_data_bit_offset & 0x7));
710     OUT_BCS_BATCH(batch, 0);
711     ADVANCE_BCS_BATCH(batch);
712 }
713
714 static inline void
715 gen7_mfd_avc_context_init(
716     VADriverContextP         ctx,
717     struct gen7_mfd_context *gen7_mfd_context
718 )
719 {
720     /* Initialize flat scaling lists */
721     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
722 }
723
724 static void
725 gen7_mfd_avc_decode_init(VADriverContextP ctx,
726                          struct decode_state *decode_state,
727                          struct gen7_mfd_context *gen7_mfd_context)
728 {
729     VAPictureParameterBufferH264 *pic_param;
730     VASliceParameterBufferH264 *slice_param;
731     struct i965_driver_data *i965 = i965_driver_data(ctx);
732     struct object_surface *obj_surface;
733     dri_bo *bo;
734     int i, j, enable_avc_ildb = 0;
735     unsigned int width_in_mbs, height_in_mbs;
736
737     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
738         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
739         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
740
741         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
742             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
743             assert((slice_param->slice_type == SLICE_TYPE_I) ||
744                    (slice_param->slice_type == SLICE_TYPE_SI) ||
745                    (slice_param->slice_type == SLICE_TYPE_P) ||
746                    (slice_param->slice_type == SLICE_TYPE_SP) ||
747                    (slice_param->slice_type == SLICE_TYPE_B));
748
749             if (slice_param->disable_deblocking_filter_idc != 1) {
750                 enable_avc_ildb = 1;
751                 break;
752             }
753
754             slice_param++;
755         }
756     }
757
758     assert(decode_state->pic_param && decode_state->pic_param->buffer);
759     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
760     intel_update_avc_frame_store_index(ctx, decode_state, pic_param,
761                                        gen7_mfd_context->reference_surface, &gen7_mfd_context->fs_ctx);
762     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
763     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
764     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
765     assert(height_in_mbs > 0 && height_in_mbs <= 256);
766
767     /* Current decoded picture */
768     obj_surface = decode_state->render_object;
769     if (pic_param->pic_fields.bits.reference_pic_flag)
770         obj_surface->flags |= SURFACE_REFERENCED;
771     else
772         obj_surface->flags &= ~SURFACE_REFERENCED;
773
774     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
775     gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface);
776
777     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
778     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
779     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
780     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
781
782     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
783     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
784     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
785     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
786
787     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
788     bo = dri_bo_alloc(i965->intel.bufmgr,
789                       "intra row store",
790                       width_in_mbs * 64,
791                       0x1000);
792     assert(bo);
793     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
794     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
795
796     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
797     bo = dri_bo_alloc(i965->intel.bufmgr,
798                       "deblocking filter row store",
799                       width_in_mbs * 64 * 4,
800                       0x1000);
801     assert(bo);
802     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
803     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
804
805     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
806     bo = dri_bo_alloc(i965->intel.bufmgr,
807                       "bsd mpc row store",
808                       width_in_mbs * 64 * 2,
809                       0x1000);
810     assert(bo);
811     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
812     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
813
814     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
815     bo = dri_bo_alloc(i965->intel.bufmgr,
816                       "mpr row store",
817                       width_in_mbs * 64 * 2,
818                       0x1000);
819     assert(bo);
820     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
821     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
822
823     gen7_mfd_context->bitplane_read_buffer.valid = 0;
824 }
825
826 static void
827 gen7_mfd_avc_decode_picture(VADriverContextP ctx,
828                             struct decode_state *decode_state,
829                             struct gen7_mfd_context *gen7_mfd_context)
830 {
831     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
832     VAPictureParameterBufferH264 *pic_param;
833     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
834     dri_bo *slice_data_bo;
835     int i, j;
836
837     assert(decode_state->pic_param && decode_state->pic_param->buffer);
838     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
839     gen7_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
840
841     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
842     intel_batchbuffer_emit_mi_flush(batch);
843     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
844     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
845     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
846     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
847     gen7_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
848     gen7_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
849
850     for (j = 0; j < decode_state->num_slice_params; j++) {
851         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
852         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
853         slice_data_bo = decode_state->slice_datas[j]->bo;
854         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
855
856         if (j == decode_state->num_slice_params - 1)
857             next_slice_group_param = NULL;
858         else
859             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
860
861         if (j == 0 && slice_param->first_mb_in_slice)
862             gen7_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
863
864         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
865             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
866             assert((slice_param->slice_type == SLICE_TYPE_I) ||
867                    (slice_param->slice_type == SLICE_TYPE_SI) ||
868                    (slice_param->slice_type == SLICE_TYPE_P) ||
869                    (slice_param->slice_type == SLICE_TYPE_SP) ||
870                    (slice_param->slice_type == SLICE_TYPE_B));
871
872             if (i < decode_state->slice_params[j]->num_elements - 1)
873                 next_slice_param = slice_param + 1;
874             else
875                 next_slice_param = next_slice_group_param;
876
877             gen7_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
878             gen7_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
879             gen7_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
880             gen7_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
881             gen7_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
882             slice_param++;
883         }
884     }
885
886     intel_batchbuffer_end_atomic(batch);
887     intel_batchbuffer_flush(batch);
888 }
889
890 static void
891 gen7_mfd_mpeg2_decode_init(VADriverContextP ctx,
892                            struct decode_state *decode_state,
893                            struct gen7_mfd_context *gen7_mfd_context)
894 {
895     VAPictureParameterBufferMPEG2 *pic_param;
896     struct i965_driver_data *i965 = i965_driver_data(ctx);
897     struct object_surface *obj_surface;
898     dri_bo *bo;
899     unsigned int width_in_mbs;
900
901     assert(decode_state->pic_param && decode_state->pic_param->buffer);
902     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
903     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
904
905     mpeg2_set_reference_surfaces(
906         ctx,
907         gen7_mfd_context->reference_surface,
908         decode_state,
909         pic_param
910     );
911
912     /* Current decoded picture */
913     obj_surface = decode_state->render_object;
914     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
915
916     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
917     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
918     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
919     gen7_mfd_context->pre_deblocking_output.valid = 1;
920
921     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
922     bo = dri_bo_alloc(i965->intel.bufmgr,
923                       "bsd mpc row store",
924                       width_in_mbs * 96,
925                       0x1000);
926     assert(bo);
927     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
928     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
929
930     gen7_mfd_context->post_deblocking_output.valid = 0;
931     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
932     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
933     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
934     gen7_mfd_context->bitplane_read_buffer.valid = 0;
935 }
936
937 static void
938 gen7_mfd_mpeg2_pic_state(VADriverContextP ctx,
939                          struct decode_state *decode_state,
940                          struct gen7_mfd_context *gen7_mfd_context)
941 {
942     struct i965_driver_data * const i965 = i965_driver_data(ctx);
943     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
944     VAPictureParameterBufferMPEG2 *pic_param;
945     unsigned int slice_concealment_disable_bit = 0;
946
947     assert(decode_state->pic_param && decode_state->pic_param->buffer);
948     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
949
950     if (IS_HASWELL(i965->intel.device_info)) {
951         /* XXX: disable concealment for now */
952         slice_concealment_disable_bit = 1;
953     }
954
955     BEGIN_BCS_BATCH(batch, 13);
956     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
957     OUT_BCS_BATCH(batch,
958                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
959                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
960                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
961                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
962                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
963                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
964                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
965                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
966                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
967                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
968                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
969                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
970     OUT_BCS_BATCH(batch,
971                   pic_param->picture_coding_type << 9);
972     OUT_BCS_BATCH(batch,
973                   (slice_concealment_disable_bit << 31) |
974                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
975                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
976     OUT_BCS_BATCH(batch, 0);
977     OUT_BCS_BATCH(batch, 0);
978     OUT_BCS_BATCH(batch, 0);
979     OUT_BCS_BATCH(batch, 0);
980     OUT_BCS_BATCH(batch, 0);
981     OUT_BCS_BATCH(batch, 0);
982     OUT_BCS_BATCH(batch, 0);
983     OUT_BCS_BATCH(batch, 0);
984     OUT_BCS_BATCH(batch, 0);
985     ADVANCE_BCS_BATCH(batch);
986 }
987
988 static void
989 gen7_mfd_mpeg2_qm_state(VADriverContextP ctx,
990                         struct decode_state *decode_state,
991                         struct gen7_mfd_context *gen7_mfd_context)
992 {
993     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
994     int i, j;
995
996     /* Update internal QM state */
997     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
998         VAIQMatrixBufferMPEG2 * const iq_matrix =
999             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1000
1001         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1002             iq_matrix->load_intra_quantiser_matrix) {
1003             gen_iq_matrix->load_intra_quantiser_matrix =
1004                 iq_matrix->load_intra_quantiser_matrix;
1005             if (iq_matrix->load_intra_quantiser_matrix) {
1006                 for (j = 0; j < 64; j++)
1007                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1008                         iq_matrix->intra_quantiser_matrix[j];
1009             }
1010         }
1011
1012         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1013             iq_matrix->load_non_intra_quantiser_matrix) {
1014             gen_iq_matrix->load_non_intra_quantiser_matrix =
1015                 iq_matrix->load_non_intra_quantiser_matrix;
1016             if (iq_matrix->load_non_intra_quantiser_matrix) {
1017                 for (j = 0; j < 64; j++)
1018                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1019                         iq_matrix->non_intra_quantiser_matrix[j];
1020             }
1021         }
1022     }
1023
1024     /* Commit QM state to HW */
1025     for (i = 0; i < 2; i++) {
1026         unsigned char *qm = NULL;
1027         int qm_type;
1028
1029         if (i == 0) {
1030             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1031                 qm = gen_iq_matrix->intra_quantiser_matrix;
1032                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1033             }
1034         } else {
1035             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1036                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1037                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1038             }
1039         }
1040
1041         if (!qm)
1042             continue;
1043
1044         gen7_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1045     }
1046 }
1047
1048 uint32_t mpeg2_get_slice_data_length(dri_bo *slice_data_bo, VASliceParameterBufferMPEG2 *slice_param)
1049 {
1050     uint8_t *buf;
1051     uint32_t buf_offset = slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3);
1052     uint32_t buf_size = slice_param->slice_data_size - (slice_param->macroblock_offset >> 3);
1053     uint32_t i = 0;
1054
1055     dri_bo_map(slice_data_bo, 0);
1056     buf = (uint8_t *)slice_data_bo->virtual + buf_offset;
1057
1058     if (buf_size < 4)
1059         return buf_size;
1060
1061     while (i <= (buf_size - 4)) {
1062         if (buf[i + 2] > 1) {
1063             i += 3;
1064         } else if (buf[i + 1]) {
1065             i += 2;
1066         } else if (buf[i] || buf[i + 2] != 1) {
1067             i++;
1068         } else {
1069             break;
1070         }
1071     }
1072
1073     if (i <= (buf_size - 4))
1074         buf_size = i;
1075
1076     dri_bo_unmap(slice_data_bo);
1077     return buf_size;
1078 }
1079
1080 static void
1081 gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1082                           VAPictureParameterBufferMPEG2 *pic_param,
1083                           VASliceParameterBufferMPEG2 *slice_param,
1084                           dri_bo *slice_data_bo,
1085                           VASliceParameterBufferMPEG2 *next_slice_param,
1086                           struct gen7_mfd_context *gen7_mfd_context)
1087 {
1088     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1089     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1090     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1091     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1092
1093     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1094         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1095         is_field_pic = 1;
1096     is_field_pic_wa = is_field_pic &&
1097                       gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1098
1099     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1100     hpos0 = slice_param->slice_horizontal_position;
1101
1102     if (next_slice_param == NULL) {
1103         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1104         hpos1 = 0;
1105     } else {
1106         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1107         hpos1 = next_slice_param->slice_horizontal_position;
1108     }
1109
1110     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1111
1112     BEGIN_BCS_BATCH(batch, 5);
1113     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1114     OUT_BCS_BATCH(batch,
1115                   mpeg2_get_slice_data_length(slice_data_bo, slice_param));
1116     OUT_BCS_BATCH(batch,
1117                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1118     OUT_BCS_BATCH(batch,
1119                   hpos0 << 24 |
1120                   vpos0 << 16 |
1121                   mb_count << 8 |
1122                   (next_slice_param == NULL) << 5 |
1123                   (next_slice_param == NULL) << 3 |
1124                   (slice_param->macroblock_offset & 0x7));
1125     OUT_BCS_BATCH(batch,
1126                   (slice_param->quantiser_scale_code << 24) |
1127                   (IS_HASWELL(i965->intel.device_info) ? (vpos1 << 8 | hpos1) : 0));
1128     ADVANCE_BCS_BATCH(batch);
1129 }
1130
1131 static void
1132 gen7_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1133                               struct decode_state *decode_state,
1134                               struct gen7_mfd_context *gen7_mfd_context)
1135 {
1136     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1137     VAPictureParameterBufferMPEG2 *pic_param;
1138     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1139     dri_bo *slice_data_bo;
1140     int i, j;
1141
1142     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1143     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1144
1145     gen7_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1146     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1147     intel_batchbuffer_emit_mi_flush(batch);
1148     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1149     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1150     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1151     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1152     gen7_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1153     gen7_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1154
1155     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1156         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1157             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1158
1159     for (j = 0; j < decode_state->num_slice_params; j++) {
1160         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1161         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1162         slice_data_bo = decode_state->slice_datas[j]->bo;
1163         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1164
1165         if (j == decode_state->num_slice_params - 1)
1166             next_slice_group_param = NULL;
1167         else
1168             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1169
1170         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1171             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1172
1173             if (i < decode_state->slice_params[j]->num_elements - 1)
1174                 next_slice_param = slice_param + 1;
1175             else
1176                 next_slice_param = next_slice_group_param;
1177
1178             gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
1179             slice_param++;
1180         }
1181     }
1182
1183     intel_batchbuffer_end_atomic(batch);
1184     intel_batchbuffer_flush(batch);
1185 }
1186
/* Lookup from a VC-1 MV mode index to the corresponding GEN7 value */
static const int va_to_gen7_vc1_mv[4] = {
    [0] = 1, /* 1-MV */
    [1] = 2, /* 1-MV half-pel */
    [2] = 3, /* 1-MV half-pel bilinear */
    [3] = 0, /* Mixed MV */
};

/* Scale factors for B pictures, 21 entries */
static const int b_picture_scale_factor[21] = {
    128,  85, 170,  64, 192,  51, 102,
    153, 204,  43, 215,  37,  74, 111,
    148, 185, 222,  32,  96, 160, 224,
};

/* Lookup from a conditional overlap index to the corresponding GEN7 value */
static const int va_to_gen7_vc1_condover[3] = {
    [0] = 0,
    [1] = 2,
    [2] = 3
};
1207
1208 static const int fptype_to_picture_type[8][2] = {
1209     {GEN7_VC1_I_PICTURE, GEN7_VC1_I_PICTURE},
1210     {GEN7_VC1_I_PICTURE, GEN7_VC1_P_PICTURE},
1211     {GEN7_VC1_P_PICTURE, GEN7_VC1_I_PICTURE},
1212     {GEN7_VC1_P_PICTURE, GEN7_VC1_P_PICTURE},
1213     {GEN7_VC1_B_PICTURE, GEN7_VC1_B_PICTURE},
1214     {GEN7_VC1_B_PICTURE, GEN7_VC1_BI_PICTURE},
1215     {GEN7_VC1_BI_PICTURE, GEN7_VC1_B_PICTURE},
1216     {GEN7_VC1_BI_PICTURE, GEN7_VC1_BI_PICTURE}
1217 };
1218
1219 static void
1220 gen7_mfd_free_vc1_surface(void **data)
1221 {
1222     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1223
1224     if (!gen7_vc1_surface)
1225         return;
1226
1227     dri_bo_unreference(gen7_vc1_surface->dmv_top);
1228     dri_bo_unreference(gen7_vc1_surface->dmv_bottom);
1229     free(gen7_vc1_surface);
1230     *data = NULL;
1231 }
1232
1233 static void
1234 gen7_mfd_init_vc1_surface(VADriverContextP ctx,
1235                           VAPictureParameterBufferVC1 *pic_param,
1236                           struct object_surface *obj_surface)
1237 {
1238     struct i965_driver_data *i965 = i965_driver_data(ctx);
1239     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1240     int height_in_mbs;
1241     int picture_type;
1242     int is_first_field = 1;
1243
1244     if (!pic_param->sequence_fields.bits.interlace ||
1245         (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
1246         picture_type = pic_param->picture_fields.bits.picture_type;
1247     } else {/* Field-Interlace */
1248         is_first_field = pic_param->picture_fields.bits.is_first_field;
1249         picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
1250     }
1251
1252     obj_surface->free_private_data = gen7_mfd_free_vc1_surface;
1253
1254     if (!gen7_vc1_surface) {
1255         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1256         assert(gen7_vc1_surface);
1257         assert((obj_surface->size & 0x3f) == 0);
1258         obj_surface->private_data = gen7_vc1_surface;
1259     }
1260
1261     if (!pic_param->sequence_fields.bits.interlace ||
1262         pic_param->picture_fields.bits.frame_coding_mode < 2 || /* Progressive or Frame-Interlace */
1263         is_first_field) {
1264         gen7_vc1_surface->picture_type_top = 0;
1265         gen7_vc1_surface->picture_type_bottom = 0;
1266         gen7_vc1_surface->intensity_compensation_top = 0;
1267         gen7_vc1_surface->intensity_compensation_bottom = 0;
1268         gen7_vc1_surface->luma_scale_top[0] = 0;
1269         gen7_vc1_surface->luma_scale_top[1] = 0;
1270         gen7_vc1_surface->luma_scale_bottom[0] = 0;
1271         gen7_vc1_surface->luma_scale_bottom[1] = 0;
1272         gen7_vc1_surface->luma_shift_top[0] = 0;
1273         gen7_vc1_surface->luma_shift_top[1] = 0;
1274         gen7_vc1_surface->luma_shift_bottom[0] = 0;
1275         gen7_vc1_surface->luma_shift_bottom[1] = 0;
1276     }
1277
1278     if (!pic_param->sequence_fields.bits.interlace ||
1279         pic_param->picture_fields.bits.frame_coding_mode < 2) { /* Progressive or Frame-Interlace */
1280         gen7_vc1_surface->picture_type_top = picture_type;
1281         gen7_vc1_surface->picture_type_bottom = picture_type;
1282     } else if (pic_param->picture_fields.bits.top_field_first ^ is_first_field)
1283         gen7_vc1_surface->picture_type_bottom = picture_type;
1284     else
1285         gen7_vc1_surface->picture_type_top = picture_type;
1286
1287     /*
1288      * The Direct MV buffer is scalable with frame height, but
1289      * does not scale with frame width as the hardware assumes
1290      * that frame width is fixed at 128 MBs.
1291      */
1292
1293     if (gen7_vc1_surface->dmv_top == NULL) {
1294         height_in_mbs = ALIGN(obj_surface->orig_height, 16) / 16;
1295         gen7_vc1_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
1296                                              "direct mv w/r buffer",
1297                                              128 * height_in_mbs * 64,
1298                                              0x1000);
1299     }
1300
1301     if (pic_param->sequence_fields.bits.interlace &&
1302         gen7_vc1_surface->dmv_bottom == NULL) {
1303         height_in_mbs = ALIGN(obj_surface->orig_height, 32) / 32;
1304         gen7_vc1_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
1305                                              "direct mv w/r buffer",
1306                                              128 * height_in_mbs * 64,
1307                                              0x1000);
1308     }
1309 }
1310
1311 static void
1312 gen7_mfd_vc1_decode_init(VADriverContextP ctx,
1313                          struct decode_state *decode_state,
1314                          struct gen7_mfd_context *gen7_mfd_context)
1315 {
1316     VAPictureParameterBufferVC1 *pic_param;
1317     struct i965_driver_data *i965 = i965_driver_data(ctx);
1318     struct object_surface *obj_surface;
1319     struct gen7_vc1_surface *gen7_vc1_current_surface;
1320     struct gen7_vc1_surface *gen7_vc1_forward_surface;
1321     dri_bo *bo;
1322     int width_in_mbs;
1323     int picture_type;
1324     int is_first_field = 1;
1325     int i;
1326
1327     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1328     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1329     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1330
1331     if (!pic_param->sequence_fields.bits.interlace ||
1332         (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
1333         picture_type = pic_param->picture_fields.bits.picture_type;
1334     } else {/* Field-Interlace */
1335         is_first_field = pic_param->picture_fields.bits.is_first_field;
1336         picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
1337     }
1338
1339     /* Current decoded picture */
1340     obj_surface = decode_state->render_object;
1341     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1342     gen7_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1343
1344     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1345     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1346     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1347
1348     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1349     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1350     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1351
1352     if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1353         gen7_mfd_context->post_deblocking_output.valid = 0;
1354         gen7_mfd_context->pre_deblocking_output.valid = 1;
1355     } else {
1356         gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1357         gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1358     }
1359
1360     intel_update_vc1_frame_store_index(ctx,
1361                                        decode_state,
1362                                        pic_param,
1363                                        gen7_mfd_context->reference_surface);
1364
1365     if (picture_type == GEN7_VC1_P_PICTURE) {
1366         obj_surface = decode_state->reference_objects[0];
1367         gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);
1368         if (pic_param->forward_reference_picture != VA_INVALID_ID &&
1369             obj_surface)
1370             gen7_vc1_forward_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
1371         else
1372             gen7_vc1_forward_surface = NULL;
1373
1374         if (!pic_param->sequence_fields.bits.interlace ||
1375             pic_param->picture_fields.bits.frame_coding_mode == 0) { /* Progressive */
1376             if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1377                 if (gen7_vc1_forward_surface) {
1378                     gen7_vc1_forward_surface->intensity_compensation_top = 1;
1379                     gen7_vc1_forward_surface->intensity_compensation_bottom = 1;
1380                     gen7_vc1_forward_surface->luma_scale_top[0] = pic_param->luma_scale;
1381                     gen7_vc1_forward_surface->luma_scale_bottom[0] = pic_param->luma_scale;
1382                     gen7_vc1_forward_surface->luma_shift_top[0] = pic_param->luma_shift;
1383                     gen7_vc1_forward_surface->luma_shift_bottom[0] = pic_param->luma_shift;
1384                 }
1385             }
1386         } else if (pic_param->sequence_fields.bits.interlace &&
1387             pic_param->picture_fields.bits.frame_coding_mode == 1) { /* Frame-Interlace */
1388             if (pic_param->picture_fields.bits.intensity_compensation) {
1389                 if (gen7_vc1_forward_surface) {
1390                     gen7_vc1_forward_surface->intensity_compensation_top = 1;
1391                     gen7_vc1_forward_surface->intensity_compensation_bottom = 1;
1392                     gen7_vc1_forward_surface->luma_scale_top[0] = pic_param->luma_scale;
1393                     gen7_vc1_forward_surface->luma_scale_bottom[0] = pic_param->luma_scale;
1394                     gen7_vc1_forward_surface->luma_shift_top[0] = pic_param->luma_shift;
1395                     gen7_vc1_forward_surface->luma_shift_bottom[0] = pic_param->luma_shift;
1396                 }
1397             }
1398         } else if (pic_param->sequence_fields.bits.interlace &&
1399                    pic_param->picture_fields.bits.frame_coding_mode == 2) { /* Field-Interlace */
1400             if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1401                 if (pic_param->intensity_compensation_field == 1 || /* Top field */
1402                     pic_param->intensity_compensation_field == 0) { /* Both fields */
1403                     if (is_first_field) {
1404                         if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1405                              (pic_param->reference_fields.bits.reference_field_pic_indicator ==
1406                              pic_param->picture_fields.bits.top_field_first)) ||
1407                             pic_param->reference_fields.bits.num_reference_pictures) {
1408                             if (gen7_vc1_forward_surface) {
1409                                 i = gen7_vc1_forward_surface->intensity_compensation_top++;
1410                                 gen7_vc1_forward_surface->luma_scale_top[i] = pic_param->luma_scale;
1411                                 gen7_vc1_forward_surface->luma_shift_top[i] = pic_param->luma_shift;
1412                             }
1413                         }
1414                     } else { /* Second field */
1415                         if (pic_param->picture_fields.bits.top_field_first) {
1416                             if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1417                                  !pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1418                                 pic_param->reference_fields.bits.num_reference_pictures) {
1419                                 i = gen7_vc1_current_surface->intensity_compensation_top++;
1420                                 gen7_vc1_current_surface->luma_scale_top[i] = pic_param->luma_scale;
1421                                 gen7_vc1_current_surface->luma_shift_top[i] = pic_param->luma_shift;
1422                             }
1423                         } else {
1424                             if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1425                                  pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1426                                 pic_param->reference_fields.bits.num_reference_pictures) {
1427                                 if (gen7_vc1_forward_surface) {
1428                                     i = gen7_vc1_forward_surface->intensity_compensation_top++;
1429                                     gen7_vc1_forward_surface->luma_scale_top[i] = pic_param->luma_scale;
1430                                     gen7_vc1_forward_surface->luma_shift_top[i] = pic_param->luma_shift;
1431                                 }
1432                             }
1433                         }
1434                     }
1435                 }
1436                 if (pic_param->intensity_compensation_field == 2 || /* Bottom field */
1437                     pic_param->intensity_compensation_field == 0) { /* Both fields */
1438                     if (is_first_field) {
1439                         if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1440                              (pic_param->reference_fields.bits.reference_field_pic_indicator ^
1441                               pic_param->picture_fields.bits.top_field_first)) ||
1442                             pic_param->reference_fields.bits.num_reference_pictures) {
1443                             if (gen7_vc1_forward_surface) {
1444                                 i = gen7_vc1_forward_surface->intensity_compensation_bottom++;
1445                                 if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
1446                                     gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale;
1447                                     gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift;
1448                                 } else { /* Both fields */
1449                                     gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
1450                                     gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
1451                                 }
1452                             }
1453                         }
1454                     } else { /* Second field */
1455                         if (pic_param->picture_fields.bits.top_field_first) {
1456                             if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1457                                  pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1458                                 pic_param->reference_fields.bits.num_reference_pictures) {
1459                                 if (gen7_vc1_forward_surface) {
1460                                     i = gen7_vc1_forward_surface->intensity_compensation_bottom++;
1461                                     if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
1462                                         gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale;
1463                                         gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift;
1464                                     } else { /* Both fields */
1465                                         gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
1466                                         gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
1467                                     }
1468                                 }
1469                             }
1470                         } else {
1471                            if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1472                                  !pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1473                                 pic_param->reference_fields.bits.num_reference_pictures) {
1474                                 i = gen7_vc1_current_surface->intensity_compensation_bottom++;
1475                                if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
1476                                    gen7_vc1_current_surface->luma_scale_bottom[i] = pic_param->luma_scale;
1477                                    gen7_vc1_current_surface->luma_shift_bottom[i] = pic_param->luma_shift;
1478                                 } else { /* Both fields */
1479                                     gen7_vc1_current_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
1480                                     gen7_vc1_current_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
1481                                 }
1482                             }
1483                         }
1484                     }
1485                 }
1486             }
1487         }
1488     }
1489
1490     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1491     bo = dri_bo_alloc(i965->intel.bufmgr,
1492                       "intra row store",
1493                       width_in_mbs * 64,
1494                       0x1000);
1495     assert(bo);
1496     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1497     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1498
1499     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1500     bo = dri_bo_alloc(i965->intel.bufmgr,
1501                       "deblocking filter row store",
1502                       width_in_mbs * 7 * 64,
1503                       0x1000);
1504     assert(bo);
1505     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1506     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1507
1508     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1509     bo = dri_bo_alloc(i965->intel.bufmgr,
1510                       "bsd mpc row store",
1511                       width_in_mbs * 96,
1512                       0x1000);
1513     assert(bo);
1514     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1515     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1516
1517     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1518
1519     if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
1520         gen7_mfd_context->bitplane_read_buffer.valid = 1;
1521     else
1522         gen7_mfd_context->bitplane_read_buffer.valid = !!(pic_param->bitplane_present.value & 0x7f);
1523     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1524
1525     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1526         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1527         int height_in_mbs;
1528         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1529         int src_w, src_h;
1530         uint8_t *src = NULL, *dst = NULL;
1531
1532         if (!pic_param->sequence_fields.bits.interlace ||
1533             (pic_param->picture_fields.bits.frame_coding_mode < 2)) /* Progressive or Frame-Interlace */
1534             height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1535         else /* Field-Interlace */
1536             height_in_mbs = ALIGN(pic_param->coded_height, 32) / 32;
1537
1538         bo = dri_bo_alloc(i965->intel.bufmgr,
1539                           "VC-1 Bitplane",
1540                           bitplane_width * height_in_mbs,
1541                           0x1000);
1542         assert(bo);
1543         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1544
1545         dri_bo_map(bo, True);
1546         assert(bo->virtual);
1547         dst = bo->virtual;
1548
1549         if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1550             for (src_h = 0; src_h < height_in_mbs; src_h++) {
1551                 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1552                     int dst_index;
1553                     uint8_t src_value = 0x2;
1554
1555                     dst_index = src_w / 2;
1556                     dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1557                 }
1558
1559                 if (src_w & 1)
1560                     dst[src_w / 2] >>= 4;
1561
1562                 dst += bitplane_width;
1563             }
1564         } else {
1565             assert(decode_state->bit_plane->buffer);
1566             src = decode_state->bit_plane->buffer;
1567
1568             for (src_h = 0; src_h < height_in_mbs; src_h++) {
1569                 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1570                     int src_index, dst_index;
1571                     int src_shift;
1572                     uint8_t src_value;
1573
1574                     src_index = (src_h * width_in_mbs + src_w) / 2;
1575                     src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1576                     src_value = ((src[src_index] >> src_shift) & 0xf);
1577
1578                     dst_index = src_w / 2;
1579                     dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1580                 }
1581
1582                 if (src_w & 1)
1583                     dst[src_w / 2] >>= 4;
1584
1585                 dst += bitplane_width;
1586             }
1587         }
1588
1589         dri_bo_unmap(bo);
1590     } else
1591         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1592 }
1593
/*
 * Emit the 6-dword MFD_VC1_LONG_PIC_STATE command for the current VC-1
 * picture into gen7_mfd_context->base.batch.
 *
 * The picture-level fields are derived from the VAPictureParameterBufferVC1
 * held in decode_state->pic_param and from the gen7_vc1_surface private data
 * attached to the render target and to the reference surfaces.
 *
 * Side effects besides the batch write:
 *   - range_reduction_frame (Main profile with rangered) and
 *     reference_distance (first field of a field-interlace I/P picture) are
 *     stored in decode_state->render_object->private_data;
 *   - when variable-sized transform is disabled on a non-I/BI picture, the
 *     transform type fields of the caller's pic_param buffer are overwritten.
 *
 * ctx is not used by this function.
 */
1594 static void
1595 gen7_mfd_vc1_pic_state(VADriverContextP ctx,
1596                        struct decode_state *decode_state,
1597                        struct gen7_mfd_context *gen7_mfd_context)
1598 {
1599     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1600     VAPictureParameterBufferVC1 *pic_param;
1601     struct object_surface *obj_surface;
1602     struct gen7_vc1_surface *gen7_vc1_current_surface;
1603     struct gen7_vc1_surface *gen7_vc1_reference_surface;
1604     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1605     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1606     int unified_mv_mode = 0;
1607     int ref_field_pic_polarity = 0;
1608     int scale_factor = 0;
1609     int trans_ac_y = 0;
1610     int dmv_surface_valid = 0;
1611     int frfd = 0;
1612     int brfd = 0;
1613     int fcm = 0;
1614     int picture_type;
1615     int ptype;
1616     int overlap = 0;
1617     int interpolation_mode = 0;
1618     int height_in_mbs;
1619     int is_first_field = 1;
1620     int loopfilter = 0;
         /*
          * Unsigned on purpose: this flag is shifted into bit 31 of the last
          * dword, and left-shifting a signed int into its sign bit is
          * undefined behaviour in C.
          */
1621     unsigned int bitplane_present;
1622     int range_reduction = 0;
1623     int range_reduction_scale = 0;
1624     int forward_mb = 0, mv_type_mb = 0, skip_mb = 0, direct_mb = 0;
1625     int overflags = 0, ac_pred = 0, field_tx = 0;
1626
1627     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1628     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1629
         /*
          * For field-interlace pictures the picture_type bits are translated
          * through fptype_to_picture_type (column selected by first/second
          * field) and the height in macroblocks uses 32-line alignment.
          */
1630     if (!pic_param->sequence_fields.bits.interlace ||
1631         (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
1632         picture_type = pic_param->picture_fields.bits.picture_type;
1633         height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1634     } else {/* Field-Interlace */
1635         is_first_field = pic_param->picture_fields.bits.is_first_field;
1636         picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
1637         height_in_mbs = ALIGN(pic_param->coded_height, 32) / 32;
1638     }
1639
         /* Derive the alternative quantizer config and edge mask from the DQUANT fields. */
1640     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1641     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1642     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1643     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1644     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1645     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1646     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1647
1648     if (dquant == 0) {
1649         alt_pquant_config = 0;
1650         alt_pquant_edge_mask = 0;
1651     } else if (dquant == 2) {
1652         alt_pquant_config = 1;
1653         alt_pquant_edge_mask = 0xf;
1654     } else {
1655         assert(dquant == 1);
1656         if (dquantfrm == 0) {
1657             alt_pquant_config = 0;
1658             alt_pquant_edge_mask = 0;
1659             alt_pq = 0;
1660         } else {
1661             assert(dquantfrm == 1);
1662             alt_pquant_config = 1;
1663
1664             switch (dqprofile) {
1665             case 3:
1666                 if (dqbilevel == 0) {
1667                     alt_pquant_config = 2;
1668                     alt_pquant_edge_mask = 0;
1669                 } else {
1670                     assert(dqbilevel == 1);
1671                     alt_pquant_config = 3;
1672                     alt_pquant_edge_mask = 0;
1673                 }
1674                 break;
1675
1676             case 0:
1677                 alt_pquant_edge_mask = 0xf;
1678                 break;
1679
1680             case 1:
1681                 if (dqdbedge == 3)
1682                     alt_pquant_edge_mask = 0x9;
1683                 else
1684                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1685
1686                 break;
1687
1688             case 2:
1689                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1690                 break;
1691
1692             default:
1693                 assert(0);
1694             }
1695         }
1696     }
1697
         /*
          * Main profile range reduction: remember this picture's flag on the
          * current surface and request scaling only when it differs from the
          * flag stored on the forward reference surface.
          */
1698     if (pic_param->sequence_fields.bits.profile == 1 && /* Main Profile */
1699         pic_param->sequence_fields.bits.rangered) {
1700         obj_surface = decode_state->reference_objects[0];
1701
1702         gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);
1703
1704         if (pic_param->forward_reference_picture != VA_INVALID_ID &&
1705             obj_surface)
1706             gen7_vc1_reference_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
1707         else
1708             gen7_vc1_reference_surface = NULL;
1709
             /*
              * A skipped picture takes the flag of its forward reference (0 when
              * there is none). The outer braces make the if/else pairing explicit.
              */
1710         if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1711             if (gen7_vc1_reference_surface)
1712                 gen7_vc1_current_surface->range_reduction_frame = gen7_vc1_reference_surface->range_reduction_frame;
1713             else
1714                 gen7_vc1_current_surface->range_reduction_frame = 0;
1715         } else
1716             gen7_vc1_current_surface->range_reduction_frame = pic_param->range_reduction_frame;
1717
1718         if (gen7_vc1_reference_surface) {
1719             if (gen7_vc1_current_surface->range_reduction_frame &&
1720                 !gen7_vc1_reference_surface->range_reduction_frame) {
1721                 range_reduction = 1;
1722                 range_reduction_scale = 0;
1723             } else if (!gen7_vc1_current_surface->range_reduction_frame &&
1724                        gen7_vc1_reference_surface->range_reduction_frame) {
1725                 range_reduction = 1;
1726                 range_reduction_scale = 1;
1727             }
1728         }
1729     }
1730
         /*
          * With intensity compensation the MV mode to translate is mv_mode2,
          * otherwise mv_mode. Frame-interlace and non-P/B pictures keep 0.
          */
1731     if ((!pic_param->sequence_fields.bits.interlace ||
1732          pic_param->picture_fields.bits.frame_coding_mode != 1) && /* Progressive or Field-Interlace */
1733         (picture_type == GEN7_VC1_P_PICTURE ||
1734          picture_type == GEN7_VC1_B_PICTURE)) {
1735         if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1736             assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1737             unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1738         } else {
1739             assert(pic_param->mv_fields.bits.mv_mode < 4);
1740             unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1741         }
1742     }
1743
         /*
          * Single-reference field P picture: compute the reference field
          * polarity from the reference field indicator, the field order and
          * whether this is the first or the second field.
          */
1744     if (pic_param->sequence_fields.bits.interlace &&
1745         pic_param->picture_fields.bits.frame_coding_mode == 2 && /* Field-Interlace */
1746         picture_type == GEN7_VC1_P_PICTURE &&
1747         !pic_param->reference_fields.bits.num_reference_pictures) {
1748         if (pic_param->reference_fields.bits.reference_field_pic_indicator == 0) {
1749             ref_field_pic_polarity = is_first_field ?
1750                                         pic_param->picture_fields.bits.top_field_first :
1751                                         !pic_param->picture_fields.bits.top_field_first;
1752         } else {
1753             ref_field_pic_polarity = is_first_field ?
1754                                         !pic_param->picture_fields.bits.top_field_first :
1755                                         pic_param->picture_fields.bits.top_field_first;
1756         }
1757     }
1758
         /* The scale factor table is only indexed for fractions below 21; otherwise 0 is used. */
1759     if (pic_param->b_picture_fraction < 21)
1760         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1761
         /*
          * A skipped picture is programmed as a P picture with the bitplane
          * forced present; its raw-coding flags and loopfilter stay 0.
          */
1762     if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1763         ptype = GEN7_VC1_P_PICTURE;
1764         bitplane_present = 1;
1765     } else {
1766         ptype = pic_param->picture_fields.bits.picture_type;
1767         bitplane_present = !!(pic_param->bitplane_present.value & 0x7f);
1768         forward_mb = pic_param->raw_coding.flags.forward_mb;
1769         mv_type_mb = pic_param->raw_coding.flags.mv_type_mb;
1770         skip_mb = pic_param->raw_coding.flags.skip_mb;
1771         direct_mb = pic_param->raw_coding.flags.direct_mb;
1772         overflags = pic_param->raw_coding.flags.overflags;
1773         ac_pred = pic_param->raw_coding.flags.ac_pred;
1774         field_tx = pic_param->raw_coding.flags.field_tx;
1775         loopfilter = pic_param->entrypoint_fields.bits.loopfilter;
1776     }
1777
1778     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1779         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1780     else {
1781         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1782         /*
1783          * 8.3.6.2.1 Transform Type Selection
1784          * If variable-sized transform coding is not enabled,
1785          * then the 8x8 transform shall be used for all blocks.
1786          * it is also MFX_VC1_PIC_STATE requirement.
1787          */
             /* Note: this writes into the caller-supplied pic_param buffer. */
1788         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1789             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1790             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1791         }
1792     }
1793
         /*
          * B picture: the direct-MV surface is flagged valid only when the
          * backward reference recorded a P picture type (bottom type for a
          * field-interlace picture with top_field_first ^ is_first_field set,
          * top type otherwise).
          */
1794     if (picture_type == GEN7_VC1_B_PICTURE) {
1795         obj_surface = decode_state->reference_objects[1];
1796
1797         if (pic_param->backward_reference_picture != VA_INVALID_ID &&
1798             obj_surface)
1799             gen7_vc1_reference_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
1800         else
1801             gen7_vc1_reference_surface = NULL;
1802
1803         if (gen7_vc1_reference_surface) {
1804             if (pic_param->sequence_fields.bits.interlace &&
1805                 pic_param->picture_fields.bits.frame_coding_mode == 2 && /* Field-Interlace */
1806                 pic_param->picture_fields.bits.top_field_first ^ is_first_field) {
1807                 if (gen7_vc1_reference_surface->picture_type_bottom == GEN7_VC1_P_PICTURE)
1808                     dmv_surface_valid = 1;
1809             } else if (gen7_vc1_reference_surface->picture_type_top == GEN7_VC1_P_PICTURE)
1810                 dmv_surface_valid = 1;
1811         }
1812     }
1813
1814     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1815
         /* fcm stays 0 for non-interlaced sequences; field pictures encode the field order as 2 or 3. */
1816     if (pic_param->sequence_fields.bits.interlace) {
1817         if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1818             fcm = pic_param->picture_fields.bits.frame_coding_mode;
1819         else if (!pic_param->picture_fields.bits.top_field_first)
1820             fcm = 3; /* Field with bottom field first */
1821         else
1822             fcm = 2; /* Field with top field first */
1823     }
1824
         /*
          * Field-interlace reference distances: I/P pictures store the
          * distance on the current surface with the first field and reuse it
          * for the second; B pictures scale the distance stored on the
          * backward reference surface.
          */
1825     if (pic_param->sequence_fields.bits.interlace &&
1826         pic_param->picture_fields.bits.frame_coding_mode == 2) { /* Field-Interlace */
1827         if (picture_type == GEN7_VC1_I_PICTURE ||
1828              picture_type == GEN7_VC1_P_PICTURE) {
1829             gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);
1830
1831             if (is_first_field)
1832                 gen7_vc1_current_surface->reference_distance = pic_param->reference_fields.bits.reference_distance;
1833
1834             frfd = gen7_vc1_current_surface->reference_distance;
1835         } else if (picture_type == GEN7_VC1_B_PICTURE) {
1836             obj_surface = decode_state->reference_objects[1];
1837
1838             if (pic_param->backward_reference_picture != VA_INVALID_ID &&
1839                 obj_surface)
1840                 gen7_vc1_reference_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
1841             else
1842                 gen7_vc1_reference_surface = NULL;
1843
1844             if (gen7_vc1_reference_surface) {
1845                 frfd = (scale_factor * gen7_vc1_reference_surface->reference_distance) >> 8;
1846
1847                 brfd = gen7_vc1_reference_surface->reference_distance - frfd - 1;
1848                 if (brfd < 0)
1849                     brfd = 0;
1850             }
1851         }
1852     }
1853
         /* Overlap smoothing is enabled per profile, picture type and quantizer scale. */
1854     if (pic_param->sequence_fields.bits.overlap) {
1855         if (pic_param->sequence_fields.bits.profile == 3) { /* Advanced Profile */
1856             if (picture_type == GEN7_VC1_P_PICTURE &&
1857                 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1858                 overlap = 1;
1859             }
1860             if (picture_type == GEN7_VC1_I_PICTURE ||
1861                 picture_type == GEN7_VC1_BI_PICTURE) {
1862                 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1863                     overlap = 1;
1864                 } else if (pic_param->conditional_overlap_flag == 1 || /* all block boundaries */
1865                            pic_param->conditional_overlap_flag == 2) { /* coded by OVERFLAGSMB bitplane */
1866                     overlap = 1;
1867                 }
1868             }
1869         } else {
1870             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1871                 picture_type != GEN7_VC1_B_PICTURE) {
1872                 overlap = 1;
1873             }
1874         }
1875     }
1876
         /* 8 is OR-ed in for the half-pel bilinear MV mode; bit 0 carries fast_uvmc_flag. */
1877     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1878         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1879          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1880         interpolation_mode = 8 | pic_param->fast_uvmc_flag;
1881     else
1882         interpolation_mode = 0 | pic_param->fast_uvmc_flag;
1883
         /* Command header followed by five payload dwords. */
1884     BEGIN_BCS_BATCH(batch, 6);
1885     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1886     OUT_BCS_BATCH(batch,
1887                   ((height_in_mbs - 1) << 16) |
1888                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1889     OUT_BCS_BATCH(batch,
1890                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1891                   dmv_surface_valid << 15 |
1892                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1893                   pic_param->rounding_control << 13 |
1894                   pic_param->sequence_fields.bits.syncmarker << 12 |
1895                   interpolation_mode << 8 |
1896                   range_reduction_scale << 7 |
1897                   range_reduction << 6 |
1898                   loopfilter << 5 |
1899                   overlap << 4 |
1900                   !is_first_field << 3 |
1901                   (pic_param->sequence_fields.bits.profile == 3) << 0); /* Advanced Profile */
1902     OUT_BCS_BATCH(batch,
1903                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1904                   ptype << 26 |
1905                   fcm << 24 |
1906                   alt_pq << 16 |
1907                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1908                   scale_factor << 0);
1909     OUT_BCS_BATCH(batch,
1910                   unified_mv_mode << 28 |
1911                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1912                   pic_param->fast_uvmc_flag << 26 |
1913                   ref_field_pic_polarity << 25 |
1914                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1915                   brfd << 20 |
1916                   frfd << 16 |
1917                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1918                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1919                   alt_pquant_edge_mask << 4 |
1920                   alt_pquant_config << 2 |
1921                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1922                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
         /* bitplane_present is unsigned, so the shift into bit 31 is well defined. */
1923     OUT_BCS_BATCH(batch,
1924                   bitplane_present << 31 |
1925                   forward_mb << 30 |
1926                   mv_type_mb << 29 |
1927                   skip_mb << 28 |
1928                   direct_mb << 27 |
1929                   overflags << 26 |
1930                   ac_pred << 25 |
1931                   field_tx << 24 |
1932                   pic_param->mv_fields.bits.mv_table << 20 |
1933                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1934                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1935                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1936                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1937                   pic_param->mb_mode_table << 8 |
1938                   trans_ac_y << 6 |
1939                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1940                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1941                   pic_param->cbp_table << 0);
1942     ADVANCE_BCS_BATCH(batch);
1943 }
1944
1945 static void
1946 gen7_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1947                              struct decode_state *decode_state,
1948                              struct gen7_mfd_context *gen7_mfd_context)
1949 {
1950     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1951     VAPictureParameterBufferVC1 *pic_param;
1952     struct gen7_vc1_surface *gen7_vc1_top_surface;
1953     struct gen7_vc1_surface *gen7_vc1_bottom_surface;
1954     int picture_type;
1955     int is_first_field = 1;
1956     int intensitycomp_single_fwd = 0;
1957     int intensitycomp_single_bwd = 0;
1958     int intensitycomp_double_fwd = 0;
1959     int lumscale1_single_fwd = 0;
1960     int lumscale2_single_fwd = 0;
1961     int lumshift1_single_fwd = 0;
1962     int lumshift2_single_fwd = 0;
1963     int lumscale1_single_bwd = 0;
1964     int lumscale2_single_bwd = 0;
1965     int lumshift1_single_bwd = 0;
1966     int lumshift2_single_bwd = 0;
1967     int lumscale1_double_fwd = 0;
1968     int lumscale2_double_fwd = 0;
1969     int lumshift1_double_fwd = 0;
1970     int lumshift2_double_fwd = 0;
1971     int replication_mode = 0;
1972
1973     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1974     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1975
1976     if (!pic_param->sequence_fields.bits.interlace ||
1977         (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
1978         picture_type = pic_param->picture_fields.bits.picture_type;
1979     } else {/* Field-Interlace */
1980         is_first_field = pic_param->picture_fields.bits.is_first_field;
1981         picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
1982     }
1983
1984     if (picture_type == GEN7_VC1_P_PICTURE ||
1985         picture_type == GEN7_VC1_B_PICTURE) {
1986         if (gen7_mfd_context->reference_surface[0].surface_id != VA_INVALID_ID)
1987             gen7_vc1_top_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[0].obj_surface->private_data);
1988         else
1989             gen7_vc1_top_surface = NULL;
1990
1991         if (gen7_vc1_top_surface) {
1992             intensitycomp_single_fwd = !!gen7_vc1_top_surface->intensity_compensation_top;
1993             lumscale1_single_fwd = gen7_vc1_top_surface->luma_scale_top[0];
1994             lumshift1_single_fwd = gen7_vc1_top_surface->luma_shift_top[0];
1995             if (gen7_vc1_top_surface->intensity_compensation_top == 2) {
1996                 intensitycomp_double_fwd = 1;
1997                 lumscale1_double_fwd = gen7_vc1_top_surface->luma_scale_top[1];
1998                 lumshift1_double_fwd = gen7_vc1_top_surface->luma_shift_top[1];
1999             }
2000         }
2001
2002         if (pic_param->sequence_fields.bits.interlace &&
2003             pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
2004             if (gen7_mfd_context->reference_surface[2].surface_id != VA_INVALID_ID)
2005                 gen7_vc1_bottom_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[2].obj_surface->private_data);
2006             else
2007                 gen7_vc1_bottom_surface = NULL;
2008
2009             if (gen7_vc1_bottom_surface) {
2010                 intensitycomp_single_fwd |= !!gen7_vc1_bottom_surface->intensity_compensation_bottom << 1;
2011                 lumscale2_single_fwd = gen7_vc1_bottom_surface->luma_scale_bottom[0];
2012                 lumshift2_single_fwd = gen7_vc1_bottom_surface->luma_shift_bottom[0];
2013                 if (gen7_vc1_bottom_surface->intensity_compensation_bottom == 2) {
2014                     intensitycomp_double_fwd |= 2;
2015                     lumscale2_double_fwd = gen7_vc1_bottom_surface->luma_scale_bottom[1];
2016                     lumshift2_double_fwd = gen7_vc1_bottom_surface->luma_shift_bottom[1];
2017                 }
2018             }
2019         }
2020     }
2021
2022     if (picture_type == GEN7_VC1_B_PICTURE) {
2023         if (gen7_mfd_context->reference_surface[1].surface_id != VA_INVALID_ID)
2024             gen7_vc1_top_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[1].obj_surface->private_data);
2025         else
2026             gen7_vc1_top_surface = NULL;
2027
2028         if (gen7_vc1_top_surface) {
2029             intensitycomp_single_bwd = !!gen7_vc1_top_surface->intensity_compensation_top;
2030             lumscale1_single_bwd = gen7_vc1_top_surface->luma_scale_top[0];
2031             lumshift1_single_bwd = gen7_vc1_top_surface->luma_shift_top[0];
2032         }
2033
2034         if (pic_param->sequence_fields.bits.interlace &&
2035             pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
2036             if (gen7_mfd_context->reference_surface[3].surface_id != VA_INVALID_ID)
2037                 gen7_vc1_bottom_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[3].obj_surface->private_data);
2038             else
2039                 gen7_vc1_bottom_surface = NULL;
2040
2041             if (gen7_vc1_bottom_surface) {
2042                 intensitycomp_single_bwd |= !!gen7_vc1_bottom_surface->intensity_compensation_bottom << 1;
2043                 lumscale2_single_bwd = gen7_vc1_bottom_surface->luma_scale_bottom[0];
2044                 lumshift2_single_bwd = gen7_vc1_bottom_surface->luma_shift_bottom[0];
2045             }
2046         }
2047     }
2048
2049     if (pic_param->sequence_fields.bits.interlace &&
2050         pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
2051         if (picture_type == GEN7_VC1_P_PICTURE)
2052             replication_mode = 0x5;
2053         else if (picture_type == GEN7_VC1_B_PICTURE)
2054             replication_mode = 0xf;
2055     }
2056
2057     BEGIN_BCS_BATCH(batch, 6);
2058     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
2059     OUT_BCS_BATCH(batch,
2060                   intensitycomp_double_fwd << 14 |
2061                   0 << 12 |
2062                   intensitycomp_single_fwd << 10 |
2063                   intensitycomp_single_bwd << 8 |
2064                   replication_mode << 4 |
2065                   0);
2066     OUT_BCS_BATCH(batch,
2067                   lumshift2_single_fwd << 24 |
2068                   lumshift1_single_fwd << 16 |
2069                   lumscale2_single_fwd << 8 |
2070                   lumscale1_single_fwd << 0);
2071     OUT_BCS_BATCH(batch,
2072                   lumshift2_double_fwd << 24 |
2073                   lumshift1_double_fwd << 16 |
2074                   lumscale2_double_fwd << 8 |
2075                   lumscale1_double_fwd << 0);
2076     OUT_BCS_BATCH(batch,
2077                   lumshift2_single_bwd << 24 |
2078                   lumshift1_single_bwd << 16 |
2079                   lumscale2_single_bwd << 8 |
2080                   lumscale1_single_bwd << 0);
2081     OUT_BCS_BATCH(batch,
2082                   0 << 24 |
2083                   0 << 16 |
2084                   0 << 8 |
2085                   0 << 0);
2086     ADVANCE_BCS_BATCH(batch);
2087 }
2088
2089
2090 static void
2091 gen7_mfd_vc1_directmode_state(VADriverContextP ctx,
2092                               struct decode_state *decode_state,
2093                               struct gen7_mfd_context *gen7_mfd_context)
2094 {
2095     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2096     VAPictureParameterBufferVC1 *pic_param;
2097     struct object_surface *obj_surface;
2098     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
2099     int picture_type;
2100     int is_first_field = 1;
2101
2102     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2103
2104     if (!pic_param->sequence_fields.bits.interlace ||
2105         (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
2106         picture_type = pic_param->picture_fields.bits.picture_type;
2107     } else {/* Field-Interlace */
2108         is_first_field = pic_param->picture_fields.bits.is_first_field;
2109         picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
2110     }
2111
2112     if (picture_type == GEN7_VC1_P_PICTURE ||
2113         picture_type == GEN7_VC1_SKIPPED_PICTURE) {
2114         obj_surface = decode_state->render_object;
2115
2116         if (pic_param->sequence_fields.bits.interlace &&
2117             (pic_param->picture_fields.bits.frame_coding_mode == 2) && /* Field-Interlace */
2118             (pic_param->picture_fields.bits.top_field_first ^ is_first_field))
2119             dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_bottom;
2120         else
2121             dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_top;
2122     }
2123
2124     if (picture_type == GEN7_VC1_B_PICTURE) {
2125         obj_surface = decode_state->reference_objects[1];
2126         if (pic_param->backward_reference_picture != VA_INVALID_ID &&
2127             obj_surface &&
2128             obj_surface->private_data) {
2129
2130             if (pic_param->sequence_fields.bits.interlace &&
2131                 (pic_param->picture_fields.bits.frame_coding_mode == 2) && /* Field-Interlace */
2132                 (pic_param->picture_fields.bits.top_field_first ^ is_first_field))
2133                 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_bottom;
2134             else
2135                 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_top;
2136         }
2137     }
2138
2139     BEGIN_BCS_BATCH(batch, 3);
2140     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
2141
2142     if (dmv_write_buffer)
2143         OUT_BCS_RELOC(batch, dmv_write_buffer,
2144                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2145                       0);
2146     else
2147         OUT_BCS_BATCH(batch, 0);
2148
2149     if (dmv_read_buffer)
2150         OUT_BCS_RELOC(batch, dmv_read_buffer,
2151                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2152                       0);
2153     else
2154         OUT_BCS_BATCH(batch, 0);
2155
2156     ADVANCE_BCS_BATCH(batch);
2157 }
2158
/*
 * Translate the macroblock bit offset supplied with a slice into an offset
 * inside the raw slice buffer.
 *
 * For profile 3 (Advanced Profile) with a header of at least one whole
 * byte, the header bytes are walked and every 00 00 03 sequence followed
 * by a byte smaller than 4 makes the raw position advance by one extra
 * byte.  If such a sequence begins exactly at the last header byte, its
 * 03 byte is overwritten with 0 in buf.  For any other profile, or when
 * the header is shorter than one byte, the offset is returned unchanged.
 *
 * buf                       start of the slice data (may be modified)
 * in_slice_data_bit_offset  macroblock offset in bits as given by the caller
 * profile                   VC-1 profile value from the picture parameters
 *
 * Returns the macroblock offset in bits relative to buf.
 */
static int
gen7_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    const int header_bytes = in_slice_data_bit_offset / 8;
    int counted = 0;    /* header bytes accounted for so far */
    int pos = 0;        /* matching position in the raw buffer */

    /* Simple or Main Profile, or nothing to scan */
    if (profile != 3 || header_bytes == 0)
        return in_slice_data_bit_offset;

    /* Advanced Profile: scan all header bytes except the last one */
    while (counted < header_bytes - 1) {
        if (buf[pos] == 0 && buf[pos + 1] == 0 && buf[pos + 2] == 3 && buf[pos + 3] < 4) {
            counted += 1;
            pos += 2;
        }

        counted += 1;
        pos += 1;
    }

    /* The scan may overshoot; the last byte is handled only when it did not */
    if (counted == header_bytes - 1) {
        if (buf[pos] == 0 && buf[pos + 1] == 0 && buf[pos + 2] == 3 && buf[pos + 3] < 4) {
            buf[pos + 2] = 0;
            pos += 1;
        }

        pos += 1;
    }

    return 8 * pos + in_slice_data_bit_offset % 8;
}
2186
2187 static void
2188 gen7_mfd_vc1_bsd_object(VADriverContextP ctx,
2189                         VAPictureParameterBufferVC1 *pic_param,
2190                         VASliceParameterBufferVC1 *slice_param,
2191                         VASliceParameterBufferVC1 *next_slice_param,
2192                         dri_bo *slice_data_bo,
2193                         struct gen7_mfd_context *gen7_mfd_context)
2194 {
2195     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2196     int next_slice_start_vert_pos;
2197     int macroblock_offset;
2198     uint8_t *slice_data = NULL;
2199
2200     dri_bo_map(slice_data_bo, True);
2201     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
2202     macroblock_offset = gen7_mfd_vc1_get_macroblock_bit_offset(slice_data,
2203                                                                slice_param->macroblock_offset,
2204                                                                pic_param->sequence_fields.bits.profile);
2205     dri_bo_unmap(slice_data_bo);
2206
2207     if (next_slice_param)
2208         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
2209     else if (!pic_param->sequence_fields.bits.interlace ||
2210              pic_param->picture_fields.bits.frame_coding_mode < 2) /* Progressive or Frame-Interlace */
2211         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
2212     else /* Field-Interlace */
2213         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 32) / 32;
2214
2215     BEGIN_BCS_BATCH(batch, 5);
2216     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
2217     OUT_BCS_BATCH(batch,
2218                   slice_param->slice_data_size - (macroblock_offset >> 3));
2219     OUT_BCS_BATCH(batch,
2220                   slice_param->slice_data_offset + (macroblock_offset >> 3));
2221     OUT_BCS_BATCH(batch,
2222                   slice_param->slice_vertical_position << 16 |
2223                   next_slice_start_vert_pos << 0);
2224     OUT_BCS_BATCH(batch,
2225                   (macroblock_offset & 0x7));
2226     ADVANCE_BCS_BATCH(batch);
2227 }
2228
2229 static void
2230 gen7_mfd_vc1_decode_picture(VADriverContextP ctx,
2231                             struct decode_state *decode_state,
2232                             struct gen7_mfd_context *gen7_mfd_context)
2233 {
2234     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2235     VAPictureParameterBufferVC1 *pic_param;
2236     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
2237     dri_bo *slice_data_bo;
2238     int i, j;
2239
2240     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2241     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2242
2243     gen7_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
2244     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2245     intel_batchbuffer_emit_mi_flush(batch);
2246     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2247     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2248     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2249     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2250     gen7_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
2251     gen7_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
2252     gen7_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
2253
2254     for (j = 0; j < decode_state->num_slice_params; j++) {
2255         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2256         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
2257         slice_data_bo = decode_state->slice_datas[j]->bo;
2258         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
2259
2260         if (j == decode_state->num_slice_params - 1)
2261             next_slice_group_param = NULL;
2262         else
2263             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
2264
2265         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2266             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2267
2268             if (i < decode_state->slice_params[j]->num_elements - 1)
2269                 next_slice_param = slice_param + 1;
2270             else
2271                 next_slice_param = next_slice_group_param;
2272
2273             gen7_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2274             slice_param++;
2275         }
2276     }
2277
2278     intel_batchbuffer_end_atomic(batch);
2279     intel_batchbuffer_flush(batch);
2280 }
2281
2282 static void
2283 gen7_mfd_jpeg_decode_init(VADriverContextP ctx,
2284                           struct decode_state *decode_state,
2285                           struct gen7_mfd_context *gen7_mfd_context)
2286 {
2287     struct object_surface *obj_surface;
2288     VAPictureParameterBufferJPEGBaseline *pic_param;
2289     int subsampling = SUBSAMPLE_YUV420;
2290     int fourcc = VA_FOURCC_IMC3;
2291
2292     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2293
2294     if (pic_param->num_components == 1) {
2295         subsampling = SUBSAMPLE_YUV400;
2296         fourcc = VA_FOURCC_Y800;
2297     } else if (pic_param->num_components == 3) {
2298         int h1 = pic_param->components[0].h_sampling_factor;
2299         int h2 = pic_param->components[1].h_sampling_factor;
2300         int h3 = pic_param->components[2].h_sampling_factor;
2301         int v1 = pic_param->components[0].v_sampling_factor;
2302         int v2 = pic_param->components[1].v_sampling_factor;
2303         int v3 = pic_param->components[2].v_sampling_factor;
2304
2305         if (h1 == 2 * h2 && h2 == h3 &&
2306             v1 == 2 * v2 && v2 == v3) {
2307             subsampling = SUBSAMPLE_YUV420;
2308             fourcc = VA_FOURCC_IMC3;
2309         } else if (h1 == 2 * h2  && h2 == h3 &&
2310                    v1 == v2 && v2 == v3) {
2311             subsampling = SUBSAMPLE_YUV422H;
2312             fourcc = VA_FOURCC_422H;
2313         } else if (h1 == h2 && h2 == h3 &&
2314                    v1 == v2  && v2 == v3) {
2315             subsampling = SUBSAMPLE_YUV444;
2316             fourcc = VA_FOURCC_444P;
2317         } else if (h1 == 4 * h2 && h2 ==  h3 &&
2318                    v1 == v2 && v2 == v3) {
2319             subsampling = SUBSAMPLE_YUV411;
2320             fourcc = VA_FOURCC_411P;
2321         } else if (h1 == h2 && h2 == h3 &&
2322                    v1 == 2 * v2 && v2 == v3) {
2323             subsampling = SUBSAMPLE_YUV422V;
2324             fourcc = VA_FOURCC_422V;
2325         } else
2326             assert(0);
2327     } else {
2328         assert(0);
2329     }
2330
2331     /* Current decoded picture */
2332     obj_surface = decode_state->render_object;
2333     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
2334
2335     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2336     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2337     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2338     gen7_mfd_context->pre_deblocking_output.valid = 1;
2339
2340     gen7_mfd_context->post_deblocking_output.bo = NULL;
2341     gen7_mfd_context->post_deblocking_output.valid = 0;
2342
2343     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2344     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
2345
2346     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2347     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
2348
2349     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2350     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
2351
2352     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2353     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
2354
2355     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2356     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2357 }
2358
2359 static const int va_to_gen7_jpeg_rotation[4] = {
2360     GEN7_JPEG_ROTATION_0,
2361     GEN7_JPEG_ROTATION_90,
2362     GEN7_JPEG_ROTATION_180,
2363     GEN7_JPEG_ROTATION_270
2364 };
2365
2366 static void
2367 gen7_mfd_jpeg_pic_state(VADriverContextP ctx,
2368                         struct decode_state *decode_state,
2369                         struct gen7_mfd_context *gen7_mfd_context)
2370 {
2371     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2372     VAPictureParameterBufferJPEGBaseline *pic_param;
2373     int chroma_type = GEN7_YUV420;
2374     int frame_width_in_blks;
2375     int frame_height_in_blks;
2376
2377     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2378     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2379
2380     if (pic_param->num_components == 1)
2381         chroma_type = GEN7_YUV400;
2382     else if (pic_param->num_components == 3) {
2383         int h1 = pic_param->components[0].h_sampling_factor;
2384         int h2 = pic_param->components[1].h_sampling_factor;
2385         int h3 = pic_param->components[2].h_sampling_factor;
2386         int v1 = pic_param->components[0].v_sampling_factor;
2387         int v2 = pic_param->components[1].v_sampling_factor;
2388         int v3 = pic_param->components[2].v_sampling_factor;
2389
2390         if (h1 == 2 * h2 && h2 == h3 &&
2391             v1 == 2 * v2 && v2 == v3)
2392             chroma_type = GEN7_YUV420;
2393         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2394                  v1 == 1 && v2 == 1 && v3 == 1)
2395             chroma_type = GEN7_YUV422H_2Y;
2396         else if (h1 == h2 && h2 == h3 &&
2397                  v1 == v2 && v2 == v3)
2398             chroma_type = GEN7_YUV444;
2399         else if (h1 == 4 * h2 && h2 == h3 &&
2400                  v1 == v2 && v2 == v3)
2401             chroma_type = GEN7_YUV411;
2402         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2403                  v1 == 2 && v2 == 1 && v3 == 1)
2404             chroma_type = GEN7_YUV422V_2Y;
2405         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2406                  v1 == 2 && v2 == 2 && v3 == 2)
2407             chroma_type = GEN7_YUV422H_4Y;
2408         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2409                  v1 == 2 && v2 == 1 && v3 == 1)
2410             chroma_type = GEN7_YUV422V_4Y;
2411         else
2412             assert(0);
2413     }
2414
2415     if (chroma_type == GEN7_YUV400 ||
2416         chroma_type == GEN7_YUV444 ||
2417         chroma_type == GEN7_YUV422V_2Y) {
2418         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2419         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2420     } else if (chroma_type == GEN7_YUV411) {
2421         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2422         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2423     } else {
2424         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2425         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2426     }
2427
2428     BEGIN_BCS_BATCH(batch, 3);
2429     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2430     OUT_BCS_BATCH(batch,
2431                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2432                   (chroma_type << 0));
2433     OUT_BCS_BATCH(batch,
2434                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2435                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2436     ADVANCE_BCS_BATCH(batch);
2437 }
2438
2439 static const int va_to_gen7_jpeg_hufftable[2] = {
2440     MFX_HUFFTABLE_ID_Y,
2441     MFX_HUFFTABLE_ID_UV
2442 };
2443
2444 static void
2445 gen7_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2446                                struct decode_state *decode_state,
2447                                struct gen7_mfd_context *gen7_mfd_context,
2448                                int num_tables)
2449 {
2450     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2451     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2452     int index;
2453
2454     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2455         return;
2456
2457     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2458
2459     for (index = 0; index < num_tables; index++) {
2460         int id = va_to_gen7_jpeg_hufftable[index];
2461         if (!huffman_table->load_huffman_table[index])
2462             continue;
2463         BEGIN_BCS_BATCH(batch, 53);
2464         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2465         OUT_BCS_BATCH(batch, id);
2466         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2467         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2468         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2469         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2470         ADVANCE_BCS_BATCH(batch);
2471     }
2472 }
2473
2474 static const int va_to_gen7_jpeg_qm[5] = {
2475     -1,
2476     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2477     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2478     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2479     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2480 };
2481
2482 static void
2483 gen7_mfd_jpeg_qm_state(VADriverContextP ctx,
2484                        struct decode_state *decode_state,
2485                        struct gen7_mfd_context *gen7_mfd_context)
2486 {
2487     VAPictureParameterBufferJPEGBaseline *pic_param;
2488     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2489     int index;
2490
2491     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2492         return;
2493
2494     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2495     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2496
2497     assert(pic_param->num_components <= 3);
2498
2499     for (index = 0; index < pic_param->num_components; index++) {
2500         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2501         int qm_type;
2502         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2503         unsigned char raster_qm[64];
2504         int j;
2505
2506         if (id > 4 || id < 1)
2507             continue;
2508
2509         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2510             continue;
2511
2512         qm_type = va_to_gen7_jpeg_qm[id];
2513
2514         for (j = 0; j < 64; j++)
2515             raster_qm[zigzag_direct[j]] = qm[j];
2516
2517         gen7_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2518     }
2519 }
2520
2521 static void
2522 gen7_mfd_jpeg_bsd_object(VADriverContextP ctx,
2523                          VAPictureParameterBufferJPEGBaseline *pic_param,
2524                          VASliceParameterBufferJPEGBaseline *slice_param,
2525                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2526                          dri_bo *slice_data_bo,
2527                          struct gen7_mfd_context *gen7_mfd_context)
2528 {
2529     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2530     int scan_component_mask = 0;
2531     int i;
2532
2533     assert(slice_param->num_components > 0);
2534     assert(slice_param->num_components < 4);
2535     assert(slice_param->num_components <= pic_param->num_components);
2536
2537     for (i = 0; i < slice_param->num_components; i++) {
2538         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2539         case 1:
2540             scan_component_mask |= (1 << 0);
2541             break;
2542         case 2:
2543             scan_component_mask |= (1 << 1);
2544             break;
2545         case 3:
2546             scan_component_mask |= (1 << 2);
2547             break;
2548         default:
2549             assert(0);
2550             break;
2551         }
2552     }
2553
2554     BEGIN_BCS_BATCH(batch, 6);
2555     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2556     OUT_BCS_BATCH(batch,
2557                   slice_param->slice_data_size);
2558     OUT_BCS_BATCH(batch,
2559                   slice_param->slice_data_offset);
2560     OUT_BCS_BATCH(batch,
2561                   slice_param->slice_horizontal_position << 16 |
2562                   slice_param->slice_vertical_position << 0);
2563     OUT_BCS_BATCH(batch,
2564                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2565                   (scan_component_mask << 27) |                 /* scan components */
2566                   (0 << 26) |   /* disable interrupt allowed */
2567                   (slice_param->num_mcus << 0));                /* MCU count */
2568     OUT_BCS_BATCH(batch,
2569                   (slice_param->restart_interval << 0));    /* RestartInterval */
2570     ADVANCE_BCS_BATCH(batch);
2571 }
2572
2573 /* Workaround for JPEG decoding on Ivybridge */
2574
/*
 * Built-in clip used by the JPEG workaround: its dimensions, the raw
 * slice bytes (14 used out of 32, the rest zero) and the parameters that
 * go with them.
 */
static struct {
    int width;
    int height;
    unsigned char data[32];
    int data_size;
    int data_bit_offset;
    int qp;
} gen7_jpeg_wa_clip = {
    .width = 16,
    .height = 16,
    .data = {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    .data_size = 14,
    .data_bit_offset = 40,
    .qp = 28,
};
2593
2594 static void
2595 gen7_jpeg_wa_init(VADriverContextP ctx,
2596                   struct gen7_mfd_context *gen7_mfd_context)
2597 {
2598     struct i965_driver_data *i965 = i965_driver_data(ctx);
2599     VAStatus status;
2600     struct object_surface *obj_surface;
2601
2602     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2603         i965_DestroySurfaces(ctx,
2604                              &gen7_mfd_context->jpeg_wa_surface_id,
2605                              1);
2606
2607     status = i965_CreateSurfaces(ctx,
2608                                  gen7_jpeg_wa_clip.width,
2609                                  gen7_jpeg_wa_clip.height,
2610                                  VA_RT_FORMAT_YUV420,
2611                                  1,
2612                                  &gen7_mfd_context->jpeg_wa_surface_id);
2613     assert(status == VA_STATUS_SUCCESS);
2614
2615     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2616     assert(obj_surface);
2617     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2618     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2619
2620     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2621         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2622                                                                "JPEG WA data",
2623                                                                0x1000,
2624                                                                0x1000);
2625         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2626                        0,
2627                        gen7_jpeg_wa_clip.data_size,
2628                        gen7_jpeg_wa_clip.data);
2629     }
2630 }
2631
2632 static void
2633 gen7_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2634                               struct gen7_mfd_context *gen7_mfd_context)
2635 {
2636     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2637
2638     BEGIN_BCS_BATCH(batch, 5);
2639     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2640     OUT_BCS_BATCH(batch,
2641                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2642                   (MFD_MODE_VLD << 15) | /* VLD mode */
2643                   (0 << 10) | /* disable Stream-Out */
2644                   (0 << 9)  | /* Post Deblocking Output */
2645                   (1 << 8)  | /* Pre Deblocking Output */
2646                   (0 << 5)  | /* not in stitch mode */
2647                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2648                   (MFX_FORMAT_AVC << 0));
2649     OUT_BCS_BATCH(batch,
2650                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2651                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2652                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2653                   (0 << 1)  |
2654                   (0 << 0));
2655     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2656     OUT_BCS_BATCH(batch, 0); /* reserved */
2657     ADVANCE_BCS_BATCH(batch);
2658 }
2659
2660 static void
2661 gen7_jpeg_wa_surface_state(VADriverContextP ctx,
2662                            struct gen7_mfd_context *gen7_mfd_context)
2663 {
2664     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2665     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2666
2667     BEGIN_BCS_BATCH(batch, 6);
2668     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2669     OUT_BCS_BATCH(batch, 0);
2670     OUT_BCS_BATCH(batch,
2671                   ((obj_surface->orig_width - 1) << 18) |
2672                   ((obj_surface->orig_height - 1) << 4));
2673     OUT_BCS_BATCH(batch,
2674                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2675                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2676                   (0 << 22) | /* surface object control state, ignored */
2677                   ((obj_surface->width - 1) << 3) | /* pitch */
2678                   (0 << 2)  | /* must be 0 */
2679                   (1 << 1)  | /* must be tiled */
2680                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2681     OUT_BCS_BATCH(batch,
2682                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2683                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2684     OUT_BCS_BATCH(batch,
2685                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2686                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
2687     ADVANCE_BCS_BATCH(batch);
2688 }
2689
2690 static void
2691 gen7_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2692                                  struct gen7_mfd_context *gen7_mfd_context)
2693 {
2694     struct i965_driver_data *i965 = i965_driver_data(ctx);
2695     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2696     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2697     dri_bo *intra_bo;
2698     int i;
2699
2700     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2701                             "intra row store",
2702                             128 * 64,
2703                             0x1000);
2704
2705     BEGIN_BCS_BATCH(batch, 24);
2706     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
2707     OUT_BCS_RELOC(batch,
2708                   obj_surface->bo,
2709                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2710                   0);
2711
2712     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2713
2714     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2715     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2716
2717     OUT_BCS_RELOC(batch,
2718                   intra_bo,
2719                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2720                   0);
2721
2722     OUT_BCS_BATCH(batch, 0);
2723
2724     /* DW 7..22 */
2725     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2726         OUT_BCS_BATCH(batch, 0);
2727     }
2728
2729     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
2730     ADVANCE_BCS_BATCH(batch);
2731
2732     dri_bo_unreference(intra_bo);
2733 }
2734
2735 static void
2736 gen7_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2737                                      struct gen7_mfd_context *gen7_mfd_context)
2738 {
2739     struct i965_driver_data *i965 = i965_driver_data(ctx);
2740     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2741     dri_bo *bsd_mpc_bo, *mpr_bo;
2742
2743     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2744                               "bsd mpc row store",
2745                               11520, /* 1.5 * 120 * 64 */
2746                               0x1000);
2747
2748     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2749                           "mpr row store",
2750                           7680, /* 1. 0 * 120 * 64 */
2751                           0x1000);
2752
2753     BEGIN_BCS_BATCH(batch, 4);
2754     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
2755
2756     OUT_BCS_RELOC(batch,
2757                   bsd_mpc_bo,
2758                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2759                   0);
2760
2761     OUT_BCS_RELOC(batch,
2762                   mpr_bo,
2763                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2764                   0);
2765     OUT_BCS_BATCH(batch, 0);
2766
2767     ADVANCE_BCS_BATCH(batch);
2768
2769     dri_bo_unreference(bsd_mpc_bo);
2770     dri_bo_unreference(mpr_bo);
2771 }
2772
2773 static void
2774 gen7_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2775                           struct gen7_mfd_context *gen7_mfd_context)
2776 {
2777
2778 }
2779
2780 static void
2781 gen7_jpeg_wa_avc_img_state(VADriverContextP ctx,
2782                            struct gen7_mfd_context *gen7_mfd_context)
2783 {
2784     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2785     int img_struct = 0;
2786     int mbaff_frame_flag = 0;
2787     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2788
2789     BEGIN_BCS_BATCH(batch, 16);
2790     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2791     OUT_BCS_BATCH(batch,
2792                   (width_in_mbs * height_in_mbs - 1));
2793     OUT_BCS_BATCH(batch,
2794                   ((height_in_mbs - 1) << 16) |
2795                   ((width_in_mbs - 1) << 0));
2796     OUT_BCS_BATCH(batch,
2797                   (0 << 24) |
2798                   (0 << 16) |
2799                   (0 << 14) |
2800                   (0 << 13) |
2801                   (0 << 12) | /* differ from GEN6 */
2802                   (0 << 10) |
2803                   (img_struct << 8));
2804     OUT_BCS_BATCH(batch,
2805                   (1 << 10) | /* 4:2:0 */
2806                   (1 << 7) |  /* CABAC */
2807                   (0 << 6) |
2808                   (0 << 5) |
2809                   (0 << 4) |
2810                   (0 << 3) |
2811                   (1 << 2) |
2812                   (mbaff_frame_flag << 1) |
2813                   (0 << 0));
2814     OUT_BCS_BATCH(batch, 0);
2815     OUT_BCS_BATCH(batch, 0);
2816     OUT_BCS_BATCH(batch, 0);
2817     OUT_BCS_BATCH(batch, 0);
2818     OUT_BCS_BATCH(batch, 0);
2819     OUT_BCS_BATCH(batch, 0);
2820     OUT_BCS_BATCH(batch, 0);
2821     OUT_BCS_BATCH(batch, 0);
2822     OUT_BCS_BATCH(batch, 0);
2823     OUT_BCS_BATCH(batch, 0);
2824     OUT_BCS_BATCH(batch, 0);
2825     ADVANCE_BCS_BATCH(batch);
2826 }
2827
2828 static void
2829 gen7_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2830                                   struct gen7_mfd_context *gen7_mfd_context)
2831 {
2832     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2833     int i;
2834
2835     BEGIN_BCS_BATCH(batch, 69);
2836     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
2837
2838     /* reference surfaces 0..15 */
2839     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2840         OUT_BCS_BATCH(batch, 0); /* top */
2841         OUT_BCS_BATCH(batch, 0); /* bottom */
2842     }
2843
2844     /* the current decoding frame/field */
2845     OUT_BCS_BATCH(batch, 0); /* top */
2846     OUT_BCS_BATCH(batch, 0); /* bottom */
2847
2848     /* POC List */
2849     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2850         OUT_BCS_BATCH(batch, 0);
2851         OUT_BCS_BATCH(batch, 0);
2852     }
2853
2854     OUT_BCS_BATCH(batch, 0);
2855     OUT_BCS_BATCH(batch, 0);
2856
2857     ADVANCE_BCS_BATCH(batch);
2858 }
2859
2860 static void
2861 gen7_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2862                                      struct gen7_mfd_context *gen7_mfd_context)
2863 {
2864     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2865
2866     BEGIN_BCS_BATCH(batch, 11);
2867     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2868     OUT_BCS_RELOC(batch,
2869                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2870                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2871                   0);
2872     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2873     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2874     OUT_BCS_BATCH(batch, 0);
2875     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2876     OUT_BCS_BATCH(batch, 0);
2877     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2878     OUT_BCS_BATCH(batch, 0);
2879     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2880     OUT_BCS_BATCH(batch, 0);
2881     ADVANCE_BCS_BATCH(batch);
2882 }
2883
2884 static void
2885 gen7_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2886                             struct gen7_mfd_context *gen7_mfd_context)
2887 {
2888     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2889
2890     /* the input bitsteam format on GEN7 differs from GEN6 */
2891     BEGIN_BCS_BATCH(batch, 6);
2892     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2893     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2894     OUT_BCS_BATCH(batch, 0);
2895     OUT_BCS_BATCH(batch,
2896                   (0 << 31) |
2897                   (0 << 14) |
2898                   (0 << 12) |
2899                   (0 << 10) |
2900                   (0 << 8));
2901     OUT_BCS_BATCH(batch,
2902                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2903                   (0 << 5)  |
2904                   (0 << 4)  |
2905                   (1 << 3) | /* LastSlice Flag */
2906                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2907     OUT_BCS_BATCH(batch, 0);
2908     ADVANCE_BCS_BATCH(batch);
2909 }
2910
2911 static void
2912 gen7_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2913                              struct gen7_mfd_context *gen7_mfd_context)
2914 {
2915     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2916     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2917     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2918     int first_mb_in_slice = 0;
2919     int slice_type = SLICE_TYPE_I;
2920
2921     BEGIN_BCS_BATCH(batch, 11);
2922     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2923     OUT_BCS_BATCH(batch, slice_type);
2924     OUT_BCS_BATCH(batch,
2925                   (num_ref_idx_l1 << 24) |
2926                   (num_ref_idx_l0 << 16) |
2927                   (0 << 8) |
2928                   (0 << 0));
2929     OUT_BCS_BATCH(batch,
2930                   (0 << 29) |
2931                   (1 << 27) |   /* disable Deblocking */
2932                   (0 << 24) |
2933                   (gen7_jpeg_wa_clip.qp << 16) |
2934                   (0 << 8) |
2935                   (0 << 0));
2936     OUT_BCS_BATCH(batch,
2937                   (slice_ver_pos << 24) |
2938                   (slice_hor_pos << 16) |
2939                   (first_mb_in_slice << 0));
2940     OUT_BCS_BATCH(batch,
2941                   (next_slice_ver_pos << 16) |
2942                   (next_slice_hor_pos << 0));
2943     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2944     OUT_BCS_BATCH(batch, 0);
2945     OUT_BCS_BATCH(batch, 0);
2946     OUT_BCS_BATCH(batch, 0);
2947     OUT_BCS_BATCH(batch, 0);
2948     ADVANCE_BCS_BATCH(batch);
2949 }
2950
2951 static void
2952 gen7_mfd_jpeg_wa(VADriverContextP ctx,
2953                  struct gen7_mfd_context *gen7_mfd_context)
2954 {
2955     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2956     gen7_jpeg_wa_init(ctx, gen7_mfd_context);
2957     intel_batchbuffer_emit_mi_flush(batch);
2958     gen7_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2959     gen7_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2960     gen7_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2961     gen7_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2962     gen7_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2963     gen7_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2964     gen7_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2965
2966     gen7_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2967     gen7_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2968     gen7_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2969 }
2970
2971 void
2972 gen7_mfd_jpeg_decode_picture(VADriverContextP ctx,
2973                              struct decode_state *decode_state,
2974                              struct gen7_mfd_context *gen7_mfd_context)
2975 {
2976     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2977     VAPictureParameterBufferJPEGBaseline *pic_param;
2978     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2979     dri_bo *slice_data_bo;
2980     int i, j, max_selector = 0;
2981
2982     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2983     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2984
2985     /* Currently only support Baseline DCT */
2986     gen7_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2987     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2988     gen7_mfd_jpeg_wa(ctx, gen7_mfd_context);
2989     intel_batchbuffer_emit_mi_flush(batch);
2990     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2991     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2992     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2993     gen7_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2994     gen7_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2995
2996     for (j = 0; j < decode_state->num_slice_params; j++) {
2997         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2998         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2999         slice_data_bo = decode_state->slice_datas[j]->bo;
3000         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3001
3002         if (j == decode_state->num_slice_params - 1)
3003             next_slice_group_param = NULL;
3004         else
3005             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3006
3007         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3008             int component;
3009
3010             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3011
3012             if (i < decode_state->slice_params[j]->num_elements - 1)
3013                 next_slice_param = slice_param + 1;
3014             else
3015                 next_slice_param = next_slice_group_param;
3016
3017             for (component = 0; component < slice_param->num_components; component++) {
3018                 if (max_selector < slice_param->components[component].dc_table_selector)
3019                     max_selector = slice_param->components[component].dc_table_selector;
3020
3021                 if (max_selector < slice_param->components[component].ac_table_selector)
3022                     max_selector = slice_param->components[component].ac_table_selector;
3023             }
3024
3025             slice_param++;
3026         }
3027     }
3028
3029     assert(max_selector < 2);
3030     gen7_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
3031
3032     for (j = 0; j < decode_state->num_slice_params; j++) {
3033         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3034         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3035         slice_data_bo = decode_state->slice_datas[j]->bo;
3036         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3037
3038         if (j == decode_state->num_slice_params - 1)
3039             next_slice_group_param = NULL;
3040         else
3041             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3042
3043         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3044             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3045
3046             if (i < decode_state->slice_params[j]->num_elements - 1)
3047                 next_slice_param = slice_param + 1;
3048             else
3049                 next_slice_param = next_slice_group_param;
3050
3051             gen7_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
3052             slice_param++;
3053         }
3054     }
3055
3056     intel_batchbuffer_end_atomic(batch);
3057     intel_batchbuffer_flush(batch);
3058 }
3059
3060 static VAStatus
3061 gen7_mfd_decode_picture(VADriverContextP ctx,
3062                         VAProfile profile,
3063                         union codec_state *codec_state,
3064                         struct hw_context *hw_context)
3065
3066 {
3067     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3068     struct decode_state *decode_state = &codec_state->decode;
3069     VAStatus vaStatus;
3070
3071     assert(gen7_mfd_context);
3072
3073     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3074
3075     if (vaStatus != VA_STATUS_SUCCESS)
3076         goto out;
3077
3078     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3079
3080     switch (profile) {
3081     case VAProfileMPEG2Simple:
3082     case VAProfileMPEG2Main:
3083         gen7_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3084         break;
3085
3086     case VAProfileH264ConstrainedBaseline:
3087     case VAProfileH264Main:
3088     case VAProfileH264High:
3089     case VAProfileH264StereoHigh:
3090         gen7_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3091         break;
3092
3093     case VAProfileVC1Simple:
3094     case VAProfileVC1Main:
3095     case VAProfileVC1Advanced:
3096         gen7_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3097         break;
3098
3099     case VAProfileJPEGBaseline:
3100         gen7_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3101         break;
3102
3103     default:
3104         assert(0);
3105         break;
3106     }
3107
3108     vaStatus = VA_STATUS_SUCCESS;
3109
3110 out:
3111     return vaStatus;
3112 }
3113
3114 static void
3115 gen7_mfd_context_destroy(void *hw_context)
3116 {
3117     VADriverContextP ctx;
3118     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3119
3120     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3121
3122     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3123     gen7_mfd_context->post_deblocking_output.bo = NULL;
3124
3125     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3126     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3127
3128     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3129     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3130
3131     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3132     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3133
3134     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3135     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3136
3137     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3138     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3139
3140     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3141     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3142
3143     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3144
3145     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3146         i965_DestroySurfaces(ctx,
3147                              &gen7_mfd_context->jpeg_wa_surface_id,
3148                              1);
3149         gen7_mfd_context->jpeg_wa_surface_object = NULL;
3150     }
3151
3152     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3153     free(gen7_mfd_context);
3154 }
3155
3156 static void gen7_mfd_mpeg2_context_init(VADriverContextP ctx,
3157                                         struct gen7_mfd_context *gen7_mfd_context)
3158 {
3159     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3160     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3161     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3162     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3163 }
3164
3165 struct hw_context *
3166 gen7_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3167 {
3168     struct intel_driver_data *intel = intel_driver_data(ctx);
3169     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3170     int i;
3171
3172     assert(gen7_mfd_context);
3173     gen7_mfd_context->base.destroy = gen7_mfd_context_destroy;
3174     gen7_mfd_context->base.run = gen7_mfd_decode_picture;
3175     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3176
3177     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3178         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3179         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3180         gen7_mfd_context->reference_surface[i].obj_surface = NULL;
3181     }
3182
3183     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3184     gen7_mfd_context->jpeg_wa_surface_object = NULL;
3185
3186     switch (obj_config->profile) {
3187     case VAProfileMPEG2Simple:
3188     case VAProfileMPEG2Main:
3189         gen7_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3190         break;
3191
3192     case VAProfileH264ConstrainedBaseline:
3193     case VAProfileH264Main:
3194     case VAProfileH264High:
3195     case VAProfileH264StereoHigh:
3196         gen7_mfd_avc_context_init(ctx, gen7_mfd_context);
3197         break;
3198     default:
3199         break;
3200     }
3201
3202     gen7_mfd_context->driver_context = ctx;
3203     return (struct hw_context *)gen7_mfd_context;
3204 }