Setup VP8 decoding pipeline
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <va/va_dec_jpeg.h>
35 #include <va/va_dec_vp8.h>
36
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
39
40 #include "i965_defines.h"
41 #include "i965_drv_video.h"
42 #include "i965_decoder_utils.h"
43
44 #include "gen7_mfd.h"
45 #include "intel_media.h"
46
47 #define B0_STEP_REV             2
48 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
49
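/* Standard zig-zag scan order; used below to reorder the MPEG-2
 * quantiser matrices before they are loaded with MFX_QM_STATE. */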
50 static const uint32_t zigzag_direct[64] = {
51     0,   1,  8, 16,  9,  2,  3, 10,
52     17, 24, 32, 25, 18, 11,  4,  5,
53     12, 19, 26, 33, 40, 48, 41, 34,
54     27, 20, 13,  6,  7, 14, 21, 28,
55     35, 42, 49, 56, 57, 50, 43, 36,
56     29, 22, 15, 23, 30, 37, 44, 51,
57     58, 59, 52, 45, 38, 31, 39, 46,
58     53, 60, 61, 54, 47, 55, 62, 63
59 };
60
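/* Attach per-surface AVC private data to the decoded picture and allocate
 * the direct-mode MV buffer(s) later referenced by MFX_AVC_DIRECTMODE_STATE. */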
61 static void
62 gen8_mfd_init_avc_surface(VADriverContextP ctx, 
63                           VAPictureParameterBufferH264 *pic_param,
64                           struct object_surface *obj_surface)
65 {
66     struct i965_driver_data *i965 = i965_driver_data(ctx);
67     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
68     int width_in_mbs, height_in_mbs;
69
70     obj_surface->free_private_data = gen_free_avc_surface;
71     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
72     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
73
74     if (!gen7_avc_surface) {
75         gen7_avc_surface = calloc(1, sizeof(GenAvcSurface));
76         assert((obj_surface->size & 0x3f) == 0);
77         obj_surface->private_data = gen7_avc_surface;
78     }
79
80     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
81                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
82
83     if (gen7_avc_surface->dmv_top == NULL) {
84         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
85                                                  "direct mv w/r buffer",
86                                                  width_in_mbs * height_in_mbs * 128,
87                                                  0x1000);
88         assert(gen7_avc_surface->dmv_top);
89     }
90
91     if (gen7_avc_surface->dmv_bottom_flag &&
92         gen7_avc_surface->dmv_bottom == NULL) {
93         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
94                                                     "direct mv w/r buffer",
95                                                     width_in_mbs * height_in_mbs * 128,                                                    
96                                                     0x1000);
97         assert(gen7_avc_surface->dmv_bottom);
98     }
99 }
100
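/* MFX_PIPE_MODE_SELECT: put the MFX engine into VLD decode mode for the
 * selected codec and enable the pre-/post-deblocking outputs as configured. */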
101 static void
102 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
103                           struct decode_state *decode_state,
104                           int standard_select,
105                           struct gen7_mfd_context *gen7_mfd_context)
106 {
107     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
108
109     assert(standard_select == MFX_FORMAT_MPEG2 ||
110            standard_select == MFX_FORMAT_AVC ||
111            standard_select == MFX_FORMAT_VC1 ||
112            standard_select == MFX_FORMAT_JPEG ||
113            standard_select == MFX_FORMAT_VP8);
114
115     BEGIN_BCS_BATCH(batch, 5);
116     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
117     OUT_BCS_BATCH(batch,
118                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
119                   (MFD_MODE_VLD << 15) | /* VLD mode */
120                   (0 << 10) | /* disable Stream-Out */
121                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
122                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
123                   (0 << 5)  | /* not in stitch mode */
124                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
125                   (standard_select << 0));
126     OUT_BCS_BATCH(batch,
127                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
128                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
129                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
130                   (0 << 1)  |
131                   (0 << 0));
132     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
133     OUT_BCS_BATCH(batch, 0); /* reserved */
134     ADVANCE_BCS_BATCH(batch);
135 }
136
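/* MFX_SURFACE_STATE: describe the destination 4:2:0 surface (size, pitch,
 * tiling and the Y offsets of the Cb/Cr planes). */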
137 static void
138 gen8_mfd_surface_state(VADriverContextP ctx,
139                        struct decode_state *decode_state,
140                        int standard_select,
141                        struct gen7_mfd_context *gen7_mfd_context)
142 {
143     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
144     struct object_surface *obj_surface = decode_state->render_object;
145     unsigned int y_cb_offset;
146     unsigned int y_cr_offset;
147
148     assert(obj_surface);
149
150     y_cb_offset = obj_surface->y_cb_offset;
151     y_cr_offset = obj_surface->y_cr_offset;
152
153     BEGIN_BCS_BATCH(batch, 6);
154     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
155     OUT_BCS_BATCH(batch, 0);
156     OUT_BCS_BATCH(batch,
157                   ((obj_surface->orig_height - 1) << 18) |
158                   ((obj_surface->orig_width - 1) << 4));
159     OUT_BCS_BATCH(batch,
160                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
161                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
162                   (0 << 22) | /* surface object control state, ignored */
163                   ((obj_surface->width - 1) << 3) | /* pitch */
164                   (0 << 2)  | /* must be 0 */
165                   (1 << 1)  | /* must be tiled */
166                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
167     OUT_BCS_BATCH(batch,
168                   (0 << 16) | /* X offset for U(Cb), must be 0 */
169                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
170     OUT_BCS_BATCH(batch,
171                   (0 << 16) | /* X offset for V(Cr), must be 0 */
172                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
173     ADVANCE_BCS_BATCH(batch);
174 }
175
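/* MFX_PIPE_BUF_ADDR_STATE: pre-/post-deblocking destinations, the row-store
 * scratch buffers and the reference picture addresses (DW 19..50). */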
176 static void
177 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
178                              struct decode_state *decode_state,
179                              int standard_select,
180                              struct gen7_mfd_context *gen7_mfd_context)
181 {
182     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
183     int i;
184
185     BEGIN_BCS_BATCH(batch, 61);
186     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
187     /* Pre-deblock 1-3 */
188     if (gen7_mfd_context->pre_deblocking_output.valid)
189         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
190                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
191                       0);
192     else
193         OUT_BCS_BATCH(batch, 0);
194
195     OUT_BCS_BATCH(batch, 0);
196     OUT_BCS_BATCH(batch, 0);
197     /* Post-deblocking 4-6 */
198     if (gen7_mfd_context->post_deblocking_output.valid)
199         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
200                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
201                       0);
202     else
203         OUT_BCS_BATCH(batch, 0);
204
205     OUT_BCS_BATCH(batch, 0);
206     OUT_BCS_BATCH(batch, 0);
207
208     /* uncompressed-video & stream out 7-12 */
209     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
210     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
211     OUT_BCS_BATCH(batch, 0);
212     OUT_BCS_BATCH(batch, 0);
213     OUT_BCS_BATCH(batch, 0);
214     OUT_BCS_BATCH(batch, 0);
215
216     /* intra row-store scratch 13-15 */
217     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
218         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
219                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
220                       0);
221     else
222         OUT_BCS_BATCH(batch, 0);
223
224     OUT_BCS_BATCH(batch, 0);
225     OUT_BCS_BATCH(batch, 0);
226     /* deblocking-filter-row-store 16-18 */
227     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
228         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
229                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
230                       0);
231     else
232         OUT_BCS_BATCH(batch, 0);
233     OUT_BCS_BATCH(batch, 0);
234     OUT_BCS_BATCH(batch, 0);
235
236     /* DW 19..50 */
237     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
238         struct object_surface *obj_surface;
239
240         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
241             gen7_mfd_context->reference_surface[i].obj_surface &&
242             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
243             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
244
245             OUT_BCS_RELOC(batch, obj_surface->bo,
246                           I915_GEM_DOMAIN_INSTRUCTION, 0,
247                           0);
248         } else {
249             OUT_BCS_BATCH(batch, 0);
250         }
251         
252         OUT_BCS_BATCH(batch, 0);
253     }
254     
255     /* reference property 51 */
256     OUT_BCS_BATCH(batch, 0);  
257         
258     /* Macroblock status & ILDB 52-57 */
259     OUT_BCS_BATCH(batch, 0);
260     OUT_BCS_BATCH(batch, 0);
261     OUT_BCS_BATCH(batch, 0);
262     OUT_BCS_BATCH(batch, 0);
263     OUT_BCS_BATCH(batch, 0);
264     OUT_BCS_BATCH(batch, 0);
265
266     /* the second Macroblock status 58-60 */    
267     OUT_BCS_BATCH(batch, 0);
268     OUT_BCS_BATCH(batch, 0);
269     OUT_BCS_BATCH(batch, 0);
270
271     ADVANCE_BCS_BATCH(batch);
272 }
273
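/* MFX_IND_OBJ_BASE_ADDR_STATE: point the bitstream fetch at the slice data
 * buffer; the remaining indirect objects are left at zero. */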
274 static void
275 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
276                                  dri_bo *slice_data_bo,
277                                  int standard_select,
278                                  struct gen7_mfd_context *gen7_mfd_context)
279 {
280     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
281
282     BEGIN_BCS_BATCH(batch, 26);
283     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
284     /* MFX In BS 1-5 */
285     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
286     OUT_BCS_BATCH(batch, 0);
287     OUT_BCS_BATCH(batch, 0);
288     /* Upper bound 4-5 */
289     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
290     OUT_BCS_BATCH(batch, 0);
291
292     /* MFX indirect MV 6-10 */
293     OUT_BCS_BATCH(batch, 0);
294     OUT_BCS_BATCH(batch, 0);
295     OUT_BCS_BATCH(batch, 0);
296     OUT_BCS_BATCH(batch, 0);
297     OUT_BCS_BATCH(batch, 0);
298
299     /* MFX IT_COFF 11-15 */
300     OUT_BCS_BATCH(batch, 0);
301     OUT_BCS_BATCH(batch, 0);
302     OUT_BCS_BATCH(batch, 0);
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305
306     /* MFX IT_DBLK 16-20 */
307     OUT_BCS_BATCH(batch, 0);
308     OUT_BCS_BATCH(batch, 0);
309     OUT_BCS_BATCH(batch, 0);
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312
313     /* MFX PAK_BSE object for encoder 21-25 */
314     OUT_BCS_BATCH(batch, 0);
315     OUT_BCS_BATCH(batch, 0);
316     OUT_BCS_BATCH(batch, 0);
317     OUT_BCS_BATCH(batch, 0);
318     OUT_BCS_BATCH(batch, 0);
319
320     ADVANCE_BCS_BATCH(batch);
321 }
322
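/* MFX_BSP_BUF_BASE_ADDR_STATE: BSD/MPC and MPR row-store scratch buffers,
 * plus the bitplane read buffer when one is present (VC-1). */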
323 static void
324 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
325                                  struct decode_state *decode_state,
326                                  int standard_select,
327                                  struct gen7_mfd_context *gen7_mfd_context)
328 {
329     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
330
331     BEGIN_BCS_BATCH(batch, 10);
332     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
333
334     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
335         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
336                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
337                       0);
338     else
339         OUT_BCS_BATCH(batch, 0);
340
341     OUT_BCS_BATCH(batch, 0);
342     OUT_BCS_BATCH(batch, 0);
343     /* MPR Row Store Scratch buffer 4-6 */
344     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
345         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
346                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
347                       0);
348     else
349         OUT_BCS_BATCH(batch, 0);
350
351     OUT_BCS_BATCH(batch, 0);
352     OUT_BCS_BATCH(batch, 0);
353
354     /* Bitplane 7-9 */
355     if (gen7_mfd_context->bitplane_read_buffer.valid)
356         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
357                       I915_GEM_DOMAIN_INSTRUCTION, 0,
358                       0);
359     else
360         OUT_BCS_BATCH(batch, 0);
361     OUT_BCS_BATCH(batch, 0);
362     OUT_BCS_BATCH(batch, 0);
363     ADVANCE_BCS_BATCH(batch);
364 }
365
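/* Load a single quantiser matrix (up to 64 bytes) via MFX_QM_STATE. */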
366 static void
367 gen8_mfd_qm_state(VADriverContextP ctx,
368                   int qm_type,
369                   unsigned char *qm,
370                   int qm_length,
371                   struct gen7_mfd_context *gen7_mfd_context)
372 {
373     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
374     unsigned int qm_buffer[16];
375
376     assert(qm_length <= 16 * 4);
377     memcpy(qm_buffer, qm, qm_length);
378
379     BEGIN_BCS_BATCH(batch, 18);
380     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
381     OUT_BCS_BATCH(batch, qm_type << 0);
382     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
383     ADVANCE_BCS_BATCH(batch);
384 }
385
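/* MFX_AVC_IMG_STATE: frame geometry in macroblocks, QP index offsets and the
 * sequence/picture flags taken from the VA picture parameter buffer. */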
386 static void
387 gen8_mfd_avc_img_state(VADriverContextP ctx,
388                        struct decode_state *decode_state,
389                        struct gen7_mfd_context *gen7_mfd_context)
390 {
391     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
392     int img_struct;
393     int mbaff_frame_flag;
394     unsigned int width_in_mbs, height_in_mbs;
395     VAPictureParameterBufferH264 *pic_param;
396
397     assert(decode_state->pic_param && decode_state->pic_param->buffer);
398     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
399     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
400
401     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
402         img_struct = 1;
403     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
404         img_struct = 3;
405     else
406         img_struct = 0;
407
408     if ((img_struct & 0x1) == 0x1) {
409         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
410     } else {
411         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
412     }
413
414     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
415         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
416         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
417     } else {
418         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
419     }
420
421     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
422                         !pic_param->pic_fields.bits.field_pic_flag);
423
424     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
425     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
426
427     /* MFX unit doesn't support 4:2:2 and 4:4:4 pictures */
428     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
429            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
430     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
431
432     BEGIN_BCS_BATCH(batch, 17);
433     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
434     OUT_BCS_BATCH(batch, 
435                   width_in_mbs * height_in_mbs);
436     OUT_BCS_BATCH(batch, 
437                   ((height_in_mbs - 1) << 16) | 
438                   ((width_in_mbs - 1) << 0));
439     OUT_BCS_BATCH(batch, 
440                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
441                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
442                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
443                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
444                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
445                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
446                   (img_struct << 8));
447     OUT_BCS_BATCH(batch,
448                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
449                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
450                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
451                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
452                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
453                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
454                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
455                   (mbaff_frame_flag << 1) |
456                   (pic_param->pic_fields.bits.field_pic_flag << 0));
457     OUT_BCS_BATCH(batch, 0);
458     OUT_BCS_BATCH(batch, 0);
459     OUT_BCS_BATCH(batch, 0);
460     OUT_BCS_BATCH(batch, 0);
461     OUT_BCS_BATCH(batch, 0);
462     OUT_BCS_BATCH(batch, 0);
463     OUT_BCS_BATCH(batch, 0);
464     OUT_BCS_BATCH(batch, 0);
465     OUT_BCS_BATCH(batch, 0);
466     OUT_BCS_BATCH(batch, 0);
467     OUT_BCS_BATCH(batch, 0);
468     OUT_BCS_BATCH(batch, 0);
469     ADVANCE_BCS_BATCH(batch);
470 }
471
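/* Send the AVC 4x4 scaling lists (and the 8x8 lists when the 8x8 transform
 * is enabled), falling back to the driver's default flat matrices. */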
472 static void
473 gen8_mfd_avc_qm_state(VADriverContextP ctx,
474                       struct decode_state *decode_state,
475                       struct gen7_mfd_context *gen7_mfd_context)
476 {
477     VAIQMatrixBufferH264 *iq_matrix;
478     VAPictureParameterBufferH264 *pic_param;
479
480     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
481         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
482     else
483         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
484
485     assert(decode_state->pic_param && decode_state->pic_param->buffer);
486     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
487
488     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
489     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
490
491     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
492         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
493         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
494     }
495 }
496
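/* MFD_AVC_PICID_STATE with picture ID remapping disabled. */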
497 static void
498 gen8_mfd_avc_picid_state(VADriverContextP ctx,
499                          struct decode_state *decode_state,
500                          struct gen7_mfd_context *gen7_mfd_context)
501 {
502     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
503
504     BEGIN_BCS_BATCH(batch, 10);
505     OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
506     OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
507     OUT_BCS_BATCH(batch, 0);
508     OUT_BCS_BATCH(batch, 0);
509     OUT_BCS_BATCH(batch, 0);
510     OUT_BCS_BATCH(batch, 0);
511     OUT_BCS_BATCH(batch, 0);
512     OUT_BCS_BATCH(batch, 0);
513     OUT_BCS_BATCH(batch, 0);
514     OUT_BCS_BATCH(batch, 0);
515     ADVANCE_BCS_BATCH(batch);
516 }
517
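/* MFX_AVC_DIRECTMODE_STATE: direct MV buffers for the reference surfaces and
 * the current picture, followed by the POC list. */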
518 static void
519 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
520                               struct decode_state *decode_state,
521                               VAPictureParameterBufferH264 *pic_param,
522                               VASliceParameterBufferH264 *slice_param,
523                               struct gen7_mfd_context *gen7_mfd_context)
524 {
525     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
526     struct object_surface *obj_surface;
527     GenAvcSurface *gen7_avc_surface;
528     VAPictureH264 *va_pic;
529     int i, j;
530
531     BEGIN_BCS_BATCH(batch, 71);
532     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
533
534     /* reference surfaces 0..15 */
535     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
536         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
537             gen7_mfd_context->reference_surface[i].obj_surface &&
538             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
539
540             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
541             gen7_avc_surface = obj_surface->private_data;
542
543             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
544                           I915_GEM_DOMAIN_INSTRUCTION, 0,
545                           0);
546             OUT_BCS_BATCH(batch, 0);
547         } else {
548             OUT_BCS_BATCH(batch, 0);
549             OUT_BCS_BATCH(batch, 0);
550         }
551     }
552     
553     OUT_BCS_BATCH(batch, 0);
554
555     /* the current decoding frame/field */
556     va_pic = &pic_param->CurrPic;
557     obj_surface = decode_state->render_object;
558     assert(obj_surface->bo && obj_surface->private_data);
559     gen7_avc_surface = obj_surface->private_data;
560
561     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
562                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
563                   0);
564
565     OUT_BCS_BATCH(batch, 0);
566     OUT_BCS_BATCH(batch, 0);
567
568     /* POC List */
569     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
570         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
571             int found = 0;
572
573             assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
574
575             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
576                 va_pic = &pic_param->ReferenceFrames[j];
577                 
578                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
579                     continue;
580
581                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
582                     found = 1;
583                     break;
584                 }
585             }
586
587             assert(found == 1);
588             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
589             
590             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
591             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
592         } else {
593             OUT_BCS_BATCH(batch, 0);
594             OUT_BCS_BATCH(batch, 0);
595         }
596     }
597
598     va_pic = &pic_param->CurrPic;
599     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
600     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
601
602     ADVANCE_BCS_BATCH(batch);
603 }
604
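/* MFX_AVC_SLICE_STATE for one slice: slice type, active reference counts,
 * QP/deblocking controls and the MB positions of this and the next slice. */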
605 static void
606 gen8_mfd_avc_slice_state(VADriverContextP ctx,
607                          VAPictureParameterBufferH264 *pic_param,
608                          VASliceParameterBufferH264 *slice_param,
609                          VASliceParameterBufferH264 *next_slice_param,
610                          struct gen7_mfd_context *gen7_mfd_context)
611 {
612     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
613     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
614     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
615     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
616     int num_ref_idx_l0, num_ref_idx_l1;
617     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
618                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
619     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
620     int slice_type;
621
622     if (slice_param->slice_type == SLICE_TYPE_I ||
623         slice_param->slice_type == SLICE_TYPE_SI) {
624         slice_type = SLICE_TYPE_I;
625     } else if (slice_param->slice_type == SLICE_TYPE_P ||
626                slice_param->slice_type == SLICE_TYPE_SP) {
627         slice_type = SLICE_TYPE_P;
628     } else { 
629         assert(slice_param->slice_type == SLICE_TYPE_B);
630         slice_type = SLICE_TYPE_B;
631     }
632
633     if (slice_type == SLICE_TYPE_I) {
634         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
635         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
636         num_ref_idx_l0 = 0;
637         num_ref_idx_l1 = 0;
638     } else if (slice_type == SLICE_TYPE_P) {
639         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
640         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
641         num_ref_idx_l1 = 0;
642     } else {
643         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
644         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
645     }
646
647     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
648     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
649     slice_ver_pos = first_mb_in_slice / width_in_mbs;
650
651     if (next_slice_param) {
652         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
653         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
654         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
655     } else {
656         next_slice_hor_pos = 0;
657         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
658     }
659
660     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
661     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
662     OUT_BCS_BATCH(batch, slice_type);
663     OUT_BCS_BATCH(batch, 
664                   (num_ref_idx_l1 << 24) |
665                   (num_ref_idx_l0 << 16) |
666                   (slice_param->chroma_log2_weight_denom << 8) |
667                   (slice_param->luma_log2_weight_denom << 0));
668     OUT_BCS_BATCH(batch, 
669                   (slice_param->direct_spatial_mv_pred_flag << 29) |
670                   (slice_param->disable_deblocking_filter_idc << 27) |
671                   (slice_param->cabac_init_idc << 24) |
672                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
673                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
674                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
675     OUT_BCS_BATCH(batch, 
676                   (slice_ver_pos << 24) |
677                   (slice_hor_pos << 16) | 
678                   (first_mb_in_slice << 0));
679     OUT_BCS_BATCH(batch,
680                   (next_slice_ver_pos << 16) |
681                   (next_slice_hor_pos << 0));
682     OUT_BCS_BATCH(batch, 
683                   (next_slice_param == NULL) << 19); /* last slice flag */
684     OUT_BCS_BATCH(batch, 0);
685     OUT_BCS_BATCH(batch, 0);
686     OUT_BCS_BATCH(batch, 0);
687     OUT_BCS_BATCH(batch, 0);
688     ADVANCE_BCS_BATCH(batch);
689 }
690
691 static inline void
692 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
693                            VAPictureParameterBufferH264 *pic_param,
694                            VASliceParameterBufferH264 *slice_param,
695                            struct gen7_mfd_context *gen7_mfd_context)
696 {
697     gen6_send_avc_ref_idx_state(
698         gen7_mfd_context->base.batch,
699         slice_param,
700         gen7_mfd_context->reference_surface
701     );
702 }
703
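/* MFX_AVC_WEIGHTOFFSET_STATE: send the L0 (and, for B slices, L1) explicit
 * weighted-prediction tables when the picture parameters enable them. */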
704 static void
705 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
706                                 VAPictureParameterBufferH264 *pic_param,
707                                 VASliceParameterBufferH264 *slice_param,
708                                 struct gen7_mfd_context *gen7_mfd_context)
709 {
710     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
711     int i, j, num_weight_offset_table = 0;
712     short weightoffsets[32 * 6];
713
714     if ((slice_param->slice_type == SLICE_TYPE_P ||
715          slice_param->slice_type == SLICE_TYPE_SP) &&
716         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
717         num_weight_offset_table = 1;
718     }
719     
720     if ((slice_param->slice_type == SLICE_TYPE_B) &&
721         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
722         num_weight_offset_table = 2;
723     }
724
725     for (i = 0; i < num_weight_offset_table; i++) {
726         BEGIN_BCS_BATCH(batch, 98);
727         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
728         OUT_BCS_BATCH(batch, i);
729
730         if (i == 0) {
731             for (j = 0; j < 32; j++) {
732                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
733                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
734                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
735                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
736                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
737                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
738             }
739         } else {
740             for (j = 0; j < 32; j++) {
741                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
742                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
743                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
744                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
745                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
746                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
747             }
748         }
749
750         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
751         ADVANCE_BCS_BATCH(batch);
752     }
753 }
754
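/* MFD_AVC_BSD_OBJECT: slice data size/offset plus the bit offset of the
 * first macroblock, and the last-slice flag. */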
755 static void
756 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
757                         VAPictureParameterBufferH264 *pic_param,
758                         VASliceParameterBufferH264 *slice_param,
759                         dri_bo *slice_data_bo,
760                         VASliceParameterBufferH264 *next_slice_param,
761                         struct gen7_mfd_context *gen7_mfd_context)
762 {
763     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
764     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
765                                                             slice_param,
766                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
767
768     /* the input bitstream format on GEN7 differs from GEN6 */
769     BEGIN_BCS_BATCH(batch, 6);
770     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
771     OUT_BCS_BATCH(batch, 
772                   (slice_param->slice_data_size));
773     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
774     OUT_BCS_BATCH(batch,
775                   (0 << 31) |
776                   (0 << 14) |
777                   (0 << 12) |
778                   (0 << 10) |
779                   (0 << 8));
780     OUT_BCS_BATCH(batch,
781                   ((slice_data_bit_offset >> 3) << 16) |
782                   (1 << 7)  |
783                   (0 << 5)  |
784                   (0 << 4)  |
785                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
786                   (slice_data_bit_offset & 0x7));
787     OUT_BCS_BATCH(batch, 0);
788     ADVANCE_BCS_BATCH(batch);
789 }
790
791 static inline void
792 gen8_mfd_avc_context_init(
793     VADriverContextP         ctx,
794     struct gen7_mfd_context *gen7_mfd_context
795 )
796 {
797     /* Initialize flat scaling lists */
798     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
799 }
800
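/* Per-picture AVC setup: detect whether in-loop deblocking is needed, update
 * the frame store, allocate the render surface, its DMV buffers and the
 * row-store scratch buffers. */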
801 static void
802 gen8_mfd_avc_decode_init(VADriverContextP ctx,
803                          struct decode_state *decode_state,
804                          struct gen7_mfd_context *gen7_mfd_context)
805 {
806     VAPictureParameterBufferH264 *pic_param;
807     VASliceParameterBufferH264 *slice_param;
808     struct i965_driver_data *i965 = i965_driver_data(ctx);
809     struct object_surface *obj_surface;
810     dri_bo *bo;
811     int i, j, enable_avc_ildb = 0;
812     unsigned int width_in_mbs, height_in_mbs;
813
814     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
815         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
816         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
817
818         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
819             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
820             assert((slice_param->slice_type == SLICE_TYPE_I) ||
821                    (slice_param->slice_type == SLICE_TYPE_SI) ||
822                    (slice_param->slice_type == SLICE_TYPE_P) ||
823                    (slice_param->slice_type == SLICE_TYPE_SP) ||
824                    (slice_param->slice_type == SLICE_TYPE_B));
825
826             if (slice_param->disable_deblocking_filter_idc != 1) {
827                 enable_avc_ildb = 1;
828                 break;
829             }
830
831             slice_param++;
832         }
833     }
834
835     assert(decode_state->pic_param && decode_state->pic_param->buffer);
836     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
837     intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
838     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
839     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
840     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
841     assert(height_in_mbs > 0 && height_in_mbs <= 256);
842
843     /* Current decoded picture */
844     obj_surface = decode_state->render_object;
845     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
846     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
847     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
848
849     /* initialize the UV plane for the YUV400 (monochrome) case */
850     if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
851          unsigned int uv_offset = obj_surface->width * obj_surface->height; 
852          unsigned int uv_size   = obj_surface->width * obj_surface->height / 2; 
853
854          drm_intel_gem_bo_map_gtt(obj_surface->bo);
855          memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
856          drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
857     }
858
859     gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
860
861     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
862     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
863     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
864     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
865
866     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
867     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
868     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
869     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
870
871     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
872     bo = dri_bo_alloc(i965->intel.bufmgr,
873                       "intra row store",
874                       width_in_mbs * 64,
875                       0x1000);
876     assert(bo);
877     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
878     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
879
880     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
881     bo = dri_bo_alloc(i965->intel.bufmgr,
882                       "deblocking filter row store",
883                       width_in_mbs * 64 * 4,
884                       0x1000);
885     assert(bo);
886     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
887     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
888
889     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
890     bo = dri_bo_alloc(i965->intel.bufmgr,
891                       "bsd mpc row store",
892                       width_in_mbs * 64 * 2,
893                       0x1000);
894     assert(bo);
895     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
896     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
897
898     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
899     bo = dri_bo_alloc(i965->intel.bufmgr,
900                       "mpr row store",
901                       width_in_mbs * 64 * 2,
902                       0x1000);
903     assert(bo);
904     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
905     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
906
907     gen7_mfd_context->bitplane_read_buffer.valid = 0;
908 }
909
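/* Top-level AVC decode: emit the common pipeline state once, then per-slice
 * state and a BSD object for every slice of the picture. */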
910 static void
911 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
912                             struct decode_state *decode_state,
913                             struct gen7_mfd_context *gen7_mfd_context)
914 {
915     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
916     VAPictureParameterBufferH264 *pic_param;
917     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
918     dri_bo *slice_data_bo;
919     int i, j;
920
921     assert(decode_state->pic_param && decode_state->pic_param->buffer);
922     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
923     gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
924
925     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
926     intel_batchbuffer_emit_mi_flush(batch);
927     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
928     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
929     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
930     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
931     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
932     gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
933     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
934
935     for (j = 0; j < decode_state->num_slice_params; j++) {
936         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
937         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
938         slice_data_bo = decode_state->slice_datas[j]->bo;
939         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
940
941         if (j == decode_state->num_slice_params - 1)
942             next_slice_group_param = NULL;
943         else
944             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
945
946         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
947             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
948             assert((slice_param->slice_type == SLICE_TYPE_I) ||
949                    (slice_param->slice_type == SLICE_TYPE_SI) ||
950                    (slice_param->slice_type == SLICE_TYPE_P) ||
951                    (slice_param->slice_type == SLICE_TYPE_SP) ||
952                    (slice_param->slice_type == SLICE_TYPE_B));
953
954             if (i < decode_state->slice_params[j]->num_elements - 1)
955                 next_slice_param = slice_param + 1;
956             else
957                 next_slice_param = next_slice_group_param;
958
959             gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
960             gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
961             gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
962             gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
963             gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
964             slice_param++;
965         }
966     }
967
968     intel_batchbuffer_end_atomic(batch);
969     intel_batchbuffer_flush(batch);
970 }
971
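/* Per-picture MPEG-2 setup: bind the reference surfaces, allocate the render
 * surface and the BSD/MPC row-store scratch buffer. */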
972 static void
973 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
974                            struct decode_state *decode_state,
975                            struct gen7_mfd_context *gen7_mfd_context)
976 {
977     VAPictureParameterBufferMPEG2 *pic_param;
978     struct i965_driver_data *i965 = i965_driver_data(ctx);
979     struct object_surface *obj_surface;
980     dri_bo *bo;
981     unsigned int width_in_mbs;
982
983     assert(decode_state->pic_param && decode_state->pic_param->buffer);
984     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
985     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
986
987     mpeg2_set_reference_surfaces(
988         ctx,
989         gen7_mfd_context->reference_surface,
990         decode_state,
991         pic_param
992     );
993
994     /* Current decoded picture */
995     obj_surface = decode_state->render_object;
996     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
997
998     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
999     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1000     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1001     gen7_mfd_context->pre_deblocking_output.valid = 1;
1002
1003     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1004     bo = dri_bo_alloc(i965->intel.bufmgr,
1005                       "bsd mpc row store",
1006                       width_in_mbs * 96,
1007                       0x1000);
1008     assert(bo);
1009     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1010     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1011
1012     gen7_mfd_context->post_deblocking_output.valid = 0;
1013     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1014     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1015     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1016     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1017 }
1018
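/* MFX_MPEG2_PIC_STATE: f_code fields, picture coding extension bits and the
 * coded size in macroblocks. */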
1019 static void
1020 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1021                          struct decode_state *decode_state,
1022                          struct gen7_mfd_context *gen7_mfd_context)
1023 {
1024     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1025     VAPictureParameterBufferMPEG2 *pic_param;
1026     unsigned int slice_concealment_disable_bit = 0;
1027
1028     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1029     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1030
1031     slice_concealment_disable_bit = 1;
1032
1033     BEGIN_BCS_BATCH(batch, 13);
1034     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1035     OUT_BCS_BATCH(batch,
1036                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1037                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1038                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1039                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1040                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1041                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1042                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1043                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1044                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1045                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1046                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1047                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1048     OUT_BCS_BATCH(batch,
1049                   pic_param->picture_coding_type << 9);
1050     OUT_BCS_BATCH(batch,
1051                   (slice_concealment_disable_bit << 31) |
1052                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1053                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1054     OUT_BCS_BATCH(batch, 0);
1055     OUT_BCS_BATCH(batch, 0);
1056     OUT_BCS_BATCH(batch, 0);
1057     OUT_BCS_BATCH(batch, 0);
1058     OUT_BCS_BATCH(batch, 0);
1059     OUT_BCS_BATCH(batch, 0);
1060     OUT_BCS_BATCH(batch, 0);
1061     OUT_BCS_BATCH(batch, 0);
1062     OUT_BCS_BATCH(batch, 0);
1063     ADVANCE_BCS_BATCH(batch);
1064 }
1065
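/* Track the MPEG-2 quantiser matrices across pictures and load whichever
 * intra/non-intra matrices are currently valid. */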
1066 static void
1067 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1068                         struct decode_state *decode_state,
1069                         struct gen7_mfd_context *gen7_mfd_context)
1070 {
1071     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1072     int i, j;
1073
1074     /* Update internal QM state */
1075     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1076         VAIQMatrixBufferMPEG2 * const iq_matrix =
1077             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1078
1079         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1080             iq_matrix->load_intra_quantiser_matrix) {
1081             gen_iq_matrix->load_intra_quantiser_matrix =
1082                 iq_matrix->load_intra_quantiser_matrix;
1083             if (iq_matrix->load_intra_quantiser_matrix) {
1084                 for (j = 0; j < 64; j++)
1085                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1086                         iq_matrix->intra_quantiser_matrix[j];
1087             }
1088         }
1089
1090         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1091             iq_matrix->load_non_intra_quantiser_matrix) {
1092             gen_iq_matrix->load_non_intra_quantiser_matrix =
1093                 iq_matrix->load_non_intra_quantiser_matrix;
1094             if (iq_matrix->load_non_intra_quantiser_matrix) {
1095                 for (j = 0; j < 64; j++)
1096                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1097                         iq_matrix->non_intra_quantiser_matrix[j];
1098             }
1099         }
1100     }
1101
1102     /* Commit QM state to HW */
1103     for (i = 0; i < 2; i++) {
1104         unsigned char *qm = NULL;
1105         int qm_type;
1106
1107         if (i == 0) {
1108             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1109                 qm = gen_iq_matrix->intra_quantiser_matrix;
1110                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1111             }
1112         } else {
1113             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1114                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1115                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1116             }
1117         }
1118
1119         if (!qm)
1120             continue;
1121
1122         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1123     }
1124 }
1125
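/* MFD_MPEG2_BSD_OBJECT for one slice; the macroblock count is derived from
 * the current and next slice positions (with the field-picture workaround). */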
1126 static void
1127 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1128                           VAPictureParameterBufferMPEG2 *pic_param,
1129                           VASliceParameterBufferMPEG2 *slice_param,
1130                           VASliceParameterBufferMPEG2 *next_slice_param,
1131                           struct gen7_mfd_context *gen7_mfd_context)
1132 {
1133     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1134     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1135     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1136
1137     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1138         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1139         is_field_pic = 1;
1140     is_field_pic_wa = is_field_pic &&
1141         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1142
1143     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1144     hpos0 = slice_param->slice_horizontal_position;
1145
1146     if (next_slice_param == NULL) {
1147         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1148         hpos1 = 0;
1149     } else {
1150         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1151         hpos1 = next_slice_param->slice_horizontal_position;
1152     }
1153
1154     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1155
1156     BEGIN_BCS_BATCH(batch, 5);
1157     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1158     OUT_BCS_BATCH(batch, 
1159                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1160     OUT_BCS_BATCH(batch, 
1161                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1162     OUT_BCS_BATCH(batch,
1163                   hpos0 << 24 |
1164                   vpos0 << 16 |
1165                   mb_count << 8 |
1166                   (next_slice_param == NULL) << 5 |
1167                   (next_slice_param == NULL) << 3 |
1168                   (slice_param->macroblock_offset & 0x7));
1169     OUT_BCS_BATCH(batch,
1170                   (slice_param->quantiser_scale_code << 24) |
1171                   (vpos1 << 8 | hpos1));
1172     ADVANCE_BCS_BATCH(batch);
1173 }
1174
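/* Top-level MPEG-2 decode: emit the common pipeline state, then one BSD
 * object per slice. */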
1175 static void
1176 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1177                               struct decode_state *decode_state,
1178                               struct gen7_mfd_context *gen7_mfd_context)
1179 {
1180     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1181     VAPictureParameterBufferMPEG2 *pic_param;
1182     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1183     dri_bo *slice_data_bo;
1184     int i, j;
1185
1186     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1187     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1188
1189     gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1190     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1191     intel_batchbuffer_emit_mi_flush(batch);
1192     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1193     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1194     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1195     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1196     gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1197     gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1198
1199     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1200         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1201             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1202
1203     for (j = 0; j < decode_state->num_slice_params; j++) {
1204         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1205         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1206         slice_data_bo = decode_state->slice_datas[j]->bo;
1207         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1208
1209         if (j == decode_state->num_slice_params - 1)
1210             next_slice_group_param = NULL;
1211         else
1212             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1213
1214         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1215             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1216
1217             if (i < decode_state->slice_params[j]->num_elements - 1)
1218                 next_slice_param = slice_param + 1;
1219             else
1220                 next_slice_param = next_slice_group_param;
1221
1222             gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1223             slice_param++;
1224         }
1225     }
1226
1227     intel_batchbuffer_end_atomic(batch);
1228     intel_batchbuffer_flush(batch);
1229 }
1230
1231 static const int va_to_gen7_vc1_pic_type[5] = {
1232     GEN7_VC1_I_PICTURE,
1233     GEN7_VC1_P_PICTURE,
1234     GEN7_VC1_B_PICTURE,
1235     GEN7_VC1_BI_PICTURE,
1236     GEN7_VC1_P_PICTURE,
1237 };
1238
1239 static const int va_to_gen7_vc1_mv[4] = {
1240     1, /* 1-MV */
1241     2, /* 1-MV half-pel */
1242     3, /* 1-MV half-pel bilinear */
1243     0, /* Mixed MV */
1244 };
1245
1246 static const int b_picture_scale_factor[21] = {
1247     128, 85,  170, 64,  192,
1248     51,  102, 153, 204, 43,
1249     215, 37,  74,  111, 148,
1250     185, 222, 32,  96,  160, 
1251     224,
1252 };
1253
1254 static const int va_to_gen7_vc1_condover[3] = {
1255     0,
1256     2,
1257     3
1258 };
1259
1260 static const int va_to_gen7_vc1_profile[4] = {
1261     GEN7_VC1_SIMPLE_PROFILE,
1262     GEN7_VC1_MAIN_PROFILE,
1263     GEN7_VC1_RESERVED_PROFILE,
1264     GEN7_VC1_ADVANCED_PROFILE
1265 };
1266
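/* Destructor for the per-surface VC-1 private data. */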
1267 static void 
1268 gen8_mfd_free_vc1_surface(void **data)
1269 {
1270     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1271
1272     if (!gen7_vc1_surface)
1273         return;
1274
1275     dri_bo_unreference(gen7_vc1_surface->dmv);
1276     free(gen7_vc1_surface);
1277     *data = NULL;
1278 }
1279
1280 static void
1281 gen8_mfd_init_vc1_surface(VADriverContextP ctx, 
1282                           VAPictureParameterBufferVC1 *pic_param,
1283                           struct object_surface *obj_surface)
1284 {
1285     struct i965_driver_data *i965 = i965_driver_data(ctx);
1286     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1287     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1288     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1289
1290     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1291
1292     if (!gen7_vc1_surface) {
1293         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1294         assert((obj_surface->size & 0x3f) == 0);
1295         obj_surface->private_data = gen7_vc1_surface;
1296     }
1297
1298     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1299
1300     if (gen7_vc1_surface->dmv == NULL) {
1301         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1302                                              "direct mv w/r buffer",
1303                                              width_in_mbs * height_in_mbs * 64,
1304                                              0x1000);
1305     }
1306 }
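
/*
 * Sizing sketch (hypothetical numbers): the per-surface direct-MV buffer above
 * is width_in_mbs * height_in_mbs * 64 bytes.  For a 1920x1080 stream that is
 * (1920 / 16) * (ALIGN(1080, 16) / 16) * 64 = 120 * 68 * 64 = 522240 bytes
 * (~510 KiB), allocated once per surface and released through
 * gen8_mfd_free_vc1_surface().
 */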
1307
1308 static void
1309 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1310                          struct decode_state *decode_state,
1311                          struct gen7_mfd_context *gen7_mfd_context)
1312 {
1313     VAPictureParameterBufferVC1 *pic_param;
1314     struct i965_driver_data *i965 = i965_driver_data(ctx);
1315     struct object_surface *obj_surface;
1316     dri_bo *bo;
1317     int width_in_mbs;
1318     int picture_type;
1319
1320     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1321     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1322     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1323     picture_type = pic_param->picture_fields.bits.picture_type;
1324  
1325     intel_update_vc1_frame_store_index(ctx,
1326                                        decode_state,
1327                                        pic_param,
1328                                        gen7_mfd_context->reference_surface);
1329
1330     /* Current decoded picture */
1331     obj_surface = decode_state->render_object;
1332     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1333     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1334
1335     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1336     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1337     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1338     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1339
1340     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1341     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1342     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1343     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1344
1345     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1346     bo = dri_bo_alloc(i965->intel.bufmgr,
1347                       "intra row store",
1348                       width_in_mbs * 64,
1349                       0x1000);
1350     assert(bo);
1351     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1352     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1353
1354     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1355     bo = dri_bo_alloc(i965->intel.bufmgr,
1356                       "deblocking filter row store",
1357                       width_in_mbs * 7 * 64,
1358                       0x1000);
1359     assert(bo);
1360     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1361     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1362
1363     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1364     bo = dri_bo_alloc(i965->intel.bufmgr,
1365                       "bsd mpc row store",
1366                       width_in_mbs * 96,
1367                       0x1000);
1368     assert(bo);
1369     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1370     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1371
1372     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1373
1374     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1375     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1376     
1377     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1378         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1379         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1380         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1381         int src_w, src_h;
1382         uint8_t *src = NULL, *dst = NULL;
1383
1384         assert(decode_state->bit_plane->buffer);
1385         src = decode_state->bit_plane->buffer;
1386
1387         bo = dri_bo_alloc(i965->intel.bufmgr,
1388                           "VC-1 Bitplane",
1389                           bitplane_width * height_in_mbs,
1390                           0x1000);
1391         assert(bo);
1392         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1393
1394         dri_bo_map(bo, True);
1395         assert(bo->virtual);
1396         dst = bo->virtual;
1397
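        /*
         * Repacking sketch: judging from src_index/src_shift below, the VA
         * bit-plane buffer packs two macroblocks per byte in raster order
         * (high nibble = even macroblock, low nibble = odd macroblock), i.e.
         * for macroblock n:
         *     nibble = (src[n / 2] >> ((n & 1) ? 0 : 4)) & 0xf;
         * Each destination row is bitplane_width bytes, again two macroblocks
         * per byte; for skipped pictures bit 1 of every nibble is forced on
         * (src_value |= 0x2), presumably the skip-macroblock flag.
         */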
1398         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1399             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1400                 int src_index, dst_index;
1401                 int src_shift;
1402                 uint8_t src_value;
1403
1404                 src_index = (src_h * width_in_mbs + src_w) / 2;
1405                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1406                 src_value = ((src[src_index] >> src_shift) & 0xf);
1407
1408                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1409                     src_value |= 0x2;
1410                 }
1411
1412                 dst_index = src_w / 2;
1413                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1414             }
1415
1416             if (src_w & 1)
1417                 dst[src_w / 2] >>= 4;
1418
1419             dst += bitplane_width;
1420         }
1421
1422         dri_bo_unmap(bo);
1423     } else
1424         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1425 }
1426
1427 static void
1428 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1429                        struct decode_state *decode_state,
1430                        struct gen7_mfd_context *gen7_mfd_context)
1431 {
1432     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1433     VAPictureParameterBufferVC1 *pic_param;
1434     struct object_surface *obj_surface;
1435     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1436     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1437     int unified_mv_mode;
1438     int ref_field_pic_polarity = 0;
1439     int scale_factor = 0;
1440     int trans_ac_y = 0;
1441     int dmv_surface_valid = 0;
1442     int brfd = 0;
1443     int fcm = 0;
1444     int picture_type;
1445     int profile;
1446     int overlap;
1447     int interpolation_mode = 0;
1448
1449     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1450     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1451
1452     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1453     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1454     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1455     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1456     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1457     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1458     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1459     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1460
1461     if (dquant == 0) {
1462         alt_pquant_config = 0;
1463         alt_pquant_edge_mask = 0;
1464     } else if (dquant == 2) {
1465         alt_pquant_config = 1;
1466         alt_pquant_edge_mask = 0xf;
1467     } else {
1468         assert(dquant == 1);
1469         if (dquantfrm == 0) {
1470             alt_pquant_config = 0;
1471             alt_pquant_edge_mask = 0;
1472             alt_pq = 0;
1473         } else {
1474             assert(dquantfrm == 1);
1475             alt_pquant_config = 1;
1476
1477             switch (dqprofile) {
1478             case 3:
1479                 if (dqbilevel == 0) {
1480                     alt_pquant_config = 2;
1481                     alt_pquant_edge_mask = 0;
1482                 } else {
1483                     assert(dqbilevel == 1);
1484                     alt_pquant_config = 3;
1485                     alt_pquant_edge_mask = 0;
1486                 }
1487                 break;
1488                 
1489             case 0:
1490                 alt_pquant_edge_mask = 0xf;
1491                 break;
1492
1493             case 1:
1494                 if (dqdbedge == 3)
1495                     alt_pquant_edge_mask = 0x9;
1496                 else
1497                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1498
1499                 break;
1500
1501             case 2:
1502                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1503                 break;
1504
1505             default:
1506                 assert(0);
1507             }
1508         }
1509     }
1510
1511     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1512         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1513         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1514     } else {
1515         assert(pic_param->mv_fields.bits.mv_mode < 4);
1516         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1517     }
1518
1519     if (pic_param->sequence_fields.bits.interlace == 1 &&
1520         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1521         /* FIXME: calculate reference field picture polarity */
1522         assert(0);
1523         ref_field_pic_polarity = 0;
1524     }
1525
1526     if (pic_param->b_picture_fraction < 21)
1527         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1528
1529     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1530     
1531     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1532         picture_type == GEN7_VC1_I_PICTURE)
1533         picture_type = GEN7_VC1_BI_PICTURE;
1534
1535     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1536         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1537     else {
1538         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1539
1540         /*
1541          * 8.3.6.2.1 Transform Type Selection
1542          * If variable-sized transform coding is not enabled,
1543          * then the 8x8 transform shall be used for all blocks.
1544          * It is also an MFX_VC1_PIC_STATE requirement.
1545          */
1546         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1547             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1548             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1549         }
1550     }
1551
1552     if (picture_type == GEN7_VC1_B_PICTURE) {
1553         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1554
1555         obj_surface = decode_state->reference_objects[1];
1556
1557         if (obj_surface)
1558             gen7_vc1_surface = obj_surface->private_data;
1559
1560         if (!gen7_vc1_surface || 
1561             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1562              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1563             dmv_surface_valid = 0;
1564         else
1565             dmv_surface_valid = 1;
1566     }
1567
1568     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1569
1570     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1571         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1572     else {
1573         if (pic_param->picture_fields.bits.top_field_first)
1574             fcm = 2;
1575         else
1576             fcm = 3;
1577     }
1578
1579     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1580         brfd = pic_param->reference_fields.bits.reference_distance;
1581         brfd = (scale_factor * brfd) >> 8;
1582         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1583
1584         if (brfd < 0)
1585             brfd = 0;
1586     }
1587
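    /*
     * Overlap-smoothing decision, paraphrasing the logic below: for
     * simple/main profile the filter is enabled when pic_quantizer_scale >= 9
     * on non-B pictures; for advanced profile it is enabled for P pictures
     * with pic_quantizer_scale >= 9, and for I/BI pictures either when
     * pic_quantizer_scale >= 9 or when va_to_gen7_vc1_condover[] maps
     * conditional_overlap_flag to 2 or 3.
     */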
1588     overlap = 0;
1589     if (profile != GEN7_VC1_ADVANCED_PROFILE) {
1590         if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1591             pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1592             overlap = 1;
1593         }
1594     } else {
1595         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1596             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1597             overlap = 1;
1598         }
1599         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1600             pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
1601             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1602                 overlap = 1;
1603             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1604                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1605                 overlap = 1;
1606             }
1607         }
1608     }
1609
1610     assert(pic_param->conditional_overlap_flag < 3);
1611     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1612
1613     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1614         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1615          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1616         interpolation_mode = 9; /* Half-pel bilinear */
1617     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1618              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1619               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1620         interpolation_mode = 1; /* Half-pel bicubic */
1621     else
1622         interpolation_mode = 0; /* Quarter-pel bicubic */
1623
1624     BEGIN_BCS_BATCH(batch, 6);
1625     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1626     OUT_BCS_BATCH(batch,
1627                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1628                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1629     OUT_BCS_BATCH(batch,
1630                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1631                   dmv_surface_valid << 15 |
1632                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1633                   pic_param->rounding_control << 13 |
1634                   pic_param->sequence_fields.bits.syncmarker << 12 |
1635                   interpolation_mode << 8 |
1636                   0 << 7 | /* FIXME: scale up or down ??? */
1637                   pic_param->range_reduction_frame << 6 |
1638                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1639                   overlap << 4 |
1640                   !pic_param->picture_fields.bits.is_first_field << 3 |
1641                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1642     OUT_BCS_BATCH(batch,
1643                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1644                   picture_type << 26 |
1645                   fcm << 24 |
1646                   alt_pq << 16 |
1647                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1648                   scale_factor << 0);
1649     OUT_BCS_BATCH(batch,
1650                   unified_mv_mode << 28 |
1651                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1652                   pic_param->fast_uvmc_flag << 26 |
1653                   ref_field_pic_polarity << 25 |
1654                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1655                   pic_param->reference_fields.bits.reference_distance << 20 |
1656                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1657                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1658                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1659                   alt_pquant_edge_mask << 4 |
1660                   alt_pquant_config << 2 |
1661                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1662                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1663     OUT_BCS_BATCH(batch,
1664                   !!pic_param->bitplane_present.value << 31 |
1665                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1666                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1667                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1668                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1669                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1670                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1671                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1672                   pic_param->mv_fields.bits.mv_table << 20 |
1673                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1674                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1675                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1676                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1677                   pic_param->mb_mode_table << 8 |
1678                   trans_ac_y << 6 |
1679                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1680                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1681                   pic_param->cbp_table << 0);
1682     ADVANCE_BCS_BATCH(batch);
1683 }
1684
1685 static void
1686 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1687                              struct decode_state *decode_state,
1688                              struct gen7_mfd_context *gen7_mfd_context)
1689 {
1690     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1691     VAPictureParameterBufferVC1 *pic_param;
1692     int intensitycomp_single;
1693
1694     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1695     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1696
1699     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1700
1701     BEGIN_BCS_BATCH(batch, 6);
1702     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1703     OUT_BCS_BATCH(batch,
1704                   0 << 14 | /* FIXME: double ??? */
1705                   0 << 12 |
1706                   intensitycomp_single << 10 |
1707                   intensitycomp_single << 8 |
1708                   0 << 4 | /* FIXME: interlace mode */
1709                   0);
1710     OUT_BCS_BATCH(batch,
1711                   pic_param->luma_shift << 16 |
1712                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1713     OUT_BCS_BATCH(batch, 0);
1714     OUT_BCS_BATCH(batch, 0);
1715     OUT_BCS_BATCH(batch, 0);
1716     ADVANCE_BCS_BATCH(batch);
1717 }
1718
1719 static void
1720 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1721                               struct decode_state *decode_state,
1722                               struct gen7_mfd_context *gen7_mfd_context)
1723 {
1724     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1725     struct object_surface *obj_surface;
1726     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1727
1728     obj_surface = decode_state->render_object;
1729
1730     if (obj_surface && obj_surface->private_data) {
1731         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1732     }
1733
1734     obj_surface = decode_state->reference_objects[1];
1735
1736     if (obj_surface && obj_surface->private_data) {
1737         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1738     }
1739
1740     BEGIN_BCS_BATCH(batch, 7);
1741     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1742
1743     if (dmv_write_buffer)
1744         OUT_BCS_RELOC(batch, dmv_write_buffer,
1745                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1746                       0);
1747     else
1748         OUT_BCS_BATCH(batch, 0);
1749
1750     OUT_BCS_BATCH(batch, 0);
1751     OUT_BCS_BATCH(batch, 0);
1752
1753     if (dmv_read_buffer)
1754         OUT_BCS_RELOC(batch, dmv_read_buffer,
1755                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1756                       0);
1757     else
1758         OUT_BCS_BATCH(batch, 0);
1759     
1760     OUT_BCS_BATCH(batch, 0);
1761     OUT_BCS_BATCH(batch, 0);
1762                   
1763     ADVANCE_BCS_BATCH(batch);
1764 }
1765
1766 static int
1767 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1768 {
1769     int out_slice_data_bit_offset;
1770     int slice_header_size = in_slice_data_bit_offset / 8;
1771     int i, j;
1772
1773     if (profile != 3)
1774         out_slice_data_bit_offset = in_slice_data_bit_offset;
1775     else {
1776         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1777             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1778                 i++, j += 2;
1779             }
1780         }
1781
1782         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1783     }
1784
1785     return out_slice_data_bit_offset;
1786 }
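
/*
 * Offset adjustment, informally: for profile 3 (presumably the advanced
 * profile) the slice data still contains the 0x03 start-code-emulation
 * prevention bytes, while in_slice_data_bit_offset counts bits in the
 * unescaped header.  The loop above walks the escaped bytes and, for every
 * 00 00 03 xx (xx < 4) pattern found inside the header, advances the byte
 * index by one extra byte, so each emulation byte adds 8 bits to the returned
 * offset.
 */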
1787
1788 static void
1789 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1790                         VAPictureParameterBufferVC1 *pic_param,
1791                         VASliceParameterBufferVC1 *slice_param,
1792                         VASliceParameterBufferVC1 *next_slice_param,
1793                         dri_bo *slice_data_bo,
1794                         struct gen7_mfd_context *gen7_mfd_context)
1795 {
1796     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1797     int next_slice_start_vert_pos;
1798     int macroblock_offset;
1799     uint8_t *slice_data = NULL;
1800
1801     dri_bo_map(slice_data_bo, 0);
1802     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1803     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1804                                                                slice_param->macroblock_offset,
1805                                                                pic_param->sequence_fields.bits.profile);
1806     dri_bo_unmap(slice_data_bo);
1807
1808     if (next_slice_param)
1809         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1810     else
1811         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1812
1813     BEGIN_BCS_BATCH(batch, 5);
1814     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1815     OUT_BCS_BATCH(batch, 
1816                   slice_param->slice_data_size - (macroblock_offset >> 3));
1817     OUT_BCS_BATCH(batch, 
1818                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1819     OUT_BCS_BATCH(batch,
1820                   slice_param->slice_vertical_position << 16 |
1821                   next_slice_start_vert_pos << 0);
1822     OUT_BCS_BATCH(batch,
1823                   (macroblock_offset & 0x7));
1824     ADVANCE_BCS_BATCH(batch);
1825 }
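
/*
 * Worked example (hypothetical numbers): with macroblock_offset == 35 bits,
 * the command above programs slice_data_offset + 4 bytes (35 >> 3) as the
 * start address, subtracts the same 4 bytes from the data length, and passes
 * the remaining 3 bits (35 & 0x7) in the last DWord.
 */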
1826
1827 static void
1828 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1829                             struct decode_state *decode_state,
1830                             struct gen7_mfd_context *gen7_mfd_context)
1831 {
1832     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1833     VAPictureParameterBufferVC1 *pic_param;
1834     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1835     dri_bo *slice_data_bo;
1836     int i, j;
1837
1838     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1839     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1840
1841     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1842     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1843     intel_batchbuffer_emit_mi_flush(batch);
1844     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1845     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1846     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1847     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1848     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1849     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1850     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1851
1852     for (j = 0; j < decode_state->num_slice_params; j++) {
1853         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1854         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1855         slice_data_bo = decode_state->slice_datas[j]->bo;
1856         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1857
1858         if (j == decode_state->num_slice_params - 1)
1859             next_slice_group_param = NULL;
1860         else
1861             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1862
1863         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1864             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1865
1866             if (i < decode_state->slice_params[j]->num_elements - 1)
1867                 next_slice_param = slice_param + 1;
1868             else
1869                 next_slice_param = next_slice_group_param;
1870
1871             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1872             slice_param++;
1873         }
1874     }
1875
1876     intel_batchbuffer_end_atomic(batch);
1877     intel_batchbuffer_flush(batch);
1878 }
1879
1880 static void
1881 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1882                           struct decode_state *decode_state,
1883                           struct gen7_mfd_context *gen7_mfd_context)
1884 {
1885     struct object_surface *obj_surface;
1886     VAPictureParameterBufferJPEGBaseline *pic_param;
1887     int subsampling = SUBSAMPLE_YUV420;
1888
1889     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1890
1891     if (pic_param->num_components == 1)
1892         subsampling = SUBSAMPLE_YUV400;
1893     else if (pic_param->num_components == 3) {
1894         int h1 = pic_param->components[0].h_sampling_factor;
1895         int h2 = pic_param->components[1].h_sampling_factor;
1896         int h3 = pic_param->components[2].h_sampling_factor;
1897         int v1 = pic_param->components[0].v_sampling_factor;
1898         int v2 = pic_param->components[1].v_sampling_factor;
1899         int v3 = pic_param->components[2].v_sampling_factor;
1900
1901         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1902             v1 == 2 && v2 == 1 && v3 == 1)
1903             subsampling = SUBSAMPLE_YUV420;
1904         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1905                  v1 == 1 && v2 == 1 && v3 == 1)
1906             subsampling = SUBSAMPLE_YUV422H;
1907         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1908                  v1 == 1 && v2 == 1 && v3 == 1)
1909             subsampling = SUBSAMPLE_YUV444;
1910         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1911                  v1 == 1 && v2 == 1 && v3 == 1)
1912             subsampling = SUBSAMPLE_YUV411;
1913         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1914                  v1 == 2 && v2 == 1 && v3 == 1)
1915             subsampling = SUBSAMPLE_YUV422V;
1916         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1917                  v1 == 2 && v2 == 2 && v3 == 2)
1918             subsampling = SUBSAMPLE_YUV422H;
1919         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1920                  v1 == 2 && v2 == 1 && v3 == 1)
1921             subsampling = SUBSAMPLE_YUV422V;
1922         else
1923             assert(0);
1924     } else {
1925         assert(0);
1926     }
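
    /*
     * In short, the checks above map the JFIF sampling factors
     * (hY,vY)/(hCb,vCb)/(hCr,vCr) to a SUBSAMPLE_* value, e.g.
     * (2,2)/(1,1)/(1,1) -> 4:2:0, (2,1)/(1,1)/(1,1) -> 4:2:2H,
     * (1,1)/(1,1)/(1,1) -> 4:4:4, (4,1)/(1,1)/(1,1) -> 4:1:1;
     * unsupported combinations are rejected with assert(0).
     */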
1927
1928     /* Current decoded picture */
1929     obj_surface = decode_state->render_object;
1930     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling);
1931
1932     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1933     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1934     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1935     gen7_mfd_context->pre_deblocking_output.valid = 1;
1936
1937     gen7_mfd_context->post_deblocking_output.bo = NULL;
1938     gen7_mfd_context->post_deblocking_output.valid = 0;
1939
1940     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1941     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1942
1943     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1944     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1945
1946     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1947     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1948
1949     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1950     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1951
1952     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1953     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1954 }
1955
1956 static const int va_to_gen7_jpeg_rotation[4] = {
1957     GEN7_JPEG_ROTATION_0,
1958     GEN7_JPEG_ROTATION_90,
1959     GEN7_JPEG_ROTATION_180,
1960     GEN7_JPEG_ROTATION_270
1961 };
1962
1963 static void
1964 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1965                         struct decode_state *decode_state,
1966                         struct gen7_mfd_context *gen7_mfd_context)
1967 {
1968     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1969     VAPictureParameterBufferJPEGBaseline *pic_param;
1970     int chroma_type = GEN7_YUV420;
1971     int frame_width_in_blks;
1972     int frame_height_in_blks;
1973
1974     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1975     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1976
1977     if (pic_param->num_components == 1)
1978         chroma_type = GEN7_YUV400;
1979     else if (pic_param->num_components == 3) {
1980         int h1 = pic_param->components[0].h_sampling_factor;
1981         int h2 = pic_param->components[1].h_sampling_factor;
1982         int h3 = pic_param->components[2].h_sampling_factor;
1983         int v1 = pic_param->components[0].v_sampling_factor;
1984         int v2 = pic_param->components[1].v_sampling_factor;
1985         int v3 = pic_param->components[2].v_sampling_factor;
1986
1987         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1988             v1 == 2 && v2 == 1 && v3 == 1)
1989             chroma_type = GEN7_YUV420;
1990         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1991                  v1 == 1 && v2 == 1 && v3 == 1)
1992             chroma_type = GEN7_YUV422H_2Y;
1993         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1994                  v1 == 1 && v2 == 1 && v3 == 1)
1995             chroma_type = GEN7_YUV444;
1996         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1997                  v1 == 1 && v2 == 1 && v3 == 1)
1998             chroma_type = GEN7_YUV411;
1999         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2000                  v1 == 2 && v2 == 1 && v3 == 1)
2001             chroma_type = GEN7_YUV422V_2Y;
2002         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2003                  v1 == 2 && v2 == 2 && v3 == 2)
2004             chroma_type = GEN7_YUV422H_4Y;
2005         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2006                  v1 == 2 && v2 == 1 && v3 == 1)
2007             chroma_type = GEN7_YUV422V_4Y;
2008         else
2009             assert(0);
2010     }
2011
2012     if (chroma_type == GEN7_YUV400 ||
2013         chroma_type == GEN7_YUV444 ||
2014         chroma_type == GEN7_YUV422V_2Y) {
2015         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2016         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2017     } else if (chroma_type == GEN7_YUV411) {
2018         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2019         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2020     } else {
2021         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2022         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2023     }
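
    /*
     * Illustrative numbers: the MFX_JPEG_PIC_STATE dimensions appear to be
     * counted in 8x8 blocks of the picture rounded up to whole MCUs.  For the
     * 4:2:0 case (the final branch) and a hypothetical 1920x1080 picture,
     * frame_width_in_blks  = ((1920 + 15) / 16) * 2 = 240 and
     * frame_height_in_blks = ((1080 + 15) / 16) * 2 = 136.
     */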
2024
2025     BEGIN_BCS_BATCH(batch, 3);
2026     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2027     OUT_BCS_BATCH(batch,
2028                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2029                   (chroma_type << 0));
2030     OUT_BCS_BATCH(batch,
2031                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2032                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2033     ADVANCE_BCS_BATCH(batch);
2034 }
2035
2036 static const int va_to_gen7_jpeg_hufftable[2] = {
2037     MFX_HUFFTABLE_ID_Y,
2038     MFX_HUFFTABLE_ID_UV
2039 };
2040
2041 static void
2042 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2043                                struct decode_state *decode_state,
2044                                struct gen7_mfd_context *gen7_mfd_context,
2045                                int num_tables)
2046 {
2047     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2048     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2049     int index;
2050
2051     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2052         return;
2053
2054     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2055
2056     for (index = 0; index < num_tables; index++) {
2057         int id = va_to_gen7_jpeg_hufftable[index];
2058         if (!huffman_table->load_huffman_table[index])
2059             continue;
2060         BEGIN_BCS_BATCH(batch, 53);
2061         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2062         OUT_BCS_BATCH(batch, id);
2063         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2064         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2065         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2066         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2067         ADVANCE_BCS_BATCH(batch);
2068     }
2069 }
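
/*
 * Size check, informally: each MFX_JPEG_HUFF_TABLE_STATE payload above is
 * 12 + 12 + 16 + 164 = 204 bytes = 51 DWords, which together with the command
 * header and the table id gives the 53 DWords reserved by BEGIN_BCS_BATCH.
 */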
2070
2071 static const int va_to_gen7_jpeg_qm[5] = {
2072     -1,
2073     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2074     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2075     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2076     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2077 };
2078
2079 static void
2080 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2081                        struct decode_state *decode_state,
2082                        struct gen7_mfd_context *gen7_mfd_context)
2083 {
2084     VAPictureParameterBufferJPEGBaseline *pic_param;
2085     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2086     int index;
2087
2088     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2089         return;
2090
2091     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2092     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2093
2094     assert(pic_param->num_components <= 3);
2095
2096     for (index = 0; index < pic_param->num_components; index++) {
2097         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2098         int qm_type;
2099         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2100         unsigned char raster_qm[64];
2101         int j;
2102
2103         if (id > 4 || id < 1)
2104             continue;
2105
2106         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2107             continue;
2108
2109         qm_type = va_to_gen7_jpeg_qm[id];
2110
2111         for (j = 0; j < 64; j++)
2112             raster_qm[zigzag_direct[j]] = qm[j];
2113
2114         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2115     }
2116 }
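
/*
 * Scan-order note: judging from the conversion above, the quantiser tables in
 * VAIQMatrixBufferJPEGBaseline arrive in zig-zag scan order while the hardware
 * expects raster order; raster_qm[zigzag_direct[j]] = qm[j] performs that
 * inverse mapping, e.g. the third zig-zag entry qm[2] lands at raster index 8
 * (row 1, column 0).
 */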
2117
2118 static void
2119 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2120                          VAPictureParameterBufferJPEGBaseline *pic_param,
2121                          VASliceParameterBufferJPEGBaseline *slice_param,
2122                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2123                          dri_bo *slice_data_bo,
2124                          struct gen7_mfd_context *gen7_mfd_context)
2125 {
2126     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2127     int scan_component_mask = 0;
2128     int i;
2129
2130     assert(slice_param->num_components > 0);
2131     assert(slice_param->num_components < 4);
2132     assert(slice_param->num_components <= pic_param->num_components);
2133
2134     for (i = 0; i < slice_param->num_components; i++) {
2135         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2136         case 1:
2137             scan_component_mask |= (1 << 0);
2138             break;
2139         case 2:
2140             scan_component_mask |= (1 << 1);
2141             break;
2142         case 3:
2143             scan_component_mask |= (1 << 2);
2144             break;
2145         default:
2146             assert(0);
2147             break;
2148         }
2149     }
2150
2151     BEGIN_BCS_BATCH(batch, 6);
2152     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2153     OUT_BCS_BATCH(batch, 
2154                   slice_param->slice_data_size);
2155     OUT_BCS_BATCH(batch, 
2156                   slice_param->slice_data_offset);
2157     OUT_BCS_BATCH(batch,
2158                   slice_param->slice_horizontal_position << 16 |
2159                   slice_param->slice_vertical_position << 0);
2160     OUT_BCS_BATCH(batch,
2161                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2162                   (scan_component_mask << 27) |                 /* scan components */
2163                   (0 << 26) |   /* disable interrupt allowed */
2164                   (slice_param->num_mcus << 0));                /* MCU count */
2165     OUT_BCS_BATCH(batch,
2166                   (slice_param->restart_interval << 0));    /* RestartInterval */
2167     ADVANCE_BCS_BATCH(batch);
2168 }
2169
2170 /* Workaround for JPEG decoding on Ivybridge */
2171 #ifdef JPEG_WA
2172
2173 VAStatus 
2174 i965_DestroySurfaces(VADriverContextP ctx,
2175                      VASurfaceID *surface_list,
2176                      int num_surfaces);
2177 VAStatus 
2178 i965_CreateSurfaces(VADriverContextP ctx,
2179                     int width,
2180                     int height,
2181                     int format,
2182                     int num_surfaces,
2183                     VASurfaceID *surfaces);
2184
2185 static struct {
2186     int width;
2187     int height;
2188     unsigned char data[32];
2189     int data_size;
2190     int data_bit_offset;
2191     int qp;
2192 } gen7_jpeg_wa_clip = {
2193     16,
2194     16,
2195     {
2196         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2197         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2198     },
2199     14,
2200     40,
2201     28,
2202 };
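
/*
 * A rough reading of the clip above: a 14-byte, 16x16, QP 28 AVC bitstream
 * whose slice data starts at bit 40 (i.e. after a 5-byte header).  Under
 * JPEG_WA it is decoded through the AVC path by gen8_mfd_jpeg_wa() before
 * every JPEG picture, apparently to leave the MFX pipeline in a known state.
 */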
2203
2204 static void
2205 gen8_jpeg_wa_init(VADriverContextP ctx,
2206                   struct gen7_mfd_context *gen7_mfd_context)
2207 {
2208     struct i965_driver_data *i965 = i965_driver_data(ctx);
2209     VAStatus status;
2210     struct object_surface *obj_surface;
2211
2212     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2213         i965_DestroySurfaces(ctx,
2214                              &gen7_mfd_context->jpeg_wa_surface_id,
2215                              1);
2216
2217     status = i965_CreateSurfaces(ctx,
2218                                  gen7_jpeg_wa_clip.width,
2219                                  gen7_jpeg_wa_clip.height,
2220                                  VA_RT_FORMAT_YUV420,
2221                                  1,
2222                                  &gen7_mfd_context->jpeg_wa_surface_id);
2223     assert(status == VA_STATUS_SUCCESS);
2224
2225     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2226     assert(obj_surface);
2227     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
2228     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2229
2230     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2231         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2232                                                                "JPEG WA data",
2233                                                                0x1000,
2234                                                                0x1000);
2235         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2236                        0,
2237                        gen7_jpeg_wa_clip.data_size,
2238                        gen7_jpeg_wa_clip.data);
2239     }
2240 }
2241
2242 static void
2243 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2244                               struct gen7_mfd_context *gen7_mfd_context)
2245 {
2246     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2247
2248     BEGIN_BCS_BATCH(batch, 5);
2249     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2250     OUT_BCS_BATCH(batch,
2251                   (MFX_LONG_MODE << 17) | /* Currently only the long format is supported */
2252                   (MFD_MODE_VLD << 15) | /* VLD mode */
2253                   (0 << 10) | /* disable Stream-Out */
2254                   (0 << 9)  | /* Post Deblocking Output */
2255                   (1 << 8)  | /* Pre Deblocking Output */
2256                   (0 << 5)  | /* not in stitch mode */
2257                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2258                   (MFX_FORMAT_AVC << 0));
2259     OUT_BCS_BATCH(batch,
2260                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2261                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2262                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2263                   (0 << 1)  |
2264                   (0 << 0));
2265     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2266     OUT_BCS_BATCH(batch, 0); /* reserved */
2267     ADVANCE_BCS_BATCH(batch);
2268 }
2269
2270 static void
2271 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2272                            struct gen7_mfd_context *gen7_mfd_context)
2273 {
2274     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2275     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2276
2277     BEGIN_BCS_BATCH(batch, 6);
2278     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2279     OUT_BCS_BATCH(batch, 0);
2280     OUT_BCS_BATCH(batch,
2281                   ((obj_surface->orig_width - 1) << 18) |
2282                   ((obj_surface->orig_height - 1) << 4));
2283     OUT_BCS_BATCH(batch,
2284                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2285                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2286                   (0 << 22) | /* surface object control state, ignored */
2287                   ((obj_surface->width - 1) << 3) | /* pitch */
2288                   (0 << 2)  | /* must be 0 */
2289                   (1 << 1)  | /* must be tiled */
2290                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2291     OUT_BCS_BATCH(batch,
2292                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2293                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2294     OUT_BCS_BATCH(batch,
2295                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2296                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2297     ADVANCE_BCS_BATCH(batch);
2298 }
2299
2300 static void
2301 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2302                                  struct gen7_mfd_context *gen7_mfd_context)
2303 {
2304     struct i965_driver_data *i965 = i965_driver_data(ctx);
2305     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2306     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2307     dri_bo *intra_bo;
2308     int i;
2309
2310     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2311                             "intra row store",
2312                             128 * 64,
2313                             0x1000);
2314
2315     BEGIN_BCS_BATCH(batch, 61);
2316     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2317     OUT_BCS_RELOC(batch,
2318                   obj_surface->bo,
2319                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2320                   0);
2321     OUT_BCS_BATCH(batch, 0);
2322     OUT_BCS_BATCH(batch, 0);
2323
2324
2325     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2326     OUT_BCS_BATCH(batch, 0);
2327     OUT_BCS_BATCH(batch, 0);
2328
2329     /* uncompressed video & stream out, DW 7-12 */
2330     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2331     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2332     OUT_BCS_BATCH(batch, 0);
2333     OUT_BCS_BATCH(batch, 0);
2334     OUT_BCS_BATCH(batch, 0);
2335     OUT_BCS_BATCH(batch, 0);
2336
2337     /* DW 13-15 are for the intra row store scratch */
2338     OUT_BCS_RELOC(batch,
2339                   intra_bo,
2340                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2341                   0);
2342     OUT_BCS_BATCH(batch, 0);
2343     OUT_BCS_BATCH(batch, 0);
2344
2345     /* DW 16-18 are for the deblocking filter */
2346     OUT_BCS_BATCH(batch, 0);
2347     OUT_BCS_BATCH(batch, 0);
2348     OUT_BCS_BATCH(batch, 0);
2349
2350     /* DW 19..50 */
2351     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2352         OUT_BCS_BATCH(batch, 0);
2353         OUT_BCS_BATCH(batch, 0);
2354     }
2355     OUT_BCS_BATCH(batch, 0);
2356
2357     /* DW 52-54 are for the mb status address */
2358     OUT_BCS_BATCH(batch, 0);
2359     OUT_BCS_BATCH(batch, 0);
2360     OUT_BCS_BATCH(batch, 0);
2361     /* DW 56-60 are for the ILDB & second ILDB addresses */
2362     OUT_BCS_BATCH(batch, 0);
2363     OUT_BCS_BATCH(batch, 0);
2364     OUT_BCS_BATCH(batch, 0);
2365     OUT_BCS_BATCH(batch, 0);
2366     OUT_BCS_BATCH(batch, 0);
2367     OUT_BCS_BATCH(batch, 0);
2368
2369     ADVANCE_BCS_BATCH(batch);
2370
2371     dri_bo_unreference(intra_bo);
2372 }
2373
2374 static void
2375 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2376                                      struct gen7_mfd_context *gen7_mfd_context)
2377 {
2378     struct i965_driver_data *i965 = i965_driver_data(ctx);
2379     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2380     dri_bo *bsd_mpc_bo, *mpr_bo;
2381
2382     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2383                               "bsd mpc row store",
2384                               11520, /* 1.5 * 120 * 64 */
2385                               0x1000);
2386
2387     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2388                           "mpr row store",
2389                           7680, /* 1.0 * 120 * 64 */
2390                           0x1000);
2391
2392     BEGIN_BCS_BATCH(batch, 10);
2393     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2394
2395     OUT_BCS_RELOC(batch,
2396                   bsd_mpc_bo,
2397                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2398                   0);
2399
2400     OUT_BCS_BATCH(batch, 0);
2401     OUT_BCS_BATCH(batch, 0);
2402
2403     OUT_BCS_RELOC(batch,
2404                   mpr_bo,
2405                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2406                   0);
2407     OUT_BCS_BATCH(batch, 0);
2408     OUT_BCS_BATCH(batch, 0);
2409
2410     OUT_BCS_BATCH(batch, 0);
2411     OUT_BCS_BATCH(batch, 0);
2412     OUT_BCS_BATCH(batch, 0);
2413
2414     ADVANCE_BCS_BATCH(batch);
2415
2416     dri_bo_unreference(bsd_mpc_bo);
2417     dri_bo_unreference(mpr_bo);
2418 }
2419
2420 static void
2421 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2422                           struct gen7_mfd_context *gen7_mfd_context)
2423 {
2424
2425 }
2426
2427 static void
2428 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2429                            struct gen7_mfd_context *gen7_mfd_context)
2430 {
2431     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2432     int img_struct = 0;
2433     int mbaff_frame_flag = 0;
2434     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2435
2436     BEGIN_BCS_BATCH(batch, 16);
2437     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2438     OUT_BCS_BATCH(batch, 
2439                   width_in_mbs * height_in_mbs);
2440     OUT_BCS_BATCH(batch, 
2441                   ((height_in_mbs - 1) << 16) | 
2442                   ((width_in_mbs - 1) << 0));
2443     OUT_BCS_BATCH(batch, 
2444                   (0 << 24) |
2445                   (0 << 16) |
2446                   (0 << 14) |
2447                   (0 << 13) |
2448                   (0 << 12) | /* differ from GEN6 */
2449                   (0 << 10) |
2450                   (img_struct << 8));
2451     OUT_BCS_BATCH(batch,
2452                   (1 << 10) | /* 4:2:0 */
2453                   (1 << 7) |  /* CABAC */
2454                   (0 << 6) |
2455                   (0 << 5) |
2456                   (0 << 4) |
2457                   (0 << 3) |
2458                   (1 << 2) |
2459                   (mbaff_frame_flag << 1) |
2460                   (0 << 0));
2461     OUT_BCS_BATCH(batch, 0);
2462     OUT_BCS_BATCH(batch, 0);
2463     OUT_BCS_BATCH(batch, 0);
2464     OUT_BCS_BATCH(batch, 0);
2465     OUT_BCS_BATCH(batch, 0);
2466     OUT_BCS_BATCH(batch, 0);
2467     OUT_BCS_BATCH(batch, 0);
2468     OUT_BCS_BATCH(batch, 0);
2469     OUT_BCS_BATCH(batch, 0);
2470     OUT_BCS_BATCH(batch, 0);
2471     OUT_BCS_BATCH(batch, 0);
2472     ADVANCE_BCS_BATCH(batch);
2473 }
2474
2475 static void
2476 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2477                                   struct gen7_mfd_context *gen7_mfd_context)
2478 {
2479     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2480     int i;
2481
2482     BEGIN_BCS_BATCH(batch, 71);
2483     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2484
2485     /* reference surfaces 0..15 */
2486     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2487         OUT_BCS_BATCH(batch, 0); /* top */
2488         OUT_BCS_BATCH(batch, 0); /* bottom */
2489     }
2490         
2491
2492     OUT_BCS_BATCH(batch, 0);
2493     /* the current decoding frame/field */
2494     OUT_BCS_BATCH(batch, 0); /* top */
2495     OUT_BCS_BATCH(batch, 0);
2496     OUT_BCS_BATCH(batch, 0);
2497
2498     /* POC List */
2499     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2500         OUT_BCS_BATCH(batch, 0);
2501         OUT_BCS_BATCH(batch, 0);
2502     }
2503
2504     OUT_BCS_BATCH(batch, 0);
2505     OUT_BCS_BATCH(batch, 0);
2506
2507     ADVANCE_BCS_BATCH(batch);
2508 }
2509
2510 static void
2511 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2512                                      struct gen7_mfd_context *gen7_mfd_context)
2513 {
2514     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2515
2516     BEGIN_BCS_BATCH(batch, 11);
2517     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2518     OUT_BCS_RELOC(batch,
2519                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2520                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2521                   0);
2522     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2523     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2524     OUT_BCS_BATCH(batch, 0);
2525     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2526     OUT_BCS_BATCH(batch, 0);
2527     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2528     OUT_BCS_BATCH(batch, 0);
2529     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2530     OUT_BCS_BATCH(batch, 0);
2531     ADVANCE_BCS_BATCH(batch);
2532 }
2533
2534 static void
2535 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2536                             struct gen7_mfd_context *gen7_mfd_context)
2537 {
2538     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2539
2540     /* the input bitstream format on GEN7 differs from GEN6 */
2541     BEGIN_BCS_BATCH(batch, 6);
2542     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2543     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2544     OUT_BCS_BATCH(batch, 0);
2545     OUT_BCS_BATCH(batch,
2546                   (0 << 31) |
2547                   (0 << 14) |
2548                   (0 << 12) |
2549                   (0 << 10) |
2550                   (0 << 8));
2551     OUT_BCS_BATCH(batch,
2552                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2553                   (0 << 5)  |
2554                   (0 << 4)  |
2555                   (1 << 3) | /* LastSlice Flag */
2556                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2557     OUT_BCS_BATCH(batch, 0);
2558     ADVANCE_BCS_BATCH(batch);
2559 }
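
/*
 * Offset arithmetic, worked from the constants above: data_bit_offset is 40,
 * so the BSD object programs a byte offset of 40 >> 3 = 5 and a residual bit
 * offset of 40 & 0x7 = 0, i.e. the slice data starts exactly 5 bytes into the
 * workaround clip.
 */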
2560
2561 static void
2562 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2563                              struct gen7_mfd_context *gen7_mfd_context)
2564 {
2565     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2566     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2567     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2568     int first_mb_in_slice = 0;
2569     int slice_type = SLICE_TYPE_I;
2570
2571     BEGIN_BCS_BATCH(batch, 11);
2572     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2573     OUT_BCS_BATCH(batch, slice_type);
2574     OUT_BCS_BATCH(batch, 
2575                   (num_ref_idx_l1 << 24) |
2576                   (num_ref_idx_l0 << 16) |
2577                   (0 << 8) |
2578                   (0 << 0));
2579     OUT_BCS_BATCH(batch, 
2580                   (0 << 29) |
2581                   (1 << 27) |   /* disable Deblocking */
2582                   (0 << 24) |
2583                   (gen7_jpeg_wa_clip.qp << 16) |
2584                   (0 << 8) |
2585                   (0 << 0));
2586     OUT_BCS_BATCH(batch, 
2587                   (slice_ver_pos << 24) |
2588                   (slice_hor_pos << 16) | 
2589                   (first_mb_in_slice << 0));
2590     OUT_BCS_BATCH(batch,
2591                   (next_slice_ver_pos << 16) |
2592                   (next_slice_hor_pos << 0));
2593     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2594     OUT_BCS_BATCH(batch, 0);
2595     OUT_BCS_BATCH(batch, 0);
2596     OUT_BCS_BATCH(batch, 0);
2597     OUT_BCS_BATCH(batch, 0);
2598     ADVANCE_BCS_BATCH(batch);
2599 }
2600
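/*
 * JPEG workaround: run a complete, minimal AVC decode (pipe mode select,
 * surface/buffer state, QM, IMG, slice and BSD object) before the real
 * JPEG decode, presumably to bring the MFX engine into a known state.
 */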
2601 static void
2602 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2603                  struct gen7_mfd_context *gen7_mfd_context)
2604 {
2605     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2606     gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2607     intel_batchbuffer_emit_mi_flush(batch);
2608     gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2609     gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2610     gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2611     gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2612     gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2613     gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2614     gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2615
2616     gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2617     gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2618     gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2619 }
2620
2621 #endif
2622
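/*
 * Baseline JPEG decode. The slice parameters are walked twice: the first
 * pass finds the highest DC/AC Huffman table selector in use (so only
 * max_selector + 1 tables are programmed), the second pass emits one
 * MFD_JPEG_BSD_OBJECT per scan.
 */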
2623 void
2624 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2625                              struct decode_state *decode_state,
2626                              struct gen7_mfd_context *gen7_mfd_context)
2627 {
2628     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2629     VAPictureParameterBufferJPEGBaseline *pic_param;
2630     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2631     dri_bo *slice_data_bo;
2632     int i, j, max_selector = 0;
2633
2634     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2635     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2636
2637     /* Currently only Baseline DCT is supported */
2638     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2639     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2640 #ifdef JPEG_WA
2641     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2642 #endif
2643     intel_batchbuffer_emit_mi_flush(batch);
2644     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2645     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2646     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2647     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2648     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2649
2650     for (j = 0; j < decode_state->num_slice_params; j++) {
2651         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2652         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2653         slice_data_bo = decode_state->slice_datas[j]->bo;
2654         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2655
2656         if (j == decode_state->num_slice_params - 1)
2657             next_slice_group_param = NULL;
2658         else
2659             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2660
2661         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2662             int component;
2663
2664             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2665
2666             if (i < decode_state->slice_params[j]->num_elements - 1)
2667                 next_slice_param = slice_param + 1;
2668             else
2669                 next_slice_param = next_slice_group_param;
2670
2671             for (component = 0; component < slice_param->num_components; component++) {
2672                 if (max_selector < slice_param->components[component].dc_table_selector)
2673                     max_selector = slice_param->components[component].dc_table_selector;
2674
2675                 if (max_selector < slice_param->components[component].ac_table_selector)
2676                     max_selector = slice_param->components[component].ac_table_selector;
2677             }
2678
2679             slice_param++;
2680         }
2681     }
2682
2683     assert(max_selector < 2);
2684     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2685
2686     for (j = 0; j < decode_state->num_slice_params; j++) {
2687         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2688         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2689         slice_data_bo = decode_state->slice_datas[j]->bo;
2690         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2691
2692         if (j == decode_state->num_slice_params - 1)
2693             next_slice_group_param = NULL;
2694         else
2695             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2696
2697         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2698             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2699
2700             if (i < decode_state->slice_params[j]->num_elements - 1)
2701                 next_slice_param = slice_param + 1;
2702             else
2703                 next_slice_param = next_slice_group_param;
2704
2705             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2706             slice_param++;
2707         }
2708     }
2709
2710     intel_batchbuffer_end_atomic(batch);
2711     intel_batchbuffer_flush(batch);
2712 }
2713
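/* Make sure the render target has an NV12 buffer before any VP8 state is emitted. */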
2714 static void
2715 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2716                           struct decode_state *decode_state,
2717                           struct gen7_mfd_context *gen7_mfd_context)
2718 {
2719     struct object_surface *obj_surface;
2720
2721     /* Current decoded picture */
2722     obj_surface = decode_state->render_object;
2723     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2724 }
2725
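/*
 * MFX_VP8_PIC_STATE (38 DWords), built from the VA picture parameters,
 * the VP8 IQ matrix and the coefficient probability buffer; VP8 carries
 * exactly one slice parameter per frame, so slice_params[0] is used.
 */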
2726 static void
2727 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2728                        struct decode_state *decode_state,
2729                        struct gen7_mfd_context *gen7_mfd_context)
2730 {
2731     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2732     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2733     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2734     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2735     dri_bo *probs_bo = decode_state->probability_data->bo;
2736     int i, j;
2737
2738     BEGIN_BCS_BATCH(batch, 38);
2739     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2740     OUT_BCS_BATCH(batch,
2741                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2742                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2743     OUT_BCS_BATCH(batch,
2744                   slice_param->num_of_partitions << 24 |
2745                   pic_param->pic_fields.bits.sharpness_level << 16 |
2746                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2747                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2748                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2749                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2750                   pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2751                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2752                   0 << 7 | /* segmentation id streamin disabled */
2753                   0 << 6 | /* segmentation id streamout disabled */
2754                   pic_param->pic_fields.bits.key_frame << 5 |
2755                   pic_param->pic_fields.bits.filter_type << 4 |
2756                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2757                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2758
2759     OUT_BCS_BATCH(batch,
2760                   pic_param->loop_filter_level[3] << 24 |
2761                   pic_param->loop_filter_level[2] << 16 |
2762                   pic_param->loop_filter_level[1] <<  8 |
2763                   pic_param->loop_filter_level[0] <<  0);
2764
2765     /* Quantizer values for the 4 segments, DW4-DW15 */
2766     for (i = 0; i < 4; i++) {
2767         OUT_BCS_BATCH(batch,
2768                       iq_matrix->quantization_index[i][0] << 16 | /* Y1AC */
2769                       iq_matrix->quantization_index[i][1] <<  0); /* Y1DC */
2770         OUT_BCS_BATCH(batch,
2771                       iq_matrix->quantization_index[i][5] << 16 | /* UVAC */
2772                       iq_matrix->quantization_index[i][4] <<  0); /* UVDC */
2773         OUT_BCS_BATCH(batch,
2774                       iq_matrix->quantization_index[i][3] << 16 | /* Y2AC */
2775                       iq_matrix->quantization_index[i][2] <<  0); /* Y2DC */
2776     }
2777
2778     /* CoeffProbability table for non-key frame, DW16-DW18 */
2779     if (probs_bo) {
2780         OUT_BCS_RELOC(batch, probs_bo,
2781                       0, I915_GEM_DOMAIN_INSTRUCTION,
2782                       0);
2783         OUT_BCS_BATCH(batch, 0);
2784         OUT_BCS_BATCH(batch, 0);
2785     } else {
2786         OUT_BCS_BATCH(batch, 0);
2787         OUT_BCS_BATCH(batch, 0);
2788         OUT_BCS_BATCH(batch, 0);
2789     }
2790
2791     OUT_BCS_BATCH(batch,
2792                   pic_param->mb_segment_tree_probs[2] << 16 |
2793                   pic_param->mb_segment_tree_probs[1] <<  8 |
2794                   pic_param->mb_segment_tree_probs[0] <<  0);
2795
2796     OUT_BCS_BATCH(batch,
2797                   pic_param->prob_skip_false << 24 |
2798                   pic_param->prob_intra      << 16 |
2799                   pic_param->prob_last       <<  8 |
2800                   pic_param->prob_gf         <<  0);
2801
2802     OUT_BCS_BATCH(batch,
2803                   pic_param->y_mode_probs[3] << 24 |
2804                   pic_param->y_mode_probs[2] << 16 |
2805                   pic_param->y_mode_probs[1] <<  8 |
2806                   pic_param->y_mode_probs[0] <<  0);
2807
2808     OUT_BCS_BATCH(batch,
2809                   pic_param->uv_mode_probs[2] << 16 |
2810                   pic_param->uv_mode_probs[1] <<  8 |
2811                   pic_param->uv_mode_probs[0] <<  0);
2812     
2813     /* MV update value, DW23-DW32 */
2814     for (i = 0; i < 2; i++) {
2815         for (j = 0; j < 20; j += 4) {
2816             OUT_BCS_BATCH(batch,
2817                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 | /* mv_probs[i] has 19 entries; pad the last byte with 0 */
2818                           pic_param->mv_probs[i][j + 2] << 16 |
2819                           pic_param->mv_probs[i][j + 1] <<  8 |
2820                           pic_param->mv_probs[i][j + 0] <<  0);
2821         }
2822     }
2823
2824     OUT_BCS_BATCH(batch,
2825                   pic_param->loop_filter_deltas_ref_frame[3] << 24 |
2826                   pic_param->loop_filter_deltas_ref_frame[2] << 16 |
2827                   pic_param->loop_filter_deltas_ref_frame[1] <<  8 |
2828                   pic_param->loop_filter_deltas_ref_frame[0] <<  0);
2829
2830     OUT_BCS_BATCH(batch,
2831                   pic_param->loop_filter_deltas_mode[3] << 24 |
2832                   pic_param->loop_filter_deltas_mode[2] << 16 |
2833                   pic_param->loop_filter_deltas_mode[1] <<  8 |
2834                   pic_param->loop_filter_deltas_mode[0] <<  0);
2835
2836     /* segmentation id stream base address, DW35-DW37 */
2837     OUT_BCS_BATCH(batch, 0);
2838     OUT_BCS_BATCH(batch, 0);
2839     OUT_BCS_BATCH(batch, 0);
2840     ADVANCE_BCS_BATCH(batch);
2841 }
2842
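/*
 * MFD_VP8_BSD_OBJECT (22 DWords). Only the partition count, the
 * macroblock bit offset and the concealment method are programmed here;
 * the per-partition sizes and offsets (DW3-DW20) are left at zero.
 */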
2843 static void
2844 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2845                         VAPictureParameterBufferVP8 *pic_param,
2846                         VASliceParameterBufferVP8 *slice_param,
2847                         dri_bo *slice_data_bo,
2848                         struct gen7_mfd_context *gen7_mfd_context)
2849 {
2850     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2851
2852     BEGIN_BCS_BATCH(batch, 22);
2853     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2854     OUT_BCS_BATCH(batch,
2855                   0 << 16 | /* Partition 0 CPBAC Entropy Count */
2856                   0 <<  8 | /* Partition 0 Count Entropy Range */
2857                   slice_param->num_of_partitions << 4 |
2858                   (slice_param->macroblock_offset & 0x7));
2859     OUT_BCS_BATCH(batch,
2860                   0 << 24 | /* Partition 0 Count Entropy Value */
2861                   0);
2862     OUT_BCS_BATCH(batch,
2863                   0);       /* Partition 0 Data length, DW3 */
2864     OUT_BCS_BATCH(batch,
2865                   0);       /* Partition 0 Data offset, DW4 */
2866     OUT_BCS_BATCH(batch,
2867                   0);       /* Partition 1 Data length, DW5 */
2868     OUT_BCS_BATCH(batch,
2869                   0);       /* Partition 1 Data offset, DW6 */
2870     OUT_BCS_BATCH(batch,
2871                   0);       /* Partition 2 Data length, DW7 */
2872     OUT_BCS_BATCH(batch,
2873                   0);       /* Partition 2 Data offset, DW8 */
2874     OUT_BCS_BATCH(batch,
2875                   0);       /* Partition 3 Data length, DW9 */
2876     OUT_BCS_BATCH(batch,
2877                   0);       /* Partition 3 Data offset, DW10 */
2878     OUT_BCS_BATCH(batch,
2879                   0);       /* Partition 4 Data length, DW11 */
2880     OUT_BCS_BATCH(batch,
2881                   0);       /* Partition 4 Data offset, DW12 */
2882     OUT_BCS_BATCH(batch,
2883                   0);       /* Partition 5 Data length, DW13 */
2884     OUT_BCS_BATCH(batch,
2885                   0);       /* Partition 5 Data offset, DW14 */
2886     OUT_BCS_BATCH(batch,
2887                   0);       /* Partition 6 Data length, DW15 */
2888     OUT_BCS_BATCH(batch,
2889                   0);       /* Partition 6 Data offset, DW16 */
2890     OUT_BCS_BATCH(batch,
2891                   0);       /* Partition 7 Data length, DW17 */
2892     OUT_BCS_BATCH(batch,
2893                   0);       /* Partition 7 Data offset, DW18 */
2894     OUT_BCS_BATCH(batch,
2895                   0);       /* Partition 8 Data length, DW19 */
2896     OUT_BCS_BATCH(batch,
2897                   0);       /* Partition 8 Data offset, DW20 */
2898     OUT_BCS_BATCH(batch,
2899                   1 << 31 | /* concealment method */
2900                   0);
2901     ADVANCE_BCS_BATCH(batch);
2902 }
2903
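/*
 * Top-level VP8 frame decode: allocate the render target, select the VP8
 * pipe mode, program the surface and buffer address state and the picture
 * state, then flush the batch.
 */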
2904 void
2905 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
2906                             struct decode_state *decode_state,
2907                             struct gen7_mfd_context *gen7_mfd_context)
2908 {
2909     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2910     VAPictureParameterBufferVP8 *pic_param;
2911
2912     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2913     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2914
2915     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
2916     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2917     intel_batchbuffer_emit_mi_flush(batch);
2918     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
2919     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
2920     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
2921     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
2922     intel_batchbuffer_end_atomic(batch);
2923     intel_batchbuffer_flush(batch);
2924 }
2925
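/*
 * hw_context::run hook: sanity-check the input buffers for the requested
 * profile, then dispatch to the per-codec decode routine.
 */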
2926 static VAStatus
2927 gen8_mfd_decode_picture(VADriverContextP ctx, 
2928                         VAProfile profile, 
2929                         union codec_state *codec_state,
2930                         struct hw_context *hw_context)
2932 {
2933     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2934     struct decode_state *decode_state = &codec_state->decode;
2935     VAStatus vaStatus;
2936
2937     assert(gen7_mfd_context);
2938
2939     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
2940
2941     if (vaStatus != VA_STATUS_SUCCESS)
2942         goto out;
2943
2944     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
2945
2946     switch (profile) {
2947     case VAProfileMPEG2Simple:
2948     case VAProfileMPEG2Main:
2949         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
2950         break;
2951         
2952     case VAProfileH264Baseline:
2953     case VAProfileH264Main:
2954     case VAProfileH264High:
2955         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
2956         break;
2957
2958     case VAProfileVC1Simple:
2959     case VAProfileVC1Main:
2960     case VAProfileVC1Advanced:
2961         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
2962         break;
2963
2964     case VAProfileJPEGBaseline:
2965         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
2966         break;
2967
2968     case VAProfileVP8Version0_3:
2969         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
2970         break;
2971
2972     default:
2973         assert(0);
2974         break;
2975     }
2976
2977     vaStatus = VA_STATUS_SUCCESS;
2978
2979 out:
2980     return vaStatus;
2981 }
2982
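/*
 * hw_context::destroy hook: release every scratch/output buffer object
 * held by the context, free the batch buffer and finally the context.
 */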
2983 static void
2984 gen8_mfd_context_destroy(void *hw_context)
2985 {
2986     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2987
2988     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2989     gen7_mfd_context->post_deblocking_output.bo = NULL;
2990
2991     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2992     gen7_mfd_context->pre_deblocking_output.bo = NULL;
2993
2994     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2995     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2996
2997     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2998     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2999
3000     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3001     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3002
3003     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3004     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3005
3006     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3007     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3008
3009     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3010
3011     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3012     free(gen7_mfd_context);
3013 }
3014
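/* Mark all MPEG-2 quantiser matrices as not yet loaded. */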
3015 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3016                                     struct gen7_mfd_context *gen7_mfd_context)
3017 {
3018     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3019     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3020     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3021     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3022 }
3023
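/*
 * Create the Gen8 decoder hw_context: hook up the run/destroy callbacks,
 * allocate the batch buffer, invalidate the reference frame slots and do
 * the per-codec initialization for the configured profile.
 */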
3024 struct hw_context *
3025 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3026 {
3027     struct intel_driver_data *intel = intel_driver_data(ctx);
3028     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3029     int i;
3030
3031     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3032     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3033     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3034
3035     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3036         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3037         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3038     }
3039
3040     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3041
3042     switch (obj_config->profile) {
3043     case VAProfileMPEG2Simple:
3044     case VAProfileMPEG2Main:
3045         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3046         break;
3047
3048     case VAProfileH264Baseline:
3049     case VAProfileH264Main:
3050     case VAProfileH264High:
3051         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3052         break;
3053     default:
3054         break;
3055     }
3056     return (struct hw_context *)gen7_mfd_context;
3057 }