OSDN Git Service

Add sysdeps.h for system-dependent definitions.
[android-x86/hardware-intel-common-vaapi.git] / src / gen7_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include "sysdeps.h"
30 #include "intel_batchbuffer.h"
31 #include "intel_driver.h"
32 #include "i965_defines.h"
33 #include "i965_drv_video.h"
34 #include "i965_decoder_utils.h"
35
36 #include "gen7_mfd.h"
37
/*
 * 8x8 zig-zag scan table: entry k is the row-major index (0..63) of the
 * k-th coefficient visited along the zig-zag path.
 */
static const uint32_t zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10, 17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
};
48
49 static void
50 gen7_mfd_avc_frame_store_index(VADriverContextP ctx,
51                                VAPictureParameterBufferH264 *pic_param,
52                                struct gen7_mfd_context *gen7_mfd_context)
53 {
54     struct i965_driver_data *i965 = i965_driver_data(ctx);
55     int i, j;
56
57     assert(ARRAY_ELEMS(gen7_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
58
59     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
60         int found = 0;
61
62         if (gen7_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
63             continue;
64
65         for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
66             VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
67             if (ref_pic->flags & VA_PICTURE_H264_INVALID)
68                 continue;
69
70             if (gen7_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
71                 found = 1;
72                 break;
73             }
74         }
75
76         if (!found) {
77             struct object_surface *obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
78             obj_surface->flags &= ~SURFACE_REFERENCED;
79
80             if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
81                 dri_bo_unreference(obj_surface->bo);
82                 obj_surface->bo = NULL;
83                 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
84             }
85
86             if (obj_surface->free_private_data)
87                 obj_surface->free_private_data(&obj_surface->private_data);
88
89             gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
90             gen7_mfd_context->reference_surface[i].frame_store_id = -1;
91         }
92     }
93
94     for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
95         VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
96         int found = 0;
97
98         if (ref_pic->flags & VA_PICTURE_H264_INVALID)
99             continue;
100
101         for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
102             if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
103                 continue;
104             
105             if (gen7_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
106                 found = 1;
107                 break;
108             }
109         }
110
111         if (!found) {
112             int frame_idx;
113             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
114             
115             assert(obj_surface);
116             i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
117
118             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface); frame_idx++) {
119                 for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
120                     if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
121                         continue;
122
123                     if (gen7_mfd_context->reference_surface[j].frame_store_id == frame_idx)
124                         break;
125                 }
126
127                 if (j == ARRAY_ELEMS(gen7_mfd_context->reference_surface))
128                     break;
129             }
130
131             assert(frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface));
132
133             for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
134                 if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
135                     gen7_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
136                     gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
137                     break;
138                 }
139             }
140         }
141     }
142
143     /* sort */
144     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface) - 1; i++) {
145         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
146             gen7_mfd_context->reference_surface[i].frame_store_id == i)
147             continue;
148
149         for (j = i + 1; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
150             if (gen7_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
151                 gen7_mfd_context->reference_surface[j].frame_store_id == i) {
152                 VASurfaceID id = gen7_mfd_context->reference_surface[i].surface_id;
153                 int frame_idx = gen7_mfd_context->reference_surface[i].frame_store_id;
154
155                 gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[j].surface_id;
156                 gen7_mfd_context->reference_surface[i].frame_store_id = gen7_mfd_context->reference_surface[j].frame_store_id;
157                 gen7_mfd_context->reference_surface[j].surface_id = id;
158                 gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
159                 break;
160             }
161         }
162     }
163 }
164
165 static void 
166 gen7_mfd_free_avc_surface(void **data)
167 {
168     struct gen7_avc_surface *gen7_avc_surface = *data;
169
170     if (!gen7_avc_surface)
171         return;
172
173     dri_bo_unreference(gen7_avc_surface->dmv_top);
174     gen7_avc_surface->dmv_top = NULL;
175     dri_bo_unreference(gen7_avc_surface->dmv_bottom);
176     gen7_avc_surface->dmv_bottom = NULL;
177
178     free(gen7_avc_surface);
179     *data = NULL;
180 }
181
182 static void
183 gen7_mfd_init_avc_surface(VADriverContextP ctx, 
184                           VAPictureParameterBufferH264 *pic_param,
185                           struct object_surface *obj_surface)
186 {
187     struct i965_driver_data *i965 = i965_driver_data(ctx);
188     struct gen7_avc_surface *gen7_avc_surface = obj_surface->private_data;
189     int width_in_mbs, height_in_mbs;
190
191     obj_surface->free_private_data = gen7_mfd_free_avc_surface;
192     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
193     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
194
195     if (!gen7_avc_surface) {
196         gen7_avc_surface = calloc(sizeof(struct gen7_avc_surface), 1);
197         assert((obj_surface->size & 0x3f) == 0);
198         obj_surface->private_data = gen7_avc_surface;
199     }
200
201     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
202                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
203
204     if (gen7_avc_surface->dmv_top == NULL) {
205         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
206                                                  "direct mv w/r buffer",
207                                                  width_in_mbs * height_in_mbs * 64,
208                                                  0x1000);
209         assert(gen7_avc_surface->dmv_top);
210     }
211
212     if (gen7_avc_surface->dmv_bottom_flag &&
213         gen7_avc_surface->dmv_bottom == NULL) {
214         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
215                                                     "direct mv w/r buffer",
216                                                     width_in_mbs * height_in_mbs * 64,                                                    
217                                                     0x1000);
218         assert(gen7_avc_surface->dmv_bottom);
219     }
220 }
221
222 static void
223 gen7_mfd_pipe_mode_select(VADriverContextP ctx,
224                           struct decode_state *decode_state,
225                           int standard_select,
226                           struct gen7_mfd_context *gen7_mfd_context)
227 {
228     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
229
230     assert(standard_select == MFX_FORMAT_MPEG2 ||
231            standard_select == MFX_FORMAT_AVC ||
232            standard_select == MFX_FORMAT_VC1 ||
233            standard_select == MFX_FORMAT_JPEG);
234
235     BEGIN_BCS_BATCH(batch, 5);
236     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
237     OUT_BCS_BATCH(batch,
238                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
239                   (MFD_MODE_VLD << 15) | /* VLD mode */
240                   (0 << 10) | /* disable Stream-Out */
241                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
242                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
243                   (0 << 5)  | /* not in stitch mode */
244                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
245                   (standard_select << 0));
246     OUT_BCS_BATCH(batch,
247                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
248                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
249                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
250                   (0 << 1)  |
251                   (0 << 0));
252     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
253     OUT_BCS_BATCH(batch, 0); /* reserved */
254     ADVANCE_BCS_BATCH(batch);
255 }
256
257 static void
258 gen7_mfd_surface_state(VADriverContextP ctx,
259                        struct decode_state *decode_state,
260                        int standard_select,
261                        struct gen7_mfd_context *gen7_mfd_context)
262 {
263     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
264     struct i965_driver_data *i965 = i965_driver_data(ctx);
265     struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
266     unsigned int y_cb_offset;
267     unsigned int y_cr_offset;
268
269     assert(obj_surface);
270
271     y_cb_offset = obj_surface->y_cb_offset;
272     y_cr_offset = obj_surface->y_cr_offset;
273
274     BEGIN_BCS_BATCH(batch, 6);
275     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
276     OUT_BCS_BATCH(batch, 0);
277     OUT_BCS_BATCH(batch,
278                   ((obj_surface->orig_height - 1) << 18) |
279                   ((obj_surface->orig_width - 1) << 4));
280     OUT_BCS_BATCH(batch,
281                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
282                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
283                   (0 << 22) | /* surface object control state, ignored */
284                   ((obj_surface->width - 1) << 3) | /* pitch */
285                   (0 << 2)  | /* must be 0 */
286                   (1 << 1)  | /* must be tiled */
287                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
288     OUT_BCS_BATCH(batch,
289                   (0 << 16) | /* X offset for U(Cb), must be 0 */
290                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
291     OUT_BCS_BATCH(batch,
292                   (0 << 16) | /* X offset for V(Cr), must be 0 */
293                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
294     ADVANCE_BCS_BATCH(batch);
295 }
296
297 static void
298 gen7_mfd_pipe_buf_addr_state(VADriverContextP ctx,
299                              struct decode_state *decode_state,
300                              int standard_select,
301                              struct gen7_mfd_context *gen7_mfd_context)
302 {
303     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
304     struct i965_driver_data *i965 = i965_driver_data(ctx);
305     int i;
306
307     BEGIN_BCS_BATCH(batch, 24);
308     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
309     if (gen7_mfd_context->pre_deblocking_output.valid)
310         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
311                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
312                       0);
313     else
314         OUT_BCS_BATCH(batch, 0);
315
316     if (gen7_mfd_context->post_deblocking_output.valid)
317         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
318                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
319                       0);
320     else
321         OUT_BCS_BATCH(batch, 0);
322
323     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
324     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
325
326     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
327         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
328                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
329                       0);
330     else
331         OUT_BCS_BATCH(batch, 0);
332
333     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
334         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
335                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
336                       0);
337     else
338         OUT_BCS_BATCH(batch, 0);
339
340     /* DW 7..22 */
341     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
342         struct object_surface *obj_surface;
343
344         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
345             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
346             assert(obj_surface && obj_surface->bo);
347
348             OUT_BCS_RELOC(batch, obj_surface->bo,
349                           I915_GEM_DOMAIN_INSTRUCTION, 0,
350                           0);
351         } else {
352             OUT_BCS_BATCH(batch, 0);
353         }
354     }
355
356     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
357     ADVANCE_BCS_BATCH(batch);
358 }
359
360 static void
361 gen7_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
362                                  dri_bo *slice_data_bo,
363                                  int standard_select,
364                                  struct gen7_mfd_context *gen7_mfd_context)
365 {
366     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
367
368     BEGIN_BCS_BATCH(batch, 11);
369     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
370     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
371     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
372     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
373     OUT_BCS_BATCH(batch, 0);
374     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
375     OUT_BCS_BATCH(batch, 0);
376     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
377     OUT_BCS_BATCH(batch, 0);
378     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
379     OUT_BCS_BATCH(batch, 0);
380     ADVANCE_BCS_BATCH(batch);
381 }
382
383 static void
384 gen7_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
385                                  struct decode_state *decode_state,
386                                  int standard_select,
387                                  struct gen7_mfd_context *gen7_mfd_context)
388 {
389     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
390
391     BEGIN_BCS_BATCH(batch, 4);
392     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
393
394     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
395         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
396                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
397                       0);
398     else
399         OUT_BCS_BATCH(batch, 0);
400
401     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
402         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
403                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
404                       0);
405     else
406         OUT_BCS_BATCH(batch, 0);
407
408     if (gen7_mfd_context->bitplane_read_buffer.valid)
409         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
410                       I915_GEM_DOMAIN_INSTRUCTION, 0,
411                       0);
412     else
413         OUT_BCS_BATCH(batch, 0);
414
415     ADVANCE_BCS_BATCH(batch);
416 }
417
418 static void
419 gen7_mfd_aes_state(VADriverContextP ctx,
420                    struct decode_state *decode_state,
421                    int standard_select)
422 {
423     /* FIXME */
424 }
425
426 static void
427 gen7_mfd_qm_state(VADriverContextP ctx,
428                   int qm_type,
429                   unsigned char *qm,
430                   int qm_length,
431                   struct gen7_mfd_context *gen7_mfd_context)
432 {
433     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
434     unsigned int qm_buffer[16];
435
436     assert(qm_length <= 16 * 4);
437     memcpy(qm_buffer, qm, qm_length);
438
439     BEGIN_BCS_BATCH(batch, 18);
440     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
441     OUT_BCS_BATCH(batch, qm_type << 0);
442     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
443     ADVANCE_BCS_BATCH(batch);
444 }
445 static void
446 gen7_mfd_wait(VADriverContextP ctx,
447               struct decode_state *decode_state,
448               int standard_select,
449               struct gen7_mfd_context *gen7_mfd_context)
450 {
451     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
452
453     BEGIN_BCS_BATCH(batch, 1);
454     OUT_BCS_BATCH(batch, MFX_WAIT | (1 << 8));
455     ADVANCE_BCS_BATCH(batch);
456 }
457
458 static void
459 gen7_mfd_avc_img_state(VADriverContextP ctx,
460                        struct decode_state *decode_state,
461                        struct gen7_mfd_context *gen7_mfd_context)
462 {
463     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
464     int img_struct;
465     int mbaff_frame_flag;
466     unsigned int width_in_mbs, height_in_mbs;
467     VAPictureParameterBufferH264 *pic_param;
468
469     assert(decode_state->pic_param && decode_state->pic_param->buffer);
470     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
471     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
472
473     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
474         img_struct = 1;
475     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
476         img_struct = 3;
477     else
478         img_struct = 0;
479
480     if ((img_struct & 0x1) == 0x1) {
481         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
482     } else {
483         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
484     }
485
486     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
487         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
488         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
489     } else {
490         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
491     }
492
493     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
494                         !pic_param->pic_fields.bits.field_pic_flag);
495
496     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
497     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
498
499     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
500     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
501            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
502     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
503
504     BEGIN_BCS_BATCH(batch, 16);
505     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
506     OUT_BCS_BATCH(batch, 
507                   width_in_mbs * height_in_mbs);
508     OUT_BCS_BATCH(batch, 
509                   ((height_in_mbs - 1) << 16) | 
510                   ((width_in_mbs - 1) << 0));
511     OUT_BCS_BATCH(batch, 
512                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
513                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
514                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
515                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
516                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
517                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
518                   (img_struct << 8));
519     OUT_BCS_BATCH(batch,
520                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
521                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
522                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
523                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
524                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
525                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
526                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
527                   (mbaff_frame_flag << 1) |
528                   (pic_param->pic_fields.bits.field_pic_flag << 0));
529     OUT_BCS_BATCH(batch, 0);
530     OUT_BCS_BATCH(batch, 0);
531     OUT_BCS_BATCH(batch, 0);
532     OUT_BCS_BATCH(batch, 0);
533     OUT_BCS_BATCH(batch, 0);
534     OUT_BCS_BATCH(batch, 0);
535     OUT_BCS_BATCH(batch, 0);
536     OUT_BCS_BATCH(batch, 0);
537     OUT_BCS_BATCH(batch, 0);
538     OUT_BCS_BATCH(batch, 0);
539     OUT_BCS_BATCH(batch, 0);
540     ADVANCE_BCS_BATCH(batch);
541 }
542
543 static void
544 gen7_mfd_avc_qm_state(VADriverContextP ctx,
545                       struct decode_state *decode_state,
546                       struct gen7_mfd_context *gen7_mfd_context)
547 {
548     VAIQMatrixBufferH264 *iq_matrix;
549     VAPictureParameterBufferH264 *pic_param;
550
551     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
552         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
553     else
554         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
555
556     assert(decode_state->pic_param && decode_state->pic_param->buffer);
557     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
558
559     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
560     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
561
562     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
563         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
564         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
565     }
566 }
567
568 static void
569 gen7_mfd_avc_directmode_state(VADriverContextP ctx,
570                               VAPictureParameterBufferH264 *pic_param,
571                               VASliceParameterBufferH264 *slice_param,
572                               struct gen7_mfd_context *gen7_mfd_context)
573 {
574     struct i965_driver_data *i965 = i965_driver_data(ctx);
575     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
576     struct object_surface *obj_surface;
577     struct gen7_avc_surface *gen7_avc_surface;
578     VAPictureH264 *va_pic;
579     int i, j;
580
581     BEGIN_BCS_BATCH(batch, 69);
582     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
583
584     /* reference surfaces 0..15 */
585     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
586         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
587             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
588             assert(obj_surface);
589             gen7_avc_surface = obj_surface->private_data;
590
591             if (gen7_avc_surface == NULL) {
592                 OUT_BCS_BATCH(batch, 0);
593                 OUT_BCS_BATCH(batch, 0);
594             } else {
595                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
596                               I915_GEM_DOMAIN_INSTRUCTION, 0,
597                               0);
598
599                 if (gen7_avc_surface->dmv_bottom_flag == 1)
600                     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
601                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
602                                   0);
603                 else
604                     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
605                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
606                                   0);
607             }
608         } else {
609             OUT_BCS_BATCH(batch, 0);
610             OUT_BCS_BATCH(batch, 0);
611         }
612     }
613
614     /* the current decoding frame/field */
615     va_pic = &pic_param->CurrPic;
616     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
617     obj_surface = SURFACE(va_pic->picture_id);
618     assert(obj_surface && obj_surface->bo && obj_surface->private_data);
619     gen7_avc_surface = obj_surface->private_data;
620
621     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
622                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
623                   0);
624
625     if (gen7_avc_surface->dmv_bottom_flag == 1)
626         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
627                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
628                       0);
629     else
630         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
631                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
632                       0);
633
634     /* POC List */
635     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
636         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
637             int found = 0;
638             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
639                 va_pic = &pic_param->ReferenceFrames[j];
640                 
641                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
642                     continue;
643
644                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
645                     found = 1;
646                     break;
647                 }
648             }
649
650             assert(found == 1);
651             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
652             
653             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
654             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
655         } else {
656             OUT_BCS_BATCH(batch, 0);
657             OUT_BCS_BATCH(batch, 0);
658         }
659     }
660
661     va_pic = &pic_param->CurrPic;
662     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
663     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
664
665     ADVANCE_BCS_BATCH(batch);
666 }
667
668 static void
669 gen7_mfd_avc_slice_state(VADriverContextP ctx,
670                          VAPictureParameterBufferH264 *pic_param,
671                          VASliceParameterBufferH264 *slice_param,
672                          VASliceParameterBufferH264 *next_slice_param,
673                          struct gen7_mfd_context *gen7_mfd_context)
674 {
675     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
676     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
677     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
678     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
679     int num_ref_idx_l0, num_ref_idx_l1;
680     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
681                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
682     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
683     int slice_type;
684
685     if (slice_param->slice_type == SLICE_TYPE_I ||
686         slice_param->slice_type == SLICE_TYPE_SI) {
687         slice_type = SLICE_TYPE_I;
688     } else if (slice_param->slice_type == SLICE_TYPE_P ||
689                slice_param->slice_type == SLICE_TYPE_SP) {
690         slice_type = SLICE_TYPE_P;
691     } else { 
692         assert(slice_param->slice_type == SLICE_TYPE_B);
693         slice_type = SLICE_TYPE_B;
694     }
695
696     if (slice_type == SLICE_TYPE_I) {
697         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
698         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
699         num_ref_idx_l0 = 0;
700         num_ref_idx_l1 = 0;
701     } else if (slice_type == SLICE_TYPE_P) {
702         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
703         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
704         num_ref_idx_l1 = 0;
705     } else {
706         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
707         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
708     }
709
710     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
711     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
712     slice_ver_pos = first_mb_in_slice / width_in_mbs;
713
714     if (next_slice_param) {
715         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
716         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
717         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
718     } else {
719         next_slice_hor_pos = 0;
720         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
721     }
722
723     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
724     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
725     OUT_BCS_BATCH(batch, slice_type);
726     OUT_BCS_BATCH(batch, 
727                   (num_ref_idx_l1 << 24) |
728                   (num_ref_idx_l0 << 16) |
729                   (slice_param->chroma_log2_weight_denom << 8) |
730                   (slice_param->luma_log2_weight_denom << 0));
731     OUT_BCS_BATCH(batch, 
732                   (slice_param->direct_spatial_mv_pred_flag << 29) |
733                   (slice_param->disable_deblocking_filter_idc << 27) |
734                   (slice_param->cabac_init_idc << 24) |
735                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
736                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
737                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
738     OUT_BCS_BATCH(batch, 
739                   (slice_ver_pos << 24) |
740                   (slice_hor_pos << 16) | 
741                   (first_mb_in_slice << 0));
742     OUT_BCS_BATCH(batch,
743                   (next_slice_ver_pos << 16) |
744                   (next_slice_hor_pos << 0));
745     OUT_BCS_BATCH(batch, 
746                   (next_slice_param == NULL) << 19); /* last slice flag */
747     OUT_BCS_BATCH(batch, 0);
748     OUT_BCS_BATCH(batch, 0);
749     OUT_BCS_BATCH(batch, 0);
750     OUT_BCS_BATCH(batch, 0);
751     ADVANCE_BCS_BATCH(batch);
752 }
753
754 static inline void
755 gen7_mfd_avc_ref_idx_state(VADriverContextP ctx,
756                            VAPictureParameterBufferH264 *pic_param,
757                            VASliceParameterBufferH264 *slice_param,
758                            struct gen7_mfd_context *gen7_mfd_context)
759 {
760     gen6_send_avc_ref_idx_state(
761         gen7_mfd_context->base.batch,
762         slice_param,
763         gen7_mfd_context->reference_surface
764     );
765 }
766
767 static void
768 gen7_mfd_avc_weightoffset_state(VADriverContextP ctx,
769                                 VAPictureParameterBufferH264 *pic_param,
770                                 VASliceParameterBufferH264 *slice_param,
771                                 struct gen7_mfd_context *gen7_mfd_context)
772 {
773     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
774     int i, j, num_weight_offset_table = 0;
775     short weightoffsets[32 * 6];
776
777     if ((slice_param->slice_type == SLICE_TYPE_P ||
778          slice_param->slice_type == SLICE_TYPE_SP) &&
779         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
780         num_weight_offset_table = 1;
781     }
782     
783     if ((slice_param->slice_type == SLICE_TYPE_B) &&
784         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
785         num_weight_offset_table = 2;
786     }
787
788     for (i = 0; i < num_weight_offset_table; i++) {
789         BEGIN_BCS_BATCH(batch, 98);
790         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
791         OUT_BCS_BATCH(batch, i);
792
793         if (i == 0) {
794             for (j = 0; j < 32; j++) {
795                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
796                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
797                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
798                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
799                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
800                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
801             }
802         } else {
803             for (j = 0; j < 32; j++) {
804                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
805                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
806                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
807                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
808                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
809                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
810             }
811         }
812
813         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
814         ADVANCE_BCS_BATCH(batch);
815     }
816 }
817
818 static void
819 gen7_mfd_avc_bsd_object(VADriverContextP ctx,
820                         VAPictureParameterBufferH264 *pic_param,
821                         VASliceParameterBufferH264 *slice_param,
822                         dri_bo *slice_data_bo,
823                         VASliceParameterBufferH264 *next_slice_param,
824                         struct gen7_mfd_context *gen7_mfd_context)
825 {
826     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
827     unsigned int slice_data_bit_offset;
828
829     slice_data_bit_offset = avc_get_first_mb_bit_offset(
830         slice_data_bo,
831         slice_param,
832         pic_param->pic_fields.bits.entropy_coding_mode_flag
833     );
834
835     /* the input bitsteam format on GEN7 differs from GEN6 */
836     BEGIN_BCS_BATCH(batch, 6);
837     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
838     OUT_BCS_BATCH(batch, 
839                   (slice_param->slice_data_size - slice_param->slice_data_offset));
840     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
841     OUT_BCS_BATCH(batch,
842                   (0 << 31) |
843                   (0 << 14) |
844                   (0 << 12) |
845                   (0 << 10) |
846                   (0 << 8));
847     OUT_BCS_BATCH(batch,
848                   ((slice_data_bit_offset >> 3) << 16) |
849                   (0 << 5)  |
850                   (0 << 4)  |
851                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
852                   (slice_data_bit_offset & 0x7));
853     OUT_BCS_BATCH(batch, 0);
854     ADVANCE_BCS_BATCH(batch);
855 }
856
857 static inline void
858 gen7_mfd_avc_context_init(
859     VADriverContextP         ctx,
860     struct gen7_mfd_context *gen7_mfd_context
861 )
862 {
863     /* Initialize flat scaling lists */
864     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
865 }
866
867 static void
868 gen7_mfd_avc_decode_init(VADriverContextP ctx,
869                          struct decode_state *decode_state,
870                          struct gen7_mfd_context *gen7_mfd_context)
871 {
872     VAPictureParameterBufferH264 *pic_param;
873     VASliceParameterBufferH264 *slice_param;
874     VAPictureH264 *va_pic;
875     struct i965_driver_data *i965 = i965_driver_data(ctx);
876     struct object_surface *obj_surface;
877     dri_bo *bo;
878     int i, j, enable_avc_ildb = 0;
879     unsigned int width_in_mbs, height_in_mbs;
880
881     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
882         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
883         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
884
885         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
886             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
887             assert((slice_param->slice_type == SLICE_TYPE_I) ||
888                    (slice_param->slice_type == SLICE_TYPE_SI) ||
889                    (slice_param->slice_type == SLICE_TYPE_P) ||
890                    (slice_param->slice_type == SLICE_TYPE_SP) ||
891                    (slice_param->slice_type == SLICE_TYPE_B));
892
893             if (slice_param->disable_deblocking_filter_idc != 1) {
894                 enable_avc_ildb = 1;
895                 break;
896             }
897
898             slice_param++;
899         }
900     }
901
902     assert(decode_state->pic_param && decode_state->pic_param->buffer);
903     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
904     gen7_mfd_avc_frame_store_index(ctx, pic_param, gen7_mfd_context);
905     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
906     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
907     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
908     assert(height_in_mbs > 0 && height_in_mbs <= 256);
909
910     /* Current decoded picture */
911     va_pic = &pic_param->CurrPic;
912     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
913     obj_surface = SURFACE(va_pic->picture_id);
914     assert(obj_surface);
915     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
916     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
917     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
918     gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface);
919
920     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
921     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
922     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
923     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
924
925     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
926     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
927     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
928     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
929
930     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
931     bo = dri_bo_alloc(i965->intel.bufmgr,
932                       "intra row store",
933                       width_in_mbs * 64,
934                       0x1000);
935     assert(bo);
936     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
937     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
938
939     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
940     bo = dri_bo_alloc(i965->intel.bufmgr,
941                       "deblocking filter row store",
942                       width_in_mbs * 64 * 4,
943                       0x1000);
944     assert(bo);
945     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
946     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
947
948     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
949     bo = dri_bo_alloc(i965->intel.bufmgr,
950                       "bsd mpc row store",
951                       width_in_mbs * 64 * 2,
952                       0x1000);
953     assert(bo);
954     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
955     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
956
957     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
958     bo = dri_bo_alloc(i965->intel.bufmgr,
959                       "mpr row store",
960                       width_in_mbs * 64 * 2,
961                       0x1000);
962     assert(bo);
963     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
964     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
965
966     gen7_mfd_context->bitplane_read_buffer.valid = 0;
967 }
968
969 static void
970 gen7_mfd_avc_decode_picture(VADriverContextP ctx,
971                             struct decode_state *decode_state,
972                             struct gen7_mfd_context *gen7_mfd_context)
973 {
974     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
975     VAPictureParameterBufferH264 *pic_param;
976     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
977     dri_bo *slice_data_bo;
978     int i, j;
979
980     assert(decode_state->pic_param && decode_state->pic_param->buffer);
981     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
982     gen7_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
983
984     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
985     intel_batchbuffer_emit_mi_flush(batch);
986     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
987     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
988     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
989     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
990     gen7_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
991     gen7_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
992
993     for (j = 0; j < decode_state->num_slice_params; j++) {
994         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
995         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
996         slice_data_bo = decode_state->slice_datas[j]->bo;
997         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
998
999         if (j == decode_state->num_slice_params - 1)
1000             next_slice_group_param = NULL;
1001         else
1002             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
1003
1004         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1005             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1006             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1007                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1008                    (slice_param->slice_type == SLICE_TYPE_P) ||
1009                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1010                    (slice_param->slice_type == SLICE_TYPE_B));
1011
1012             if (i < decode_state->slice_params[j]->num_elements - 1)
1013                 next_slice_param = slice_param + 1;
1014             else
1015                 next_slice_param = next_slice_group_param;
1016
1017             gen7_mfd_avc_directmode_state(ctx, pic_param, slice_param, gen7_mfd_context);
1018             gen7_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
1019             gen7_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
1020             gen7_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1021             gen7_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
1022             slice_param++;
1023         }
1024     }
1025
1026     intel_batchbuffer_end_atomic(batch);
1027     intel_batchbuffer_flush(batch);
1028 }
1029
1030 static void
1031 gen7_mfd_mpeg2_decode_init(VADriverContextP ctx,
1032                            struct decode_state *decode_state,
1033                            struct gen7_mfd_context *gen7_mfd_context)
1034 {
1035     VAPictureParameterBufferMPEG2 *pic_param;
1036     struct i965_driver_data *i965 = i965_driver_data(ctx);
1037     struct object_surface *obj_surface;
1038     dri_bo *bo;
1039     unsigned int width_in_mbs;
1040
1041     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1042     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1043     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1044
1045     mpeg2_set_reference_surfaces(
1046         ctx,
1047         gen7_mfd_context->reference_surface,
1048         decode_state,
1049         pic_param
1050     );
1051
1052     /* Current decoded picture */
1053     obj_surface = SURFACE(decode_state->current_render_target);
1054     assert(obj_surface);
1055     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1056
1057     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1058     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1059     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1060     gen7_mfd_context->pre_deblocking_output.valid = 1;
1061
1062     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1063     bo = dri_bo_alloc(i965->intel.bufmgr,
1064                       "bsd mpc row store",
1065                       width_in_mbs * 96,
1066                       0x1000);
1067     assert(bo);
1068     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1069     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1070
1071     gen7_mfd_context->post_deblocking_output.valid = 0;
1072     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1073     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1074     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1075     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1076 }
1077
1078 static void
1079 gen7_mfd_mpeg2_pic_state(VADriverContextP ctx,
1080                          struct decode_state *decode_state,
1081                          struct gen7_mfd_context *gen7_mfd_context)
1082 {
1083     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1084     VAPictureParameterBufferMPEG2 *pic_param;
1085
1086     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1087     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1088
1089     BEGIN_BCS_BATCH(batch, 13);
1090     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1091     OUT_BCS_BATCH(batch,
1092                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1093                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1094                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1095                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1096                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1097                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1098                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1099                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1100                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1101                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1102                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1103                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1104     OUT_BCS_BATCH(batch,
1105                   pic_param->picture_coding_type << 9);
1106     OUT_BCS_BATCH(batch,
1107                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1108                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1109     OUT_BCS_BATCH(batch, 0);
1110     OUT_BCS_BATCH(batch, 0);
1111     OUT_BCS_BATCH(batch, 0);
1112     OUT_BCS_BATCH(batch, 0);
1113     OUT_BCS_BATCH(batch, 0);
1114     OUT_BCS_BATCH(batch, 0);
1115     OUT_BCS_BATCH(batch, 0);
1116     OUT_BCS_BATCH(batch, 0);
1117     OUT_BCS_BATCH(batch, 0);
1118     ADVANCE_BCS_BATCH(batch);
1119 }
1120
1121 static void
1122 gen7_mfd_mpeg2_qm_state(VADriverContextP ctx,
1123                         struct decode_state *decode_state,
1124                         struct gen7_mfd_context *gen7_mfd_context)
1125 {
1126     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1127     int i, j;
1128
1129     /* Update internal QM state */
1130     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1131         VAIQMatrixBufferMPEG2 * const iq_matrix =
1132             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1133
1134         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1135             iq_matrix->load_intra_quantiser_matrix) {
1136             gen_iq_matrix->load_intra_quantiser_matrix =
1137                 iq_matrix->load_intra_quantiser_matrix;
1138             if (iq_matrix->load_intra_quantiser_matrix) {
1139                 for (j = 0; j < 64; j++)
1140                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1141                         iq_matrix->intra_quantiser_matrix[j];
1142             }
1143         }
1144
1145         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1146             iq_matrix->load_non_intra_quantiser_matrix) {
1147             gen_iq_matrix->load_non_intra_quantiser_matrix =
1148                 iq_matrix->load_non_intra_quantiser_matrix;
1149             if (iq_matrix->load_non_intra_quantiser_matrix) {
1150                 for (j = 0; j < 64; j++)
1151                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1152                         iq_matrix->non_intra_quantiser_matrix[j];
1153             }
1154         }
1155     }
1156
1157     /* Commit QM state to HW */
1158     for (i = 0; i < 2; i++) {
1159         unsigned char *qm = NULL;
1160         int qm_type;
1161
1162         if (i == 0) {
1163             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1164                 qm = gen_iq_matrix->intra_quantiser_matrix;
1165                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1166             }
1167         } else {
1168             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1169                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1170                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1171             }
1172         }
1173
1174         if (!qm)
1175             continue;
1176
1177         gen7_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1178     }
1179 }
1180
1181 static void
1182 gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1183                           VAPictureParameterBufferMPEG2 *pic_param,
1184                           VASliceParameterBufferMPEG2 *slice_param,
1185                           VASliceParameterBufferMPEG2 *next_slice_param,
1186                           struct gen7_mfd_context *gen7_mfd_context)
1187 {
1188     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1189     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1190     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1191
1192     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1193         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1194         is_field_pic = 1;
1195     is_field_pic_wa = is_field_pic &&
1196         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1197
1198     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1199     hpos0 = slice_param->slice_horizontal_position;
1200
1201     if (next_slice_param == NULL) {
1202         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1203         hpos1 = 0;
1204     } else {
1205         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1206         hpos1 = next_slice_param->slice_horizontal_position;
1207     }
1208
1209     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1210
1211     BEGIN_BCS_BATCH(batch, 5);
1212     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1213     OUT_BCS_BATCH(batch, 
1214                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1215     OUT_BCS_BATCH(batch, 
1216                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1217     OUT_BCS_BATCH(batch,
1218                   hpos0 << 24 |
1219                   vpos0 << 16 |
1220                   mb_count << 8 |
1221                   (next_slice_param == NULL) << 5 |
1222                   (next_slice_param == NULL) << 3 |
1223                   (slice_param->macroblock_offset & 0x7));
1224     OUT_BCS_BATCH(batch,
1225                   slice_param->quantiser_scale_code << 24);
1226     ADVANCE_BCS_BATCH(batch);
1227 }
1228
1229 static void
1230 gen7_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1231                               struct decode_state *decode_state,
1232                               struct gen7_mfd_context *gen7_mfd_context)
1233 {
1234     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1235     VAPictureParameterBufferMPEG2 *pic_param;
1236     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1237     dri_bo *slice_data_bo;
1238     int i, j;
1239
1240     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1241     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1242
1243     gen7_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1244     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1245     intel_batchbuffer_emit_mi_flush(batch);
1246     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1247     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1248     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1249     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1250     gen7_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1251     gen7_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1252
1253     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1254         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1255             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1256
1257     for (j = 0; j < decode_state->num_slice_params; j++) {
1258         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1259         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1260         slice_data_bo = decode_state->slice_datas[j]->bo;
1261         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1262
1263         if (j == decode_state->num_slice_params - 1)
1264             next_slice_group_param = NULL;
1265         else
1266             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1267
1268         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1269             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1270
1271             if (i < decode_state->slice_params[j]->num_elements - 1)
1272                 next_slice_param = slice_param + 1;
1273             else
1274                 next_slice_param = next_slice_group_param;
1275
1276             gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1277             slice_param++;
1278         }
1279     }
1280
1281     intel_batchbuffer_end_atomic(batch);
1282     intel_batchbuffer_flush(batch);
1283 }
1284
1285 static const int va_to_gen7_vc1_pic_type[5] = {
1286     GEN7_VC1_I_PICTURE,
1287     GEN7_VC1_P_PICTURE,
1288     GEN7_VC1_B_PICTURE,
1289     GEN7_VC1_BI_PICTURE,
1290     GEN7_VC1_P_PICTURE,
1291 };
1292
/* Lookup table with four entries mapping a motion vector mode index to a code. */
static const int va_to_gen7_vc1_mv[4] = {
    [0] = 1, /* 1-MV */
    [1] = 2, /* 1-MV half-pel */
    [2] = 3, /* 1-MV half-pel bilinear */
    [3] = 0, /* Mixed MV */
};
1299
/*
 * Table of 21 B-picture scale factors.
 * NOTE(review): the index is presumably the B-picture fraction code from
 * the picture parameters - confirm against the user of this table.
 */
static const int b_picture_scale_factor[21] = {
    128,  85, 170,
     64, 192,  51,
    102, 153, 204,
     43, 215,  37,
     74, 111, 148,
    185, 222,  32,
     96, 160, 224,
};
1307
/* Lookup table with three entries mapping a conditional overlap index to a code. */
static const int va_to_gen7_vc1_condover[3] = {
    [0] = 0,
    [1] = 2,
    [2] = 3,
};
1313
1314 static const int va_to_gen7_vc1_profile[4] = {
1315     GEN7_VC1_SIMPLE_PROFILE,
1316     GEN7_VC1_MAIN_PROFILE,
1317     GEN7_VC1_RESERVED_PROFILE,
1318     GEN7_VC1_ADVANCED_PROFILE
1319 };
1320
1321 static void 
1322 gen7_mfd_free_vc1_surface(void **data)
1323 {
1324     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1325
1326     if (!gen7_vc1_surface)
1327         return;
1328
1329     dri_bo_unreference(gen7_vc1_surface->dmv);
1330     free(gen7_vc1_surface);
1331     *data = NULL;
1332 }
1333
1334 static void
1335 gen7_mfd_init_vc1_surface(VADriverContextP ctx, 
1336                           VAPictureParameterBufferVC1 *pic_param,
1337                           struct object_surface *obj_surface)
1338 {
1339     struct i965_driver_data *i965 = i965_driver_data(ctx);
1340     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1341     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1342     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1343
1344     obj_surface->free_private_data = gen7_mfd_free_vc1_surface;
1345
1346     if (!gen7_vc1_surface) {
1347         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1348         assert((obj_surface->size & 0x3f) == 0);
1349         obj_surface->private_data = gen7_vc1_surface;
1350     }
1351
1352     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1353
1354     if (gen7_vc1_surface->dmv == NULL) {
1355         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1356                                              "direct mv w/r buffer",
1357                                              width_in_mbs * height_in_mbs * 64,
1358                                              0x1000);
1359     }
1360 }
1361
1362 static void
1363 gen7_mfd_vc1_decode_init(VADriverContextP ctx,
1364                          struct decode_state *decode_state,
1365                          struct gen7_mfd_context *gen7_mfd_context)
1366 {
1367     VAPictureParameterBufferVC1 *pic_param;
1368     struct i965_driver_data *i965 = i965_driver_data(ctx);
1369     struct object_surface *obj_surface;
1370     int i;
1371     dri_bo *bo;
1372     int width_in_mbs;
1373
1374     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1375     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1376     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1377
1378     /* reference picture */
1379     obj_surface = SURFACE(pic_param->forward_reference_picture);
1380
1381     if (obj_surface && obj_surface->bo)
1382         gen7_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
1383     else
1384         gen7_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
1385
1386     obj_surface = SURFACE(pic_param->backward_reference_picture);
1387
1388     if (obj_surface && obj_surface->bo)
1389         gen7_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
1390     else
1391         gen7_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;
1392
1393     /* must do so !!! */
1394     for (i = 2; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++)
1395         gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[i % 2].surface_id;
1396
1397     /* Current decoded picture */
1398     obj_surface = SURFACE(decode_state->current_render_target);
1399     assert(obj_surface);
1400     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1401     gen7_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1402
1403     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1404     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1405     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1406     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1407
1408     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1409     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1410     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1411     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1412
1413     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1414     bo = dri_bo_alloc(i965->intel.bufmgr,
1415                       "intra row store",
1416                       width_in_mbs * 64,
1417                       0x1000);
1418     assert(bo);
1419     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1420     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1421
1422     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1423     bo = dri_bo_alloc(i965->intel.bufmgr,
1424                       "deblocking filter row store",
1425                       width_in_mbs * 6 * 64,
1426                       0x1000);
1427     assert(bo);
1428     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1429     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1430
1431     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1432     bo = dri_bo_alloc(i965->intel.bufmgr,
1433                       "bsd mpc row store",
1434                       width_in_mbs * 96,
1435                       0x1000);
1436     assert(bo);
1437     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1438     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1439
1440     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1441
1442     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1443     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1444     
1445     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1446         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1447         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1448         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1449         int src_w, src_h;
1450         uint8_t *src = NULL, *dst = NULL;
1451
1452         assert(decode_state->bit_plane->buffer);
1453         src = decode_state->bit_plane->buffer;
1454
1455         bo = dri_bo_alloc(i965->intel.bufmgr,
1456                           "VC-1 Bitplane",
1457                           bitplane_width * height_in_mbs,
1458                           0x1000);
1459         assert(bo);
1460         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1461
1462         dri_bo_map(bo, True);
1463         assert(bo->virtual);
1464         dst = bo->virtual;
1465
1466         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1467             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1468                 int src_index, dst_index;
1469                 int src_shift;
1470                 uint8_t src_value;
1471
1472                 src_index = (src_h * width_in_mbs + src_w) / 2;
1473                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1474                 src_value = ((src[src_index] >> src_shift) & 0xf);
1475
1476                 dst_index = src_w / 2;
1477                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1478             }
1479
1480             if (src_w & 1)
1481                 dst[src_w / 2] >>= 4;
1482
1483             dst += bitplane_width;
1484         }
1485
1486         dri_bo_unmap(bo);
1487     } else
1488         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1489 }
1490
1491 static void
1492 gen7_mfd_vc1_pic_state(VADriverContextP ctx,
1493                        struct decode_state *decode_state,
1494                        struct gen7_mfd_context *gen7_mfd_context)
1495 {
1496     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1497     VAPictureParameterBufferVC1 *pic_param;
1498     struct i965_driver_data *i965 = i965_driver_data(ctx);
1499     struct object_surface *obj_surface;
1500     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1501     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1502     int unified_mv_mode;
1503     int ref_field_pic_polarity = 0;
1504     int scale_factor = 0;
1505     int trans_ac_y = 0;
1506     int dmv_surface_valid = 0;
1507     int brfd = 0;
1508     int fcm = 0;
1509     int picture_type;
1510     int profile;
1511     int overlap;
1512     int interpolation_mode = 0;
1513
1514     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1515     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1516
1517     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1518     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1519     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1520     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1521     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1522     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1523     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1524     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1525
1526     if (dquant == 0) {
1527         alt_pquant_config = 0;
1528         alt_pquant_edge_mask = 0;
1529     } else if (dquant == 2) {
1530         alt_pquant_config = 1;
1531         alt_pquant_edge_mask = 0xf;
1532     } else {
1533         assert(dquant == 1);
1534         if (dquantfrm == 0) {
1535             alt_pquant_config = 0;
1536             alt_pquant_edge_mask = 0;
1537             alt_pq = 0;
1538         } else {
1539             assert(dquantfrm == 1);
1540             alt_pquant_config = 1;
1541
1542             switch (dqprofile) {
1543             case 3:
1544                 if (dqbilevel == 0) {
1545                     alt_pquant_config = 2;
1546                     alt_pquant_edge_mask = 0;
1547                 } else {
1548                     assert(dqbilevel == 1);
1549                     alt_pquant_config = 3;
1550                     alt_pquant_edge_mask = 0;
1551                 }
1552                 break;
1553                 
1554             case 0:
1555                 alt_pquant_edge_mask = 0xf;
1556                 break;
1557
1558             case 1:
1559                 if (dqdbedge == 3)
1560                     alt_pquant_edge_mask = 0x9;
1561                 else
1562                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1563
1564                 break;
1565
1566             case 2:
1567                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1568                 break;
1569
1570             default:
1571                 assert(0);
1572             }
1573         }
1574     }
1575
1576     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1577         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1578         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1579     } else {
1580         assert(pic_param->mv_fields.bits.mv_mode < 4);
1581         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1582     }
1583
1584     if (pic_param->sequence_fields.bits.interlace == 1 &&
1585         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1586         /* FIXME: calculate reference field picture polarity */
1587         assert(0);
1588         ref_field_pic_polarity = 0;
1589     }
1590
1591     if (pic_param->b_picture_fraction < 21)
1592         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1593
1594     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1595     
1596     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1597         picture_type == GEN7_VC1_I_PICTURE)
1598         picture_type = GEN7_VC1_BI_PICTURE;
1599
1600     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1601         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1602     else {
1603         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1604         /*
1605          * 8.3.6.2.1 Transform Type Selection
1606          * If variable-sized transform coding is not enabled,
1607          * then the 8x8 transform shall be used for all blocks.
1608          * it is also MFX_VC1_PIC_STATE requirement.
1609          */
1610         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1611             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1612             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1613         }
1614     }
1615
1616
1617     if (picture_type == GEN7_VC1_B_PICTURE) {
1618         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1619
1620         obj_surface = SURFACE(pic_param->backward_reference_picture);
1621         assert(obj_surface);
1622         gen7_vc1_surface = obj_surface->private_data;
1623
1624         if (!gen7_vc1_surface || 
1625             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1626              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1627             dmv_surface_valid = 0;
1628         else
1629             dmv_surface_valid = 1;
1630     }
1631
1632     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1633
1634     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1635         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1636     else {
1637         if (pic_param->picture_fields.bits.top_field_first)
1638             fcm = 2;
1639         else
1640             fcm = 3;
1641     }
1642
1643     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1644         brfd = pic_param->reference_fields.bits.reference_distance;
1645         brfd = (scale_factor * brfd) >> 8;
1646         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1647
1648         if (brfd < 0)
1649             brfd = 0;
1650     }
1651
1652     overlap = pic_param->sequence_fields.bits.overlap;
1653     if (profile != GEN7_VC1_ADVANCED_PROFILE && pic_param->pic_quantizer_fields.bits.pic_quantizer_scale < 9)
1654         overlap = 0;
1655
1656     assert(pic_param->conditional_overlap_flag < 3);
1657     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1658
1659     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1660         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1661          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1662         interpolation_mode = 9; /* Half-pel bilinear */
1663     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1664              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1665               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1666         interpolation_mode = 1; /* Half-pel bicubic */
1667     else
1668         interpolation_mode = 0; /* Quarter-pel bicubic */
1669
1670     BEGIN_BCS_BATCH(batch, 6);
1671     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1672     OUT_BCS_BATCH(batch,
1673                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1674                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1675     OUT_BCS_BATCH(batch,
1676                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1677                   dmv_surface_valid << 15 |
1678                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1679                   pic_param->rounding_control << 13 |
1680                   pic_param->sequence_fields.bits.syncmarker << 12 |
1681                   interpolation_mode << 8 |
1682                   0 << 7 | /* FIXME: scale up or down ??? */
1683                   pic_param->range_reduction_frame << 6 |
1684                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1685                   overlap << 4 |
1686                   !pic_param->picture_fields.bits.is_first_field << 3 |
1687                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1688     OUT_BCS_BATCH(batch,
1689                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1690                   picture_type << 26 |
1691                   fcm << 24 |
1692                   alt_pq << 16 |
1693                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1694                   scale_factor << 0);
1695     OUT_BCS_BATCH(batch,
1696                   unified_mv_mode << 28 |
1697                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1698                   pic_param->fast_uvmc_flag << 26 |
1699                   ref_field_pic_polarity << 25 |
1700                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1701                   pic_param->reference_fields.bits.reference_distance << 20 |
1702                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1703                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1704                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1705                   alt_pquant_edge_mask << 4 |
1706                   alt_pquant_config << 2 |
1707                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1708                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1709     OUT_BCS_BATCH(batch,
1710                   !!pic_param->bitplane_present.value << 31 |
1711                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1712                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1713                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1714                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1715                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1716                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1717                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1718                   pic_param->mv_fields.bits.mv_table << 20 |
1719                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1720                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1721                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1722                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1723                   pic_param->mb_mode_table << 8 |
1724                   trans_ac_y << 6 |
1725                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1726                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1727                   pic_param->cbp_table << 0);
1728     ADVANCE_BCS_BATCH(batch);
1729 }
1730
1731 static void
1732 gen7_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1733                              struct decode_state *decode_state,
1734                              struct gen7_mfd_context *gen7_mfd_context)
1735 {
1736     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1737     VAPictureParameterBufferVC1 *pic_param;
1738     int intensitycomp_single;
1739
1740     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1741     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1742
1743     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1744     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1745     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1746
1747     BEGIN_BCS_BATCH(batch, 6);
1748     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1749     OUT_BCS_BATCH(batch,
1750                   0 << 14 | /* FIXME: double ??? */
1751                   0 << 12 |
1752                   intensitycomp_single << 10 |
1753                   intensitycomp_single << 8 |
1754                   0 << 4 | /* FIXME: interlace mode */
1755                   0);
1756     OUT_BCS_BATCH(batch,
1757                   pic_param->luma_shift << 16 |
1758                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1759     OUT_BCS_BATCH(batch, 0);
1760     OUT_BCS_BATCH(batch, 0);
1761     OUT_BCS_BATCH(batch, 0);
1762     ADVANCE_BCS_BATCH(batch);
1763 }
1764
1765
1766 static void
1767 gen7_mfd_vc1_directmode_state(VADriverContextP ctx,
1768                               struct decode_state *decode_state,
1769                               struct gen7_mfd_context *gen7_mfd_context)
1770 {
1771     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1772     VAPictureParameterBufferVC1 *pic_param;
1773     struct i965_driver_data *i965 = i965_driver_data(ctx);
1774     struct object_surface *obj_surface;
1775     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1776
1777     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1778     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1779
1780     obj_surface = SURFACE(decode_state->current_render_target);
1781
1782     if (obj_surface && obj_surface->private_data) {
1783         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1784     }
1785
1786     obj_surface = SURFACE(pic_param->backward_reference_picture);
1787
1788     if (obj_surface && obj_surface->private_data) {
1789         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1790     }
1791
1792     BEGIN_BCS_BATCH(batch, 3);
1793     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
1794
1795     if (dmv_write_buffer)
1796         OUT_BCS_RELOC(batch, dmv_write_buffer,
1797                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1798                       0);
1799     else
1800         OUT_BCS_BATCH(batch, 0);
1801
1802     if (dmv_read_buffer)
1803         OUT_BCS_RELOC(batch, dmv_read_buffer,
1804                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1805                       0);
1806     else
1807         OUT_BCS_BATCH(batch, 0);
1808                   
1809     ADVANCE_BCS_BATCH(batch);
1810 }
1811
/*
 * Convert a macroblock bit offset into an offset within the raw buffer.
 *
 * For any profile other than 3 the input offset is returned unchanged.
 * For profile 3 the whole-byte part of the offset (the slice header) is
 * walked through buf: every occurrence of the byte pattern
 * 00 00 03 xx (xx < 4) accounts for two header bytes but three raw bytes.
 * The result is the raw byte position reached, in bits, plus the
 * sub-byte remainder of the input offset.
 *
 * NOTE(review): no buffer length is passed in, so the look-ahead reads up
 * to three bytes past the current position without a bound.
 * NOTE(review): when the pattern begins on the very last header byte the
 * walk still consumes two header bytes and three raw bytes, i.e. it steps
 * past the header size; confirm this is the intended result.
 */
static int
gen7_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    const int header_bytes = in_slice_data_bit_offset / 8;
    int consumed = 0;   /* header bytes accounted for so far */
    int raw_pos = 0;    /* matching byte position inside buf */

    if (profile != 3)
        return in_slice_data_bit_offset;

    while (consumed < header_bytes) {
        if (buf[raw_pos] == 0 && buf[raw_pos + 1] == 0 &&
            buf[raw_pos + 2] == 3 && buf[raw_pos + 3] < 4) {
            /* two header bytes, plus the extra 03 byte in the raw data */
            consumed += 2;
            raw_pos += 3;
        } else {
            consumed += 1;
            raw_pos += 1;
        }
    }

    return 8 * raw_pos + in_slice_data_bit_offset % 8;
}
1833
1834 static void
1835 gen7_mfd_vc1_bsd_object(VADriverContextP ctx,
1836                         VAPictureParameterBufferVC1 *pic_param,
1837                         VASliceParameterBufferVC1 *slice_param,
1838                         VASliceParameterBufferVC1 *next_slice_param,
1839                         dri_bo *slice_data_bo,
1840                         struct gen7_mfd_context *gen7_mfd_context)
1841 {
1842     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1843     int next_slice_start_vert_pos;
1844     int macroblock_offset;
1845     uint8_t *slice_data = NULL;
1846
1847     dri_bo_map(slice_data_bo, 0);
1848     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1849     macroblock_offset = gen7_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1850                                                                slice_param->macroblock_offset,
1851                                                                pic_param->sequence_fields.bits.profile);
1852     dri_bo_unmap(slice_data_bo);
1853
1854     if (next_slice_param)
1855         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1856     else
1857         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1858
1859     BEGIN_BCS_BATCH(batch, 5);
1860     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1861     OUT_BCS_BATCH(batch, 
1862                   slice_param->slice_data_size - (macroblock_offset >> 3));
1863     OUT_BCS_BATCH(batch, 
1864                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1865     OUT_BCS_BATCH(batch,
1866                   slice_param->slice_vertical_position << 16 |
1867                   next_slice_start_vert_pos << 0);
1868     OUT_BCS_BATCH(batch,
1869                   (macroblock_offset & 0x7));
1870     ADVANCE_BCS_BATCH(batch);
1871 }
1872
1873 static void
1874 gen7_mfd_vc1_decode_picture(VADriverContextP ctx,
1875                             struct decode_state *decode_state,
1876                             struct gen7_mfd_context *gen7_mfd_context)
1877 {
1878     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1879     VAPictureParameterBufferVC1 *pic_param;
1880     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1881     dri_bo *slice_data_bo;
1882     int i, j;
1883
1884     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1885     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1886
1887     gen7_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1888     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1889     intel_batchbuffer_emit_mi_flush(batch);
1890     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1891     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1892     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1893     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1894     gen7_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1895     gen7_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1896     gen7_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1897
1898     for (j = 0; j < decode_state->num_slice_params; j++) {
1899         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1900         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1901         slice_data_bo = decode_state->slice_datas[j]->bo;
1902         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1903
1904         if (j == decode_state->num_slice_params - 1)
1905             next_slice_group_param = NULL;
1906         else
1907             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1908
1909         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1910             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1911
1912             if (i < decode_state->slice_params[j]->num_elements - 1)
1913                 next_slice_param = slice_param + 1;
1914             else
1915                 next_slice_param = next_slice_group_param;
1916
1917             gen7_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1918             slice_param++;
1919         }
1920     }
1921
1922     intel_batchbuffer_end_atomic(batch);
1923     intel_batchbuffer_flush(batch);
1924 }
1925
1926 static void
1927 gen7_mfd_jpeg_decode_init(VADriverContextP ctx,
1928                           struct decode_state *decode_state,
1929                           struct gen7_mfd_context *gen7_mfd_context)
1930 {
1931     struct i965_driver_data *i965 = i965_driver_data(ctx);
1932     struct object_surface *obj_surface;
1933     VAPictureParameterBufferJPEGBaseline *pic_param;
1934     int subsampling = SUBSAMPLE_YUV420;
1935
1936     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1937
1938     if (pic_param->num_components == 1)
1939         subsampling = SUBSAMPLE_YUV400;
1940     else if (pic_param->num_components == 3) {
1941         int h1 = pic_param->components[0].h_sampling_factor;
1942         int h2 = pic_param->components[1].h_sampling_factor;
1943         int h3 = pic_param->components[2].h_sampling_factor;
1944         int v1 = pic_param->components[0].v_sampling_factor;
1945         int v2 = pic_param->components[1].v_sampling_factor;
1946         int v3 = pic_param->components[2].v_sampling_factor;
1947
1948         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1949             v1 == 2 && v2 == 1 && v3 == 1)
1950             subsampling = SUBSAMPLE_YUV420;
1951         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1952                  v1 == 1 && v2 == 1 && v3 == 1)
1953             subsampling = SUBSAMPLE_YUV422H;
1954         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1955                  v1 == 1 && v2 == 1 && v3 == 1)
1956             subsampling = SUBSAMPLE_YUV444;
1957         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1958                  v1 == 1 && v2 == 1 && v3 == 1)
1959             subsampling = SUBSAMPLE_YUV411;
1960         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1961                  v1 == 2 && v2 == 1 && v3 == 1)
1962             subsampling = SUBSAMPLE_YUV422V;
1963         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1964                  v1 == 2 && v2 == 2 && v3 == 2)
1965             subsampling = SUBSAMPLE_YUV422H;
1966         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1967                  v1 == 2 && v2 == 1 && v3 == 1)
1968             subsampling = SUBSAMPLE_YUV422V;
1969         else
1970             assert(0);
1971     } else {
1972         assert(0);
1973     }
1974
1975     /* Current decoded picture */
1976     obj_surface = SURFACE(decode_state->current_render_target);
1977     assert(obj_surface);
1978     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling);
1979
1980     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1981     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1982     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1983     gen7_mfd_context->pre_deblocking_output.valid = 1;
1984
1985     gen7_mfd_context->post_deblocking_output.bo = NULL;
1986     gen7_mfd_context->post_deblocking_output.valid = 0;
1987
1988     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1989     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1990
1991     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1992     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1993
1994     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1995     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1996
1997     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1998     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1999
2000     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2001     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2002 }
2003
2004 static const int va_to_gen7_jpeg_rotation[4] = {
2005     GEN7_JPEG_ROTATION_0,
2006     GEN7_JPEG_ROTATION_90,
2007     GEN7_JPEG_ROTATION_180,
2008     GEN7_JPEG_ROTATION_270
2009 };
2010
2011 static void
2012 gen7_mfd_jpeg_pic_state(VADriverContextP ctx,
2013                         struct decode_state *decode_state,
2014                         struct gen7_mfd_context *gen7_mfd_context)
2015 {
2016     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2017     VAPictureParameterBufferJPEGBaseline *pic_param;
2018     int chroma_type = GEN7_YUV420;
2019     int frame_width_in_blks;
2020     int frame_height_in_blks;
2021
2022     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2023     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2024
2025     if (pic_param->num_components == 1)
2026         chroma_type = GEN7_YUV400;
2027     else if (pic_param->num_components == 3) {
2028         int h1 = pic_param->components[0].h_sampling_factor;
2029         int h2 = pic_param->components[1].h_sampling_factor;
2030         int h3 = pic_param->components[2].h_sampling_factor;
2031         int v1 = pic_param->components[0].v_sampling_factor;
2032         int v2 = pic_param->components[1].v_sampling_factor;
2033         int v3 = pic_param->components[2].v_sampling_factor;
2034
2035         if (h1 == 2 && h2 == 1 && h3 == 1 &&
2036             v1 == 2 && v2 == 1 && v3 == 1)
2037             chroma_type = GEN7_YUV420;
2038         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2039                  v1 == 1 && v2 == 1 && v3 == 1)
2040             chroma_type = GEN7_YUV422H_2Y;
2041         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2042                  v1 == 1 && v2 == 1 && v3 == 1)
2043             chroma_type = GEN7_YUV444;
2044         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2045                  v1 == 1 && v2 == 1 && v3 == 1)
2046             chroma_type = GEN7_YUV411;
2047         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2048                  v1 == 2 && v2 == 1 && v3 == 1)
2049             chroma_type = GEN7_YUV422V_2Y;
2050         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2051                  v1 == 2 && v2 == 2 && v3 == 2)
2052             chroma_type = GEN7_YUV422H_4Y;
2053         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2054                  v1 == 2 && v2 == 1 && v3 == 1)
2055             chroma_type = GEN7_YUV422V_4Y;
2056         else
2057             assert(0);
2058     }
2059
2060     if (chroma_type == GEN7_YUV400 ||
2061         chroma_type == GEN7_YUV444 ||
2062         chroma_type == GEN7_YUV422V_2Y) {
2063         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2064         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2065     } else if (chroma_type == GEN7_YUV411) {
2066         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2067         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2068     } else {
2069         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2070         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2071     }
2072
2073     BEGIN_BCS_BATCH(batch, 3);
2074     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2075     OUT_BCS_BATCH(batch,
2076                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2077                   (chroma_type << 0));
2078     OUT_BCS_BATCH(batch,
2079                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2080                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2081     ADVANCE_BCS_BATCH(batch);
2082 }
2083
2084 static const int va_to_gen7_jpeg_hufftable[2] = {
2085     MFX_HUFFTABLE_ID_Y,
2086     MFX_HUFFTABLE_ID_UV
2087 };
2088
2089 static void
2090 gen7_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2091                                struct decode_state *decode_state,
2092                                struct gen7_mfd_context *gen7_mfd_context,
2093                                int num_tables)
2094 {
2095     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2096     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2097     int index;
2098
2099     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2100         return;
2101
2102     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2103
2104     for (index = 0; index < num_tables; index++) {
2105         int id = va_to_gen7_jpeg_hufftable[index];
2106         BEGIN_BCS_BATCH(batch, 53);
2107         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2108         OUT_BCS_BATCH(batch, id);
2109         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2110         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2111         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2112         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2113         ADVANCE_BCS_BATCH(batch);
2114     }
2115 }
2116
2117 static const int va_to_gen7_jpeg_qm[5] = {
2118     -1,
2119     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2120     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2121     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2122     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2123 };
2124
2125 static void
2126 gen7_mfd_jpeg_qm_state(VADriverContextP ctx,
2127                        struct decode_state *decode_state,
2128                        struct gen7_mfd_context *gen7_mfd_context)
2129 {
2130     VAPictureParameterBufferJPEGBaseline *pic_param;
2131     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2132     int index;
2133
2134     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2135         return;
2136
2137     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2138     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2139
2140     assert(pic_param->num_components <= 3);
2141
2142     for (index = 0; index < pic_param->num_components; index++) {
2143         int qm_type = va_to_gen7_jpeg_qm[pic_param->components[index].component_id - pic_param->components[0].component_id + 1];
2144         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2145         unsigned char raster_qm[64];
2146         int j;
2147
2148         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2149             continue;
2150
2151         for (j = 0; j < 64; j++)
2152             raster_qm[zigzag_direct[j]] = qm[j];
2153
2154         gen7_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2155     }
2156 }
2157
2158 static void
2159 gen7_mfd_jpeg_bsd_object(VADriverContextP ctx,
2160                          VAPictureParameterBufferJPEGBaseline *pic_param,
2161                          VASliceParameterBufferJPEGBaseline *slice_param,
2162                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2163                          dri_bo *slice_data_bo,
2164                          struct gen7_mfd_context *gen7_mfd_context)
2165 {
2166     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2167     int scan_component_mask = 0;
2168     int i;
2169
2170     assert(slice_param->num_components > 0);
2171     assert(slice_param->num_components < 4);
2172     assert(slice_param->num_components <= pic_param->num_components);
2173
2174     for (i = 0; i < slice_param->num_components; i++) {
2175         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2176         case 1:
2177             scan_component_mask |= (1 << 0);
2178             break;
2179         case 2:
2180             scan_component_mask |= (1 << 1);
2181             break;
2182         case 3:
2183             scan_component_mask |= (1 << 2);
2184             break;
2185         default:
2186             assert(0);
2187             break;
2188         }
2189     }
2190
2191     BEGIN_BCS_BATCH(batch, 6);
2192     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2193     OUT_BCS_BATCH(batch, 
2194                   slice_param->slice_data_size);
2195     OUT_BCS_BATCH(batch, 
2196                   slice_param->slice_data_offset);
2197     OUT_BCS_BATCH(batch,
2198                   slice_param->slice_horizontal_position << 16 |
2199                   slice_param->slice_vertical_position << 0);
2200     OUT_BCS_BATCH(batch,
2201                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2202                   (scan_component_mask << 27) |                 /* scan components */
2203                   (0 << 26) |   /* disable interrupt allowed */
2204                   (slice_param->num_mcus << 0));                /* MCU count */
2205     OUT_BCS_BATCH(batch,
2206                   (slice_param->restart_interval << 0));    /* RestartInterval */
2207     ADVANCE_BCS_BATCH(batch);
2208 }
2209
2210 /* Workaround for JPEG decoding on Ivybridge */
2211
2212 VAStatus 
2213 i965_DestroySurfaces(VADriverContextP ctx,
2214                      VASurfaceID *surface_list,
2215                      int num_surfaces);
2216 VAStatus 
2217 i965_CreateSurfaces(VADriverContextP ctx,
2218                     int width,
2219                     int height,
2220                     int format,
2221                     int num_surfaces,
2222                     VASurfaceID *surfaces);
2223
/* Tiny pre-encoded clip that the JPEG workaround decodes as an AVC slice. */
static struct {
    int width;              /* width of the scratch surface */
    int height;             /* height of the scratch surface */
    unsigned char data[32]; /* bitstream bytes uploaded as slice data */
    int data_size;          /* number of valid bytes in data[] */
    int data_bit_offset;    /* bit offset written into the AVC BSD object */
    int qp;                 /* QP written into the AVC slice state */
} gen7_jpeg_wa_clip = {
    .width = 16,
    .height = 16,
    .data = {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    .data_size = 14,
    .data_bit_offset = 40,
    .qp = 28,
};
2242
2243 static void
2244 gen7_jpeg_wa_init(VADriverContextP ctx,
2245                   struct gen7_mfd_context *gen7_mfd_context)
2246 {
2247     struct i965_driver_data *i965 = i965_driver_data(ctx);
2248     VAStatus status;
2249     struct object_surface *obj_surface;
2250
2251     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2252         i965_DestroySurfaces(ctx,
2253                              &gen7_mfd_context->jpeg_wa_surface_id,
2254                              1);
2255
2256     status = i965_CreateSurfaces(ctx,
2257                                  gen7_jpeg_wa_clip.width,
2258                                  gen7_jpeg_wa_clip.height,
2259                                  VA_RT_FORMAT_YUV420,
2260                                  1,
2261                                  &gen7_mfd_context->jpeg_wa_surface_id);
2262     assert(status == VA_STATUS_SUCCESS);
2263
2264     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2265     assert(obj_surface);
2266     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
2267
2268     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2269         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2270                                                                "JPEG WA data",
2271                                                                0x1000,
2272                                                                0x1000);
2273         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2274                        0,
2275                        gen7_jpeg_wa_clip.data_size,
2276                        gen7_jpeg_wa_clip.data);
2277     }
2278 }
2279
2280 static void
2281 gen7_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2282                               struct gen7_mfd_context *gen7_mfd_context)
2283 {
2284     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2285
2286     BEGIN_BCS_BATCH(batch, 5);
2287     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2288     OUT_BCS_BATCH(batch,
2289                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2290                   (MFD_MODE_VLD << 15) | /* VLD mode */
2291                   (0 << 10) | /* disable Stream-Out */
2292                   (0 << 9)  | /* Post Deblocking Output */
2293                   (1 << 8)  | /* Pre Deblocking Output */
2294                   (0 << 5)  | /* not in stitch mode */
2295                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2296                   (MFX_FORMAT_AVC << 0));
2297     OUT_BCS_BATCH(batch,
2298                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2299                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2300                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2301                   (0 << 1)  |
2302                   (0 << 0));
2303     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2304     OUT_BCS_BATCH(batch, 0); /* reserved */
2305     ADVANCE_BCS_BATCH(batch);
2306 }
2307
2308 static void
2309 gen7_jpeg_wa_surface_state(VADriverContextP ctx,
2310                            struct gen7_mfd_context *gen7_mfd_context)
2311 {
2312     struct i965_driver_data *i965 = i965_driver_data(ctx);
2313     struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2314     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2315
2316     BEGIN_BCS_BATCH(batch, 6);
2317     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2318     OUT_BCS_BATCH(batch, 0);
2319     OUT_BCS_BATCH(batch,
2320                   ((obj_surface->orig_width - 1) << 18) |
2321                   ((obj_surface->orig_height - 1) << 4));
2322     OUT_BCS_BATCH(batch,
2323                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2324                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2325                   (0 << 22) | /* surface object control state, ignored */
2326                   ((obj_surface->width - 1) << 3) | /* pitch */
2327                   (0 << 2)  | /* must be 0 */
2328                   (1 << 1)  | /* must be tiled */
2329                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2330     OUT_BCS_BATCH(batch,
2331                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2332                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2333     OUT_BCS_BATCH(batch,
2334                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2335                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
2336     ADVANCE_BCS_BATCH(batch);
2337 }
2338
2339 static void
2340 gen7_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2341                                  struct gen7_mfd_context *gen7_mfd_context)
2342 {
2343     struct i965_driver_data *i965 = i965_driver_data(ctx);
2344     struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2345     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2346     dri_bo *intra_bo;
2347     int i;
2348
2349     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2350                             "intra row store",
2351                             128 * 64,
2352                             0x1000);
2353
2354     BEGIN_BCS_BATCH(batch, 24);
2355     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
2356     OUT_BCS_RELOC(batch,
2357                   obj_surface->bo,
2358                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2359                   0);
2360     
2361     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2362
2363     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2364     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2365
2366     OUT_BCS_RELOC(batch,
2367                   intra_bo,
2368                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2369                   0);
2370
2371     OUT_BCS_BATCH(batch, 0);
2372
2373     /* DW 7..22 */
2374     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2375         OUT_BCS_BATCH(batch, 0);
2376     }
2377
2378     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
2379     ADVANCE_BCS_BATCH(batch);
2380
2381     dri_bo_unreference(intra_bo);
2382 }
2383
2384 static void
2385 gen7_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2386                                      struct gen7_mfd_context *gen7_mfd_context)
2387 {
2388     struct i965_driver_data *i965 = i965_driver_data(ctx);
2389     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2390     dri_bo *bsd_mpc_bo, *mpr_bo;
2391
2392     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2393                               "bsd mpc row store",
2394                               11520, /* 1.5 * 120 * 64 */
2395                               0x1000);
2396
2397     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2398                           "mpr row store",
2399                           7680, /* 1. 0 * 120 * 64 */
2400                           0x1000);
2401
2402     BEGIN_BCS_BATCH(batch, 4);
2403     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
2404
2405     OUT_BCS_RELOC(batch,
2406                   bsd_mpc_bo,
2407                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2408                   0);
2409
2410     OUT_BCS_RELOC(batch,
2411                   mpr_bo,
2412                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2413                   0);
2414     OUT_BCS_BATCH(batch, 0);
2415
2416     ADVANCE_BCS_BATCH(batch);
2417
2418     dri_bo_unreference(bsd_mpc_bo);
2419     dri_bo_unreference(mpr_bo);
2420 }
2421
2422 static void
2423 gen7_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2424                           struct gen7_mfd_context *gen7_mfd_context)
2425 {
2426
2427 }
2428
2429 static void
2430 gen7_jpeg_wa_avc_img_state(VADriverContextP ctx,
2431                            struct gen7_mfd_context *gen7_mfd_context)
2432 {
2433     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2434     int img_struct = 0;
2435     int mbaff_frame_flag = 0;
2436     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2437
2438     BEGIN_BCS_BATCH(batch, 16);
2439     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2440     OUT_BCS_BATCH(batch, 
2441                   width_in_mbs * height_in_mbs);
2442     OUT_BCS_BATCH(batch, 
2443                   ((height_in_mbs - 1) << 16) | 
2444                   ((width_in_mbs - 1) << 0));
2445     OUT_BCS_BATCH(batch, 
2446                   (0 << 24) |
2447                   (0 << 16) |
2448                   (0 << 14) |
2449                   (0 << 13) |
2450                   (0 << 12) | /* differ from GEN6 */
2451                   (0 << 10) |
2452                   (img_struct << 8));
2453     OUT_BCS_BATCH(batch,
2454                   (1 << 10) | /* 4:2:0 */
2455                   (1 << 7) |  /* CABAC */
2456                   (0 << 6) |
2457                   (0 << 5) |
2458                   (0 << 4) |
2459                   (0 << 3) |
2460                   (1 << 2) |
2461                   (mbaff_frame_flag << 1) |
2462                   (0 << 0));
2463     OUT_BCS_BATCH(batch, 0);
2464     OUT_BCS_BATCH(batch, 0);
2465     OUT_BCS_BATCH(batch, 0);
2466     OUT_BCS_BATCH(batch, 0);
2467     OUT_BCS_BATCH(batch, 0);
2468     OUT_BCS_BATCH(batch, 0);
2469     OUT_BCS_BATCH(batch, 0);
2470     OUT_BCS_BATCH(batch, 0);
2471     OUT_BCS_BATCH(batch, 0);
2472     OUT_BCS_BATCH(batch, 0);
2473     OUT_BCS_BATCH(batch, 0);
2474     ADVANCE_BCS_BATCH(batch);
2475 }
2476
2477 static void
2478 gen7_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2479                                   struct gen7_mfd_context *gen7_mfd_context)
2480 {
2481     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2482     int i;
2483
2484     BEGIN_BCS_BATCH(batch, 69);
2485     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
2486
2487     /* reference surfaces 0..15 */
2488     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2489         OUT_BCS_BATCH(batch, 0); /* top */
2490         OUT_BCS_BATCH(batch, 0); /* bottom */
2491     }
2492
2493     /* the current decoding frame/field */
2494     OUT_BCS_BATCH(batch, 0); /* top */
2495     OUT_BCS_BATCH(batch, 0); /* bottom */
2496
2497     /* POC List */
2498     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2499         OUT_BCS_BATCH(batch, 0);
2500         OUT_BCS_BATCH(batch, 0);
2501     }
2502
2503     OUT_BCS_BATCH(batch, 0);
2504     OUT_BCS_BATCH(batch, 0);
2505
2506     ADVANCE_BCS_BATCH(batch);
2507 }
2508
2509 static void
2510 gen7_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2511                                      struct gen7_mfd_context *gen7_mfd_context)
2512 {
2513     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2514
2515     BEGIN_BCS_BATCH(batch, 11);
2516     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2517     OUT_BCS_RELOC(batch,
2518                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2519                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2520                   0);
2521     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2522     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2523     OUT_BCS_BATCH(batch, 0);
2524     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2525     OUT_BCS_BATCH(batch, 0);
2526     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2527     OUT_BCS_BATCH(batch, 0);
2528     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2529     OUT_BCS_BATCH(batch, 0);
2530     ADVANCE_BCS_BATCH(batch);
2531 }
2532
2533 static void
2534 gen7_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2535                             struct gen7_mfd_context *gen7_mfd_context)
2536 {
2537     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2538
2539     /* the input bitsteam format on GEN7 differs from GEN6 */
2540     BEGIN_BCS_BATCH(batch, 6);
2541     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2542     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2543     OUT_BCS_BATCH(batch, 0);
2544     OUT_BCS_BATCH(batch,
2545                   (0 << 31) |
2546                   (0 << 14) |
2547                   (0 << 12) |
2548                   (0 << 10) |
2549                   (0 << 8));
2550     OUT_BCS_BATCH(batch,
2551                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2552                   (0 << 5)  |
2553                   (0 << 4)  |
2554                   (1 << 3) | /* LastSlice Flag */
2555                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2556     OUT_BCS_BATCH(batch, 0);
2557     ADVANCE_BCS_BATCH(batch);
2558 }
2559
2560 static void
2561 gen7_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2562                              struct gen7_mfd_context *gen7_mfd_context)
2563 {
2564     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2565     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2566     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2567     int first_mb_in_slice = 0;
2568     int slice_type = SLICE_TYPE_I;
2569
2570     BEGIN_BCS_BATCH(batch, 11);
2571     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2572     OUT_BCS_BATCH(batch, slice_type);
2573     OUT_BCS_BATCH(batch, 
2574                   (num_ref_idx_l1 << 24) |
2575                   (num_ref_idx_l0 << 16) |
2576                   (0 << 8) |
2577                   (0 << 0));
2578     OUT_BCS_BATCH(batch, 
2579                   (0 << 29) |
2580                   (1 << 27) |   /* disable Deblocking */
2581                   (0 << 24) |
2582                   (gen7_jpeg_wa_clip.qp << 16) |
2583                   (0 << 8) |
2584                   (0 << 0));
2585     OUT_BCS_BATCH(batch, 
2586                   (slice_ver_pos << 24) |
2587                   (slice_hor_pos << 16) | 
2588                   (first_mb_in_slice << 0));
2589     OUT_BCS_BATCH(batch,
2590                   (next_slice_ver_pos << 16) |
2591                   (next_slice_hor_pos << 0));
2592     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2593     OUT_BCS_BATCH(batch, 0);
2594     OUT_BCS_BATCH(batch, 0);
2595     OUT_BCS_BATCH(batch, 0);
2596     OUT_BCS_BATCH(batch, 0);
2597     ADVANCE_BCS_BATCH(batch);
2598 }
2599
2600 static void
2601 gen7_mfd_jpeg_wa(VADriverContextP ctx,
2602                  struct gen7_mfd_context *gen7_mfd_context)
2603 {
2604     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2605     gen7_jpeg_wa_init(ctx, gen7_mfd_context);
2606     intel_batchbuffer_emit_mi_flush(batch);
2607     gen7_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2608     gen7_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2609     gen7_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2610     gen7_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2611     gen7_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2612     gen7_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2613     gen7_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2614
2615     gen7_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2616     gen7_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2617     gen7_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2618 }
2619
2620 void
2621 gen7_mfd_jpeg_decode_picture(VADriverContextP ctx,
2622                              struct decode_state *decode_state,
2623                              struct gen7_mfd_context *gen7_mfd_context)
2624 {
2625     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2626     VAPictureParameterBufferJPEGBaseline *pic_param;
2627     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2628     dri_bo *slice_data_bo;
2629     int i, j, max_selector = 0;
2630
2631     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2632     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2633
2634     /* Currently only support Baseline DCT */
2635     gen7_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2636     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2637     gen7_mfd_jpeg_wa(ctx, gen7_mfd_context);
2638     intel_batchbuffer_emit_mi_flush(batch);
2639     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2640     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2641     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2642     gen7_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2643     gen7_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2644
2645     for (j = 0; j < decode_state->num_slice_params; j++) {
2646         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2647         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2648         slice_data_bo = decode_state->slice_datas[j]->bo;
2649         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2650
2651         if (j == decode_state->num_slice_params - 1)
2652             next_slice_group_param = NULL;
2653         else
2654             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2655
2656         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2657             int component;
2658
2659             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2660
2661             if (i < decode_state->slice_params[j]->num_elements - 1)
2662                 next_slice_param = slice_param + 1;
2663             else
2664                 next_slice_param = next_slice_group_param;
2665
2666             for (component = 0; component < slice_param->num_components; component++) {
2667                 if (max_selector < slice_param->components[component].dc_table_selector)
2668                     max_selector = slice_param->components[component].dc_table_selector;
2669
2670                 if (max_selector < slice_param->components[component].ac_table_selector)
2671                     max_selector = slice_param->components[component].ac_table_selector;
2672             }
2673
2674             slice_param++;
2675         }
2676     }
2677
2678     assert(max_selector < 2);
2679     gen7_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2680
2681     for (j = 0; j < decode_state->num_slice_params; j++) {
2682         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2683         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2684         slice_data_bo = decode_state->slice_datas[j]->bo;
2685         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2686
2687         if (j == decode_state->num_slice_params - 1)
2688             next_slice_group_param = NULL;
2689         else
2690             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2691
2692         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2693             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2694
2695             if (i < decode_state->slice_params[j]->num_elements - 1)
2696                 next_slice_param = slice_param + 1;
2697             else
2698                 next_slice_param = next_slice_group_param;
2699
2700             gen7_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2701             slice_param++;
2702         }
2703     }
2704
2705     intel_batchbuffer_end_atomic(batch);
2706     intel_batchbuffer_flush(batch);
2707 }
2708
2709 static void 
2710 gen7_mfd_decode_picture(VADriverContextP ctx, 
2711                         VAProfile profile, 
2712                         union codec_state *codec_state,
2713                         struct hw_context *hw_context)
2714
2715 {
2716     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2717     struct decode_state *decode_state = &codec_state->decode;
2718
2719     assert(gen7_mfd_context);
2720
2721     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
2722
2723     switch (profile) {
2724     case VAProfileMPEG2Simple:
2725     case VAProfileMPEG2Main:
2726         gen7_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
2727         break;
2728         
2729     case VAProfileH264Baseline:
2730     case VAProfileH264Main:
2731     case VAProfileH264High:
2732         gen7_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
2733         break;
2734
2735     case VAProfileVC1Simple:
2736     case VAProfileVC1Main:
2737     case VAProfileVC1Advanced:
2738         gen7_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
2739         break;
2740
2741     case VAProfileJPEGBaseline:
2742         gen7_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
2743         break;
2744
2745     default:
2746         assert(0);
2747         break;
2748     }
2749 }
2750
2751 static void
2752 gen7_mfd_context_destroy(void *hw_context)
2753 {
2754     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2755
2756     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2757     gen7_mfd_context->post_deblocking_output.bo = NULL;
2758
2759     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2760     gen7_mfd_context->pre_deblocking_output.bo = NULL;
2761
2762     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2763     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2764
2765     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2766     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2767
2768     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2769     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2770
2771     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2772     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2773
2774     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
2775     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2776
2777     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
2778
2779     intel_batchbuffer_free(gen7_mfd_context->base.batch);
2780     free(gen7_mfd_context);
2781 }
2782
2783 static void gen7_mfd_mpeg2_context_init(VADriverContextP ctx,
2784                                     struct gen7_mfd_context *gen7_mfd_context)
2785 {
2786     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
2787     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
2788     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
2789     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
2790 }
2791
2792 struct hw_context *
2793 gen7_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
2794 {
2795     struct intel_driver_data *intel = intel_driver_data(ctx);
2796     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
2797     int i;
2798
2799     gen7_mfd_context->base.destroy = gen7_mfd_context_destroy;
2800     gen7_mfd_context->base.run = gen7_mfd_decode_picture;
2801     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
2802
2803     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
2804         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
2805         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
2806     }
2807
2808     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
2809
2810     switch (profile) {
2811     case VAProfileMPEG2Simple:
2812     case VAProfileMPEG2Main:
2813         gen7_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
2814         break;
2815
2816     case VAProfileH264Baseline:
2817     case VAProfileH264Main:
2818     case VAProfileH264High:
2819         gen7_mfd_avc_context_init(ctx, gen7_mfd_context);
2820         break;
2821     default:
2822         break;
2823     }
2824     return (struct hw_context *)gen7_mfd_context;
2825 }