OSDN Git Service

Moved files around.
[android-x86/hardware-intel-common-vaapi.git] / src / gen6_mfd.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include <va/va_backend.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38
39 #include "i965_defines.h"
40 #include "i965_drv_video.h"
41
42 #include "gen6_mfd.h"
43
/* Size of one direct-MV buffer: 0x88000 = 557056 bytes for a frame. */
#define DMV_SIZE        0x88000

/*
 * 8x8 zig-zag scan order: entry k holds the raster index (row * 8 + column)
 * of the k-th coefficient visited by the scan.
 */
static const uint32_t zigzag_direct[64] = {
    /*  0.. 7 */  0,  1,  8, 16,  9,  2,  3, 10,
    /*  8..15 */ 17, 24, 32, 25, 18, 11,  4,  5,
    /* 16..23 */ 12, 19, 26, 33, 40, 48, 41, 34,
    /* 24..31 */ 27, 20, 13,  6,  7, 14, 21, 28,
    /* 32..39 */ 35, 42, 49, 56, 57, 50, 43, 36,
    /* 40..47 */ 29, 22, 15, 23, 30, 37, 44, 51,
    /* 48..55 */ 58, 59, 52, 45, 38, 31, 39, 46,
    /* 56..63 */ 53, 60, 61, 54, 47, 55, 62, 63
};
56
57 static void
58 gen6_mfd_avc_frame_store_index(VADriverContextP ctx,
59                                VAPictureParameterBufferH264 *pic_param,
60                                struct gen6_mfd_context *gen6_mfd_context)
61 {
62     struct i965_driver_data *i965 = i965_driver_data(ctx);
63     int i, j;
64
65     assert(ARRAY_ELEMS(gen6_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
66
67     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
68         int found = 0;
69
70         if (gen6_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
71             continue;
72
73         for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
74             VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
75             if (ref_pic->flags & VA_PICTURE_H264_INVALID)
76                 continue;
77
78             if (gen6_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
79                 found = 1;
80                 break;
81             }
82         }
83
84         if (!found) {
85             struct object_surface *obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
86             obj_surface->flags &= ~SURFACE_REFERENCED;
87
88             if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
89                 dri_bo_unreference(obj_surface->bo);
90                 obj_surface->bo = NULL;
91                 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
92             }
93
94             if (obj_surface->free_private_data)
95                 obj_surface->free_private_data(&obj_surface->private_data);
96
97             gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
98             gen6_mfd_context->reference_surface[i].frame_store_id = -1;
99         }
100     }
101
102     for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
103         VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
104         int found = 0;
105
106         if (ref_pic->flags & VA_PICTURE_H264_INVALID)
107             continue;
108
109         for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
110             if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
111                 continue;
112             
113             if (gen6_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
114                 found = 1;
115                 break;
116             }
117         }
118
119         if (!found) {
120             int frame_idx;
121             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
122             
123             assert(obj_surface);
124             i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'));
125
126             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) {
127                 for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
128                     if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
129                         continue;
130
131                     if (gen6_mfd_context->reference_surface[j].frame_store_id == frame_idx)
132                         break;
133                 }
134
135                 if (j == ARRAY_ELEMS(gen6_mfd_context->reference_surface))
136                     break;
137             }
138
139             assert(frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface));
140
141             for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
142                 if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
143                     gen6_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
144                     gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
145                     break;
146                 }
147             }
148         }
149     }
150
151     /* sort */
152     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface) - 1; i++) {
153         if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
154             gen6_mfd_context->reference_surface[i].frame_store_id == i)
155             continue;
156
157         for (j = i + 1; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
158             if (gen6_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
159                 gen6_mfd_context->reference_surface[j].frame_store_id == i) {
160                 VASurfaceID id = gen6_mfd_context->reference_surface[i].surface_id;
161                 int frame_idx = gen6_mfd_context->reference_surface[i].frame_store_id;
162
163                 gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[j].surface_id;
164                 gen6_mfd_context->reference_surface[i].frame_store_id = gen6_mfd_context->reference_surface[j].frame_store_id;
165                 gen6_mfd_context->reference_surface[j].surface_id = id;
166                 gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
167                 break;
168             }
169         }
170     }
171 }
172
173 static void 
174 gen6_mfd_free_avc_surface(void **data)
175 {
176     struct gen6_avc_surface *gen6_avc_surface = *data;
177
178     if (!gen6_avc_surface)
179         return;
180
181     dri_bo_unreference(gen6_avc_surface->dmv_top);
182     gen6_avc_surface->dmv_top = NULL;
183     dri_bo_unreference(gen6_avc_surface->dmv_bottom);
184     gen6_avc_surface->dmv_bottom = NULL;
185
186     free(gen6_avc_surface);
187     *data = NULL;
188 }
189
190 static void
191 gen6_mfd_init_avc_surface(VADriverContextP ctx, 
192                           VAPictureParameterBufferH264 *pic_param,
193                           struct object_surface *obj_surface)
194 {
195     struct i965_driver_data *i965 = i965_driver_data(ctx);
196     struct gen6_avc_surface *gen6_avc_surface = obj_surface->private_data;
197
198     obj_surface->free_private_data = gen6_mfd_free_avc_surface;
199
200     if (!gen6_avc_surface) {
201         gen6_avc_surface = calloc(sizeof(struct gen6_avc_surface), 1);
202         assert((obj_surface->size & 0x3f) == 0);
203         obj_surface->private_data = gen6_avc_surface;
204     }
205
206     gen6_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
207                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
208
209     if (gen6_avc_surface->dmv_top == NULL) {
210         gen6_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
211                                                  "direct mv w/r buffer",
212                                                  DMV_SIZE,
213                                                  0x1000);
214     }
215
216     if (gen6_avc_surface->dmv_bottom_flag &&
217         gen6_avc_surface->dmv_bottom == NULL) {
218         gen6_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
219                                                     "direct mv w/r buffer",
220                                                     DMV_SIZE,
221                                                     0x1000);
222     }
223 }
224
225 static void
226 gen6_mfd_pipe_mode_select(VADriverContextP ctx,
227                           struct decode_state *decode_state,
228                           int standard_select,
229                           struct gen6_mfd_context *gen6_mfd_context)
230 {
231     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
232
233     assert(standard_select == MFX_FORMAT_MPEG2 ||
234            standard_select == MFX_FORMAT_AVC ||
235            standard_select == MFX_FORMAT_VC1);
236
237     BEGIN_BCS_BATCH(batch, 4);
238     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
239     OUT_BCS_BATCH(batch,
240                   (MFD_MODE_VLD << 16) | /* VLD mode */
241                   (0 << 10) | /* disable Stream-Out */
242                   (gen6_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
243                   (gen6_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
244                   (0 << 7)  | /* disable TLB prefectch */
245                   (0 << 5)  | /* not in stitch mode */
246                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
247                   (standard_select << 0));
248     OUT_BCS_BATCH(batch,
249                   (0 << 20) | /* round flag in PB slice */
250                   (0 << 19) | /* round flag in Intra8x8 */
251                   (0 << 7)  | /* expand NOA bus flag */
252                   (1 << 6)  | /* must be 1 */
253                   (0 << 5)  | /* disable clock gating for NOA */
254                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
255                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
256                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
257                   (0 << 1)  | /* AVC long field motion vector */
258                   (1 << 0));  /* always calculate AVC ILDB boundary strength */
259     OUT_BCS_BATCH(batch, 0);
260     ADVANCE_BCS_BATCH(batch);
261 }
262
263 static void
264 gen6_mfd_surface_state(VADriverContextP ctx,
265                        struct decode_state *decode_state,
266                        int standard_select,
267                        struct gen6_mfd_context *gen6_mfd_context)
268 {
269     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
270     struct i965_driver_data *i965 = i965_driver_data(ctx);
271     struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
272     assert(obj_surface);
273     
274     BEGIN_BCS_BATCH(batch, 6);
275     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
276     OUT_BCS_BATCH(batch, 0);
277     OUT_BCS_BATCH(batch,
278                   ((obj_surface->orig_height - 1) << 19) |
279                   ((obj_surface->orig_width - 1) << 6));
280     OUT_BCS_BATCH(batch,
281                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
282                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
283                   (0 << 22) | /* surface object control state, FIXME??? */
284                   ((obj_surface->width - 1) << 3) | /* pitch */
285                   (0 << 2)  | /* must be 0 for interleave U/V */
286                   (1 << 1)  | /* must be y-tiled */
287                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, FIXME: must be 1 ??? */
288     OUT_BCS_BATCH(batch,
289                   (0 << 16) | /* must be 0 for interleave U/V */
290                   (obj_surface->height)); /* y offset for U(cb) */
291     OUT_BCS_BATCH(batch, 0);
292     ADVANCE_BCS_BATCH(batch);
293 }
294
295 static void
296 gen6_mfd_pipe_buf_addr_state(VADriverContextP ctx,
297                              struct decode_state *decode_state,
298                              int standard_select,
299                              struct gen6_mfd_context *gen6_mfd_context)
300 {
301     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
302     struct i965_driver_data *i965 = i965_driver_data(ctx);
303     int i;
304
305     BEGIN_BCS_BATCH(batch, 24);
306     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
307     if (gen6_mfd_context->pre_deblocking_output.valid)
308         OUT_BCS_RELOC(batch, gen6_mfd_context->pre_deblocking_output.bo,
309                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
310                       0);
311     else
312         OUT_BCS_BATCH(batch, 0);
313
314     if (gen6_mfd_context->post_deblocking_output.valid)
315         OUT_BCS_RELOC(batch, gen6_mfd_context->post_deblocking_output.bo,
316                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
317                       0);
318     else
319         OUT_BCS_BATCH(batch, 0);
320
321     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
322     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
323
324     if (gen6_mfd_context->intra_row_store_scratch_buffer.valid)
325         OUT_BCS_RELOC(batch, gen6_mfd_context->intra_row_store_scratch_buffer.bo,
326                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
327                       0);
328     else
329         OUT_BCS_BATCH(batch, 0);
330
331     if (gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
332         OUT_BCS_RELOC(batch, gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
333                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
334                       0);
335     else
336         OUT_BCS_BATCH(batch, 0);
337
338     /* DW 7..22 */
339     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
340         struct object_surface *obj_surface;
341
342         if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
343             obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
344             assert(obj_surface && obj_surface->bo);
345
346             OUT_BCS_RELOC(batch, obj_surface->bo,
347                           I915_GEM_DOMAIN_INSTRUCTION, 0,
348                           0);
349         } else {
350             OUT_BCS_BATCH(batch, 0);
351         }
352     }
353
354     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
355     ADVANCE_BCS_BATCH(batch);
356 }
357
358 static void
359 gen6_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
360                                  dri_bo *slice_data_bo,
361                                  int standard_select,
362                                  struct gen6_mfd_context *gen6_mfd_context)
363 {
364     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
365
366     BEGIN_BCS_BATCH(batch, 11);
367     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
368     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
369     OUT_BCS_BATCH(batch, 0);
370     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
371     OUT_BCS_BATCH(batch, 0);
372     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
373     OUT_BCS_BATCH(batch, 0);
374     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
375     OUT_BCS_BATCH(batch, 0);
376     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
377     OUT_BCS_BATCH(batch, 0);
378     ADVANCE_BCS_BATCH(batch);
379 }
380
381 static void
382 gen6_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
383                                  struct decode_state *decode_state,
384                                  int standard_select,
385                                  struct gen6_mfd_context *gen6_mfd_context)
386 {
387     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
388
389     BEGIN_BCS_BATCH(batch, 4);
390     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
391
392     if (gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
393         OUT_BCS_RELOC(batch, gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
394                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
395                       0);
396     else
397         OUT_BCS_BATCH(batch, 0);
398
399     if (gen6_mfd_context->mpr_row_store_scratch_buffer.valid)
400         OUT_BCS_RELOC(batch, gen6_mfd_context->mpr_row_store_scratch_buffer.bo,
401                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
402                       0);
403     else
404         OUT_BCS_BATCH(batch, 0);
405
406     if (gen6_mfd_context->bitplane_read_buffer.valid)
407         OUT_BCS_RELOC(batch, gen6_mfd_context->bitplane_read_buffer.bo,
408                       I915_GEM_DOMAIN_INSTRUCTION, 0,
409                       0);
410     else
411         OUT_BCS_BATCH(batch, 0);
412
413     ADVANCE_BCS_BATCH(batch);
414 }
415
416 static void
417 gen6_mfd_aes_state(VADriverContextP ctx,
418                    struct decode_state *decode_state,
419                    int standard_select)
420 {
421     /* FIXME */
422 }
423
424 static void
425 gen6_mfd_wait(VADriverContextP ctx,
426               struct decode_state *decode_state,
427               int standard_select,
428               struct gen6_mfd_context *gen6_mfd_context)
429 {
430     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
431
432     BEGIN_BCS_BATCH(batch, 1);
433     OUT_BCS_BATCH(batch, MFX_WAIT | (1 << 8));
434     ADVANCE_BCS_BATCH(batch);
435 }
436
437 static void
438 gen6_mfd_avc_img_state(VADriverContextP ctx,
439                        struct decode_state *decode_state,
440                        struct gen6_mfd_context *gen6_mfd_context)
441 {
442     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
443     int qm_present_flag;
444     int img_struct;
445     int mbaff_frame_flag;
446     unsigned int width_in_mbs, height_in_mbs;
447     VAPictureParameterBufferH264 *pic_param;
448
449     assert(decode_state->pic_param && decode_state->pic_param->buffer);
450     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
451     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
452
453     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
454         qm_present_flag = 1;
455     else
456         qm_present_flag = 0; /* built-in QM matrices */
457
458     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
459         img_struct = 1;
460     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
461         img_struct = 3;
462     else
463         img_struct = 0;
464
465     if ((img_struct & 0x1) == 0x1) {
466         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
467     } else {
468         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
469     }
470
471     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
472         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
473         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
474     } else {
475         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
476     }
477
478     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
479                         !pic_param->pic_fields.bits.field_pic_flag);
480
481     width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
482     height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
483     assert(!((width_in_mbs * height_in_mbs) & 0x8000)); /* hardware requirement */
484
485     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
486     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
487            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
488     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
489
490     BEGIN_BCS_BATCH(batch, 13);
491     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
492     OUT_BCS_BATCH(batch, 
493                   ((width_in_mbs * height_in_mbs) & 0x7fff));
494     OUT_BCS_BATCH(batch, 
495                   (height_in_mbs << 16) | 
496                   (width_in_mbs << 0));
497     OUT_BCS_BATCH(batch, 
498                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
499                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
500                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
501                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
502                   (1 << 12) | /* always 1, hardware requirement */
503                   (qm_present_flag << 10) |
504                   (img_struct << 8) |
505                   (16 << 0));
506     OUT_BCS_BATCH(batch,
507                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
508                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
509                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
510                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
511                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
512                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
513                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
514                   (mbaff_frame_flag << 1) |
515                   (pic_param->pic_fields.bits.field_pic_flag << 0));
516     OUT_BCS_BATCH(batch, 0);
517     OUT_BCS_BATCH(batch, 0);
518     OUT_BCS_BATCH(batch, 0);
519     OUT_BCS_BATCH(batch, 0);
520     OUT_BCS_BATCH(batch, 0);
521     OUT_BCS_BATCH(batch, 0);
522     OUT_BCS_BATCH(batch, 0);
523     OUT_BCS_BATCH(batch, 0);
524     ADVANCE_BCS_BATCH(batch);
525 }
526
527 static void
528 gen6_mfd_avc_qm_state(VADriverContextP ctx,
529                       struct decode_state *decode_state,
530                       struct gen6_mfd_context *gen6_mfd_context)
531 {
532     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
533     int cmd_len;
534     VAIQMatrixBufferH264 *iq_matrix;
535     VAPictureParameterBufferH264 *pic_param;
536
537     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
538         return;
539
540     iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
541
542     assert(decode_state->pic_param && decode_state->pic_param->buffer);
543     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
544
545     cmd_len = 2 + 6 * 4; /* always load six 4x4 scaling matrices */
546
547     if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
548         cmd_len += 2 * 16; /* load two 8x8 scaling matrices */
549
550     BEGIN_BCS_BATCH(batch, cmd_len);
551     OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | (cmd_len - 2));
552
553     if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
554         OUT_BCS_BATCH(batch, 
555                       (0x0  << 8) | /* don't use default built-in matrices */
556                       (0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */
557     else
558         OUT_BCS_BATCH(batch, 
559                       (0x0  << 8) | /* don't use default built-in matrices */
560                       (0x3f << 0)); /* six 4x4 scaling matrices */
561
562     intel_batchbuffer_data(batch, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);
563
564     if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
565         intel_batchbuffer_data(batch, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);
566
567     ADVANCE_BCS_BATCH(batch);
568 }
569
570 static void
571 gen6_mfd_avc_directmode_state(VADriverContextP ctx,
572                               VAPictureParameterBufferH264 *pic_param,
573                               VASliceParameterBufferH264 *slice_param,
574                               struct gen6_mfd_context *gen6_mfd_context)
575 {
576     struct i965_driver_data *i965 = i965_driver_data(ctx);
577     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
578     struct object_surface *obj_surface;
579     struct gen6_avc_surface *gen6_avc_surface;
580     VAPictureH264 *va_pic;
581     int i, j;
582
583     BEGIN_BCS_BATCH(batch, 69);
584     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
585
586     /* reference surfaces 0..15 */
587     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
588         if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
589             obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
590             assert(obj_surface);
591             gen6_avc_surface = obj_surface->private_data;
592
593             if (gen6_avc_surface == NULL) {
594                 OUT_BCS_BATCH(batch, 0);
595                 OUT_BCS_BATCH(batch, 0);
596             } else {
597                 OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
598                               I915_GEM_DOMAIN_INSTRUCTION, 0,
599                               0);
600
601                 if (gen6_avc_surface->dmv_bottom_flag == 1)
602                     OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
603                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
604                                   0);
605                 else
606                     OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
607                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
608                                   0);
609             }
610         } else {
611             OUT_BCS_BATCH(batch, 0);
612             OUT_BCS_BATCH(batch, 0);
613         }
614     }
615
616     /* the current decoding frame/field */
617     va_pic = &pic_param->CurrPic;
618     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
619     obj_surface = SURFACE(va_pic->picture_id);
620     assert(obj_surface && obj_surface->bo && obj_surface->private_data);
621     gen6_avc_surface = obj_surface->private_data;
622
623     OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
624                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
625                   0);
626
627     if (gen6_avc_surface->dmv_bottom_flag == 1)
628         OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
629                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
630                       0);
631     else
632         OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
633                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
634                       0);
635
636     /* POC List */
637     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
638         if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
639             int found = 0;
640             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
641                 va_pic = &pic_param->ReferenceFrames[j];
642                 
643                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
644                     continue;
645
646                 if (va_pic->picture_id == gen6_mfd_context->reference_surface[i].surface_id) {
647                     found = 1;
648                     break;
649                 }
650             }
651
652             assert(found == 1);
653             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
654             
655             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
656             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
657         } else {
658             OUT_BCS_BATCH(batch, 0);
659             OUT_BCS_BATCH(batch, 0);
660         }
661     }
662
663     va_pic = &pic_param->CurrPic;
664     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
665     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
666
667     ADVANCE_BCS_BATCH(batch);
668 }
669
670 static void
671 gen6_mfd_avc_slice_state(VADriverContextP ctx,
672                          VAPictureParameterBufferH264 *pic_param,
673                          VASliceParameterBufferH264 *slice_param,
674                          VASliceParameterBufferH264 *next_slice_param,
675                          struct gen6_mfd_context *gen6_mfd_context)
676 {
677     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
678     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
679     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
680     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
681     int num_ref_idx_l0, num_ref_idx_l1;
682     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
683                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
684     int weighted_pred_idc = 0;
685     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
686     int slice_type;
687
688     if (slice_param->slice_type == SLICE_TYPE_I ||
689         slice_param->slice_type == SLICE_TYPE_SI) {
690         slice_type = SLICE_TYPE_I;
691     } else if (slice_param->slice_type == SLICE_TYPE_P ||
692                slice_param->slice_type == SLICE_TYPE_SP) {
693         slice_type = SLICE_TYPE_P;
694     } else { 
695         assert(slice_param->slice_type == SLICE_TYPE_B);
696         slice_type = SLICE_TYPE_B;
697     }
698
699     if (slice_type == SLICE_TYPE_I) {
700         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
701         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
702         num_ref_idx_l0 = 0;
703         num_ref_idx_l1 = 0;
704     } else if (slice_type == SLICE_TYPE_P) {
705         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
706         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
707         num_ref_idx_l1 = 0;
708         weighted_pred_idc = (pic_param->pic_fields.bits.weighted_pred_flag == 1);
709     } else {
710         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
711         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
712         weighted_pred_idc = (pic_param->pic_fields.bits.weighted_bipred_idc == 1);
713     }
714
715     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
716     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
717     slice_ver_pos = first_mb_in_slice / width_in_mbs;
718
719     if (next_slice_param) {
720         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
721         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
722         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
723     } else {
724         next_slice_hor_pos = 0;
725         next_slice_ver_pos = height_in_mbs;
726     }
727
728     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
729     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
730     OUT_BCS_BATCH(batch, slice_type);
731     OUT_BCS_BATCH(batch, 
732                   (num_ref_idx_l1 << 24) |
733                   (num_ref_idx_l0 << 16) |
734                   (slice_param->chroma_log2_weight_denom << 8) |
735                   (slice_param->luma_log2_weight_denom << 0));
736     OUT_BCS_BATCH(batch, 
737                   (weighted_pred_idc << 30) |
738                   (slice_param->direct_spatial_mv_pred_flag << 29) |
739                   (slice_param->disable_deblocking_filter_idc << 27) |
740                   (slice_param->cabac_init_idc << 24) |
741                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
742                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
743                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
744     OUT_BCS_BATCH(batch, 
745                   (slice_ver_pos << 24) |
746                   (slice_hor_pos << 16) | 
747                   (first_mb_in_slice << 0));
748     OUT_BCS_BATCH(batch,
749                   (next_slice_ver_pos << 16) |
750                   (next_slice_hor_pos << 0));
751     OUT_BCS_BATCH(batch, 
752                   (next_slice_param == NULL) << 19); /* last slice flag */
753     OUT_BCS_BATCH(batch, 0);
754     OUT_BCS_BATCH(batch, 0);
755     OUT_BCS_BATCH(batch, 0);
756     OUT_BCS_BATCH(batch, 0);
757     ADVANCE_BCS_BATCH(batch);
758 }
759
760 static void
761 gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx,
762                                  VAPictureParameterBufferH264 *pic_param,
763                                  struct gen6_mfd_context *gen6_mfd_context)
764 {
765     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
766     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
767     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
768
769     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
770     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
771     OUT_BCS_BATCH(batch, 0);
772     OUT_BCS_BATCH(batch, 0);
773     OUT_BCS_BATCH(batch, 0);
774     OUT_BCS_BATCH(batch,
775                   height_in_mbs << 24 |
776                   width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
777     OUT_BCS_BATCH(batch, 0);
778     OUT_BCS_BATCH(batch, 0);
779     OUT_BCS_BATCH(batch, 0);
780     OUT_BCS_BATCH(batch, 0);
781     OUT_BCS_BATCH(batch, 0);
782     OUT_BCS_BATCH(batch, 0);
783     ADVANCE_BCS_BATCH(batch);
784 }
785
786 static void
787 gen6_mfd_avc_ref_idx_state(VADriverContextP ctx,
788                            VAPictureParameterBufferH264 *pic_param,
789                            VASliceParameterBufferH264 *slice_param,
790                            struct gen6_mfd_context *gen6_mfd_context)
791 {
792     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
793     int i, j, num_ref_list;
794     struct {
795         unsigned char bottom_idc:1;
796         unsigned char frame_store_index:4;
797         unsigned char field_picture:1;
798         unsigned char long_term:1;
799         unsigned char non_exist:1;
800     } refs[32];
801
802     if (slice_param->slice_type == SLICE_TYPE_I ||
803         slice_param->slice_type == SLICE_TYPE_SI)
804         return;
805
806     if (slice_param->slice_type == SLICE_TYPE_P ||
807         slice_param->slice_type == SLICE_TYPE_SP) {
808         num_ref_list = 1;
809     } else {
810         num_ref_list = 2;
811     }
812
813     for (i = 0; i < num_ref_list; i++) {
814         VAPictureH264 *va_pic;
815
816         if (i == 0) {
817             va_pic = slice_param->RefPicList0;
818         } else {
819             va_pic = slice_param->RefPicList1;
820         }
821
822         BEGIN_BCS_BATCH(batch, 10);
823         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | (10 - 2));
824         OUT_BCS_BATCH(batch, i);
825
826         for (j = 0; j < 32; j++) {
827             if (va_pic->flags & VA_PICTURE_H264_INVALID) {
828                 refs[j].non_exist = 1;
829                 refs[j].long_term = 1;
830                 refs[j].field_picture = 1;
831                 refs[j].frame_store_index = 0xf;
832                 refs[j].bottom_idc = 1;
833             } else {
834                 int frame_idx;
835                 
836                 for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) {
837                     if (gen6_mfd_context->reference_surface[frame_idx].surface_id != VA_INVALID_ID &&
838                         va_pic->picture_id == gen6_mfd_context->reference_surface[frame_idx].surface_id) {
839                         assert(frame_idx == gen6_mfd_context->reference_surface[frame_idx].frame_store_id);
840                         break;
841                     }
842                 }
843
844                 assert(frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface));
845                 
846                 refs[j].non_exist = 0;
847                 refs[j].long_term = !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
848                 refs[j].field_picture = !!(va_pic->flags & 
849                                            (VA_PICTURE_H264_TOP_FIELD | 
850                                             VA_PICTURE_H264_BOTTOM_FIELD));
851                 refs[j].frame_store_index = frame_idx;
852                 refs[j].bottom_idc = !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
853             }
854
855             va_pic++;
856         }
857         
858         intel_batchbuffer_data(batch, refs, sizeof(refs));
859         ADVANCE_BCS_BATCH(batch);
860     }
861 }
862
863 static void
864 gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
865                                 VAPictureParameterBufferH264 *pic_param,
866                                 VASliceParameterBufferH264 *slice_param,
867                                 struct gen6_mfd_context *gen6_mfd_context)
868 {
869     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
870     int i, j, num_weight_offset_table = 0;
871     short weightoffsets[32 * 6];
872
873     if ((slice_param->slice_type == SLICE_TYPE_P ||
874          slice_param->slice_type == SLICE_TYPE_SP) &&
875         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
876         num_weight_offset_table = 1;
877     }
878     
879     if ((slice_param->slice_type == SLICE_TYPE_B) &&
880         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
881         num_weight_offset_table = 2;
882     }
883
884     for (i = 0; i < num_weight_offset_table; i++) {
885         BEGIN_BCS_BATCH(batch, 98);
886         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
887         OUT_BCS_BATCH(batch, i);
888
889         if (i == 0) {
890             for (j = 0; j < 32; j++) {
891                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
892                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
893                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
894                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
895                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
896                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
897             }
898         } else {
899             for (j = 0; j < 32; j++) {
900                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
901                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
902                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
903                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
904                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
905                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
906             }
907         }
908
909         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
910         ADVANCE_BCS_BATCH(batch);
911     }
912 }
913
914 static int
915 gen6_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
916 {
917     int out_slice_data_bit_offset;
918     int slice_header_size = in_slice_data_bit_offset / 8;
919     int i, j;
920
921     for (i = 0, j = 0; i < slice_header_size; i++, j++) {
922         if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
923             i++, j += 2;
924         }
925     }
926
927     out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
928
929     if (mode_flag == ENTROPY_CABAC)
930         out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
931
932     return out_slice_data_bit_offset;
933 }
934
935 static void
936 gen6_mfd_avc_bsd_object(VADriverContextP ctx,
937                         VAPictureParameterBufferH264 *pic_param,
938                         VASliceParameterBufferH264 *slice_param,
939                         dri_bo *slice_data_bo,
940                         struct gen6_mfd_context *gen6_mfd_context)
941 {
942     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
943     int slice_data_bit_offset;
944     uint8_t *slice_data = NULL;
945
946     dri_bo_map(slice_data_bo, 0);
947     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
948     slice_data_bit_offset = gen6_mfd_avc_get_slice_bit_offset(slice_data,
949                                                               pic_param->pic_fields.bits.entropy_coding_mode_flag,
950                                                               slice_param->slice_data_bit_offset);
951     dri_bo_unmap(slice_data_bo);
952
953     BEGIN_BCS_BATCH(batch, 6);
954     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
955     OUT_BCS_BATCH(batch, 
956                   ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
957     OUT_BCS_BATCH(batch, slice_param->slice_data_offset + (slice_data_bit_offset >> 3));
958     OUT_BCS_BATCH(batch,
959                   (0 << 31) |
960                   (0 << 14) |
961                   (0 << 12) |
962                   (0 << 10) |
963                   (0 << 8));
964     OUT_BCS_BATCH(batch,
965                   (0 << 16) |
966                   (0 << 6)  |
967                   ((0x7 - (slice_data_bit_offset & 0x7)) << 0));
968     OUT_BCS_BATCH(batch, 0);
969     ADVANCE_BCS_BATCH(batch);
970 }
971
972 static void
973 gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx,
974                                       VAPictureParameterBufferH264 *pic_param,
975                                       struct gen6_mfd_context *gen6_mfd_context)
976 {
977     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
978
979     BEGIN_BCS_BATCH(batch, 6);
980     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
981     OUT_BCS_BATCH(batch, 0);
982     OUT_BCS_BATCH(batch, 0);
983     OUT_BCS_BATCH(batch, 0);
984     OUT_BCS_BATCH(batch, 0);
985     OUT_BCS_BATCH(batch, 0);
986     ADVANCE_BCS_BATCH(batch);
987 }
988
989 static void
990 gen6_mfd_avc_phantom_slice(VADriverContextP ctx,
991                            VAPictureParameterBufferH264 *pic_param,
992                            struct gen6_mfd_context *gen6_mfd_context)
993 {
994     gen6_mfd_avc_phantom_slice_state(ctx, pic_param, gen6_mfd_context);
995     gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param, gen6_mfd_context);
996 }
997
998 static void
999 gen6_mfd_avc_decode_init(VADriverContextP ctx,
1000                          struct decode_state *decode_state,
1001                          struct gen6_mfd_context *gen6_mfd_context)
1002 {
1003     VAPictureParameterBufferH264 *pic_param;
1004     VASliceParameterBufferH264 *slice_param;
1005     VAPictureH264 *va_pic;
1006     struct i965_driver_data *i965 = i965_driver_data(ctx);
1007     struct object_surface *obj_surface;
1008     dri_bo *bo;
1009     int i, j, enable_avc_ildb = 0;
1010
1011     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
1012         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1013         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1014
1015         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1016             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1017             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1018                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1019                    (slice_param->slice_type == SLICE_TYPE_P) ||
1020                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1021                    (slice_param->slice_type == SLICE_TYPE_B));
1022
1023             if (slice_param->disable_deblocking_filter_idc != 1) {
1024                 enable_avc_ildb = 1;
1025                 break;
1026             }
1027
1028             slice_param++;
1029         }
1030     }
1031
1032     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1033     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1034     gen6_mfd_avc_frame_store_index(ctx, pic_param, gen6_mfd_context);
1035
1036     /* Current decoded picture */
1037     va_pic = &pic_param->CurrPic;
1038     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
1039     obj_surface = SURFACE(va_pic->picture_id);
1040     assert(obj_surface);
1041     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
1042     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
1043     gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);
1044     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
1045
1046     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
1047     gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1048     dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
1049     gen6_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
1050
1051     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
1052     gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1053     dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
1054     gen6_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
1055
1056     dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
1057     bo = dri_bo_alloc(i965->intel.bufmgr,
1058                       "intra row store",
1059                       128 * 64,
1060                       0x1000);
1061     assert(bo);
1062     gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1063     gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1064
1065     dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1066     bo = dri_bo_alloc(i965->intel.bufmgr,
1067                       "deblocking filter row store",
1068                       30720, /* 4 * 120 * 64 */
1069                       0x1000);
1070     assert(bo);
1071     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1072     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1073
1074     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1075     bo = dri_bo_alloc(i965->intel.bufmgr,
1076                       "bsd mpc row store",
1077                       11520, /* 1.5 * 120 * 64 */
1078                       0x1000);
1079     assert(bo);
1080     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1081     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1082
1083     dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
1084     bo = dri_bo_alloc(i965->intel.bufmgr,
1085                       "mpr row store",
1086                       7680, /* 1. 0 * 120 * 64 */
1087                       0x1000);
1088     assert(bo);
1089     gen6_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
1090     gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
1091
1092     gen6_mfd_context->bitplane_read_buffer.valid = 0;
1093 }
1094
1095 static void
1096 gen6_mfd_avc_decode_picture(VADriverContextP ctx,
1097                             struct decode_state *decode_state,
1098                             struct gen6_mfd_context *gen6_mfd_context)
1099 {
1100     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1101     VAPictureParameterBufferH264 *pic_param;
1102     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
1103     dri_bo *slice_data_bo;
1104     int i, j;
1105
1106     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1107     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1108     gen6_mfd_avc_decode_init(ctx, decode_state, gen6_mfd_context);
1109
1110     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1111     intel_batchbuffer_emit_mi_flush(batch);
1112     gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
1113     gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
1114     gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
1115     gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
1116     gen6_mfd_avc_img_state(ctx, decode_state, gen6_mfd_context);
1117     gen6_mfd_avc_qm_state(ctx, decode_state, gen6_mfd_context);
1118
1119     for (j = 0; j < decode_state->num_slice_params; j++) {
1120         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1121         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1122         slice_data_bo = decode_state->slice_datas[j]->bo;
1123         gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen6_mfd_context);
1124
1125         if (j == decode_state->num_slice_params - 1)
1126             next_slice_group_param = NULL;
1127         else
1128             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
1129
1130         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1131             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1132             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1133                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1134                    (slice_param->slice_type == SLICE_TYPE_P) ||
1135                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1136                    (slice_param->slice_type == SLICE_TYPE_B));
1137
1138             if (i < decode_state->slice_params[j]->num_elements - 1)
1139                 next_slice_param = slice_param + 1;
1140             else
1141                 next_slice_param = next_slice_group_param;
1142
1143             gen6_mfd_avc_directmode_state(ctx, pic_param, slice_param, gen6_mfd_context);
1144             gen6_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
1145             gen6_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen6_mfd_context);
1146             gen6_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen6_mfd_context);
1147             gen6_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen6_mfd_context);
1148             slice_param++;
1149         }
1150     }
1151     
1152     gen6_mfd_avc_phantom_slice(ctx, pic_param, gen6_mfd_context);
1153     intel_batchbuffer_end_atomic(batch);
1154     intel_batchbuffer_flush(batch);
1155 }
1156
1157 static void
1158 gen6_mfd_mpeg2_decode_init(VADriverContextP ctx,
1159                            struct decode_state *decode_state,
1160                            struct gen6_mfd_context *gen6_mfd_context)
1161 {
1162     VAPictureParameterBufferMPEG2 *pic_param;
1163     struct i965_driver_data *i965 = i965_driver_data(ctx);
1164     struct object_surface *obj_surface;
1165     int i;
1166     dri_bo *bo;
1167
1168     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1169     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1170
1171     /* reference picture */
1172     obj_surface = SURFACE(pic_param->forward_reference_picture);
1173
1174     if (obj_surface && obj_surface->bo)
1175         gen6_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
1176     else
1177         gen6_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
1178
1179     obj_surface = SURFACE(pic_param->backward_reference_picture);
1180
1181     if (obj_surface && obj_surface->bo)
1182         gen6_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
1183     else
1184         gen6_mfd_context->reference_surface[1].surface_id = gen6_mfd_context->reference_surface[0].surface_id;
1185
1186     /* must do so !!! */
1187     for (i = 2; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++)
1188         gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[i % 2].surface_id;
1189
1190     /* Current decoded picture */
1191     obj_surface = SURFACE(decode_state->current_render_target);
1192     assert(obj_surface);
1193     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
1194
1195     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
1196     gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1197     dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
1198     gen6_mfd_context->pre_deblocking_output.valid = 1;
1199
1200     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1201     bo = dri_bo_alloc(i965->intel.bufmgr,
1202                       "bsd mpc row store",
1203                       11520, /* 1.5 * 120 * 64 */
1204                       0x1000);
1205     assert(bo);
1206     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1207     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1208
1209     gen6_mfd_context->post_deblocking_output.valid = 0;
1210     gen6_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1211     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1212     gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1213     gen6_mfd_context->bitplane_read_buffer.valid = 0;
1214 }
1215
1216 static void
1217 gen6_mfd_mpeg2_pic_state(VADriverContextP ctx,
1218                          struct decode_state *decode_state,
1219                          struct gen6_mfd_context *gen6_mfd_context)
1220 {
1221     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1222     VAPictureParameterBufferMPEG2 *pic_param;
1223
1224     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1225     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1226
1227     BEGIN_BCS_BATCH(batch, 4);
1228     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (4 - 2));
1229     OUT_BCS_BATCH(batch,
1230                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1231                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1232                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1233                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1234                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1235                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1236                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1237                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1238                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1239                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1240                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1241                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1242     OUT_BCS_BATCH(batch,
1243                   pic_param->picture_coding_type << 9);
1244     OUT_BCS_BATCH(batch,
1245                   (ALIGN(pic_param->vertical_size, 16) / 16) << 16 |
1246                   (ALIGN(pic_param->horizontal_size, 16) / 16));
1247     ADVANCE_BCS_BATCH(batch);
1248 }
1249
1250 static void
1251 gen6_mfd_mpeg2_qm_state(VADriverContextP ctx,
1252                         struct decode_state *decode_state,
1253                         struct gen6_mfd_context *gen6_mfd_context)
1254 {
1255     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1256     VAIQMatrixBufferMPEG2 *iq_matrix;
1257     int i;
1258
1259     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
1260         return;
1261
1262     iq_matrix = (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1263
1264     for (i = 0; i < 2; i++) {
1265         int k, m;
1266         unsigned char *qm = NULL;
1267         unsigned char qmx[64];
1268
1269         if (i == 0) {
1270             if (iq_matrix->load_intra_quantiser_matrix)
1271                 qm = iq_matrix->intra_quantiser_matrix;
1272         } else {
1273             if (iq_matrix->load_non_intra_quantiser_matrix)
1274                 qm = iq_matrix->non_intra_quantiser_matrix;
1275         }
1276
1277         if (!qm)
1278             continue;
1279
1280         /* Upload quantisation matrix in raster order. The mplayer vaapi
1281          * patch passes quantisation matrix in zig-zag order to va library.
1282          */
1283         for (k = 0; k < 64; k++) {
1284             m = zigzag_direct[k];
1285             qmx[m] = qm[k];
1286         }
1287
1288         BEGIN_BCS_BATCH(batch, 18);
1289         OUT_BCS_BATCH(batch, MFX_MPEG2_QM_STATE | (18 - 2));
1290         OUT_BCS_BATCH(batch, i);
1291         intel_batchbuffer_data(batch, qmx, 64);
1292         ADVANCE_BCS_BATCH(batch);
1293     }
1294 }
1295
1296 static void
1297 gen6_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1298                           VAPictureParameterBufferMPEG2 *pic_param,
1299                           VASliceParameterBufferMPEG2 *slice_param,
1300                           VASliceParameterBufferMPEG2 *next_slice_param,
1301                           struct gen6_mfd_context *gen6_mfd_context)
1302 {
1303     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1304     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1305     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic = 0;
1306
1307     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1308         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1309         is_field_pic = 1;
1310
1311     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic);
1312     hpos0 = slice_param->slice_horizontal_position;
1313
1314     if (next_slice_param == NULL) {
1315         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1316         hpos1 = 0;
1317     } else {
1318         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic);
1319         hpos1 = next_slice_param->slice_horizontal_position;
1320     }
1321
1322     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1323
1324     BEGIN_BCS_BATCH(batch, 5);
1325     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1326     OUT_BCS_BATCH(batch, 
1327                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1328     OUT_BCS_BATCH(batch, 
1329                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1330     OUT_BCS_BATCH(batch,
1331                   hpos0 << 24 |
1332                   vpos0 << 16 |
1333                   mb_count << 8 |
1334                   (next_slice_param == NULL) << 5 |
1335                   (next_slice_param == NULL) << 3 |
1336                   (slice_param->macroblock_offset & 0x7));
1337     OUT_BCS_BATCH(batch,
1338                   slice_param->quantiser_scale_code << 24);
1339     ADVANCE_BCS_BATCH(batch);
1340 }
1341
1342 static void
1343 gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1344                               struct decode_state *decode_state,
1345                               struct gen6_mfd_context *gen6_mfd_context)
1346 {
1347     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1348     VAPictureParameterBufferMPEG2 *pic_param;
1349     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1350     dri_bo *slice_data_bo;
1351     int i, j;
1352
1353     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1354     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1355
1356     gen6_mfd_mpeg2_decode_init(ctx, decode_state, gen6_mfd_context);
1357     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1358     intel_batchbuffer_emit_mi_flush(batch);
1359     gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
1360     gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
1361     gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
1362     gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
1363     gen6_mfd_mpeg2_pic_state(ctx, decode_state, gen6_mfd_context);
1364     gen6_mfd_mpeg2_qm_state(ctx, decode_state, gen6_mfd_context);
1365
1366     for (j = 0; j < decode_state->num_slice_params; j++) {
1367         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1368         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1369         slice_data_bo = decode_state->slice_datas[j]->bo;
1370         gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen6_mfd_context);
1371
1372         if (j == decode_state->num_slice_params - 1)
1373             next_slice_group_param = NULL;
1374         else
1375             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1376
1377         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1378             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1379
1380             if (i < decode_state->slice_params[j]->num_elements - 1)
1381                 next_slice_param = slice_param + 1;
1382             else
1383                 next_slice_param = next_slice_group_param;
1384
1385             gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
1386             slice_param++;
1387         }
1388     }
1389
1390     intel_batchbuffer_end_atomic(batch);
1391     intel_batchbuffer_flush(batch);
1392 }
1393
1394 static const int va_to_gen6_vc1_pic_type[5] = {
1395     GEN6_VC1_I_PICTURE,
1396     GEN6_VC1_P_PICTURE,
1397     GEN6_VC1_B_PICTURE,
1398     GEN6_VC1_BI_PICTURE,
1399     GEN6_VC1_P_PICTURE,
1400 };
1401
1402 static const int va_to_gen6_vc1_mv[4] = {
1403     1, /* 1-MV */
1404     2, /* 1-MV half-pel */
1405     3, /* 1-MV half-pef bilinear */
1406     0, /* Mixed MV */
1407 };
1408
1409 static const int b_picture_scale_factor[21] = {
1410     128, 85,  170, 64,  192,
1411     51,  102, 153, 204, 43,
1412     215, 37,  74,  111, 148,
1413     185, 222, 32,  96,  160, 
1414     224,
1415 };
1416
1417 static const int va_to_gen6_vc1_condover[3] = {
1418     0,
1419     2,
1420     3
1421 };
1422
1423 static const int va_to_gen6_vc1_profile[4] = {
1424     GEN6_VC1_SIMPLE_PROFILE,
1425     GEN6_VC1_MAIN_PROFILE,
1426     GEN6_VC1_RESERVED_PROFILE,
1427     GEN6_VC1_ADVANCED_PROFILE
1428 };
1429
/*
 * Translation from the frame level transform type (used as the index) to
 * the transform size code written into MFX_VC1_PIC_STATE:
 * 0 = 8x8, 1 = 8x4, 2 = 4x8, 3 = 4x4.
 */
static const int va_to_gen6_vc1_ttfrm[8] = {
    [0] = 0,    /* 8x8 */
    [1] = 1,    /* 8x4 bottom */
    [2] = 1,    /* 8x4 top */
    [3] = 1,    /* 8x4 */
    [4] = 2,    /* 4x8 bottom */
    [5] = 2,    /* 4x8 top */
    [6] = 2,    /* 4x8 */
    [7] = 3,    /* 4x4 */
};
1440
1441 static void 
1442 gen6_mfd_free_vc1_surface(void **data)
1443 {
1444     struct gen6_vc1_surface *gen6_vc1_surface = *data;
1445
1446     if (!gen6_vc1_surface)
1447         return;
1448
1449     dri_bo_unreference(gen6_vc1_surface->dmv);
1450     free(gen6_vc1_surface);
1451     *data = NULL;
1452 }
1453
1454 static void
1455 gen6_mfd_init_vc1_surface(VADriverContextP ctx, 
1456                           VAPictureParameterBufferVC1 *pic_param,
1457                           struct object_surface *obj_surface)
1458 {
1459     struct i965_driver_data *i965 = i965_driver_data(ctx);
1460     struct gen6_vc1_surface *gen6_vc1_surface = obj_surface->private_data;
1461
1462     obj_surface->free_private_data = gen6_mfd_free_vc1_surface;
1463
1464     if (!gen6_vc1_surface) {
1465         gen6_vc1_surface = calloc(sizeof(struct gen6_vc1_surface), 1);
1466         assert((obj_surface->size & 0x3f) == 0);
1467         obj_surface->private_data = gen6_vc1_surface;
1468     }
1469
1470     gen6_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1471
1472     if (gen6_vc1_surface->dmv == NULL) {
1473         gen6_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1474                                              "direct mv w/r buffer",
1475                                              557056,    /* 64 * 128 * 64 */
1476                                              0x1000);
1477     }
1478 }
1479
1480 static void
1481 gen6_mfd_vc1_decode_init(VADriverContextP ctx,
1482                          struct decode_state *decode_state,
1483                          struct gen6_mfd_context *gen6_mfd_context)
1484 {
1485     VAPictureParameterBufferVC1 *pic_param;
1486     struct i965_driver_data *i965 = i965_driver_data(ctx);
1487     struct object_surface *obj_surface;
1488     int i;
1489     dri_bo *bo;
1490
1491     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1492     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1493
1494     /* reference picture */
1495     obj_surface = SURFACE(pic_param->forward_reference_picture);
1496
1497     if (obj_surface && obj_surface->bo)
1498         gen6_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
1499     else
1500         gen6_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
1501
1502     obj_surface = SURFACE(pic_param->backward_reference_picture);
1503
1504     if (obj_surface && obj_surface->bo)
1505         gen6_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
1506     else
1507         gen6_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;
1508
1509     /* must do so !!! */
1510     for (i = 2; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++)
1511         gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[i % 2].surface_id;
1512
1513     /* Current decoded picture */
1514     obj_surface = SURFACE(decode_state->current_render_target);
1515     assert(obj_surface);
1516     gen6_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1517     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
1518
1519     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
1520     gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1521     dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
1522     gen6_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1523
1524     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
1525     gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1526     dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
1527     gen6_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1528
1529     dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
1530     bo = dri_bo_alloc(i965->intel.bufmgr,
1531                       "intra row store",
1532                       128 * 64,
1533                       0x1000);
1534     assert(bo);
1535     gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1536     gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1537
1538     dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1539     bo = dri_bo_alloc(i965->intel.bufmgr,
1540                       "deblocking filter row store",
1541                       46080, /* 6 * 120 * 64 */
1542                       0x1000);
1543     assert(bo);
1544     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1545     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1546
1547     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1548     bo = dri_bo_alloc(i965->intel.bufmgr,
1549                       "bsd mpc row store",
1550                       11520, /* 1.5 * 120 * 64 */
1551                       0x1000);
1552     assert(bo);
1553     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1554     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1555
1556     gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1557
1558     gen6_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1559     dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
1560     
1561     if (gen6_mfd_context->bitplane_read_buffer.valid) {
1562         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1563         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1564         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1565         int src_w, src_h;
1566         uint8_t *src = NULL, *dst = NULL;
1567
1568         assert(decode_state->bit_plane->buffer);
1569         src = decode_state->bit_plane->buffer;
1570
1571         bo = dri_bo_alloc(i965->intel.bufmgr,
1572                           "VC-1 Bitplane",
1573                           bitplane_width * bitplane_width,
1574                           0x1000);
1575         assert(bo);
1576         gen6_mfd_context->bitplane_read_buffer.bo = bo;
1577
1578         dri_bo_map(bo, True);
1579         assert(bo->virtual);
1580         dst = bo->virtual;
1581
1582         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1583             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1584                 int src_index, dst_index;
1585                 int src_shift;
1586                 uint8_t src_value;
1587
1588                 src_index = (src_h * width_in_mbs + src_w) / 2;
1589                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1590                 src_value = ((src[src_index] >> src_shift) & 0xf);
1591
1592                 dst_index = src_w / 2;
1593                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1594             }
1595
1596             if (src_w & 1)
1597                 dst[src_w / 2] >>= 4;
1598
1599             dst += bitplane_width;
1600         }
1601
1602         dri_bo_unmap(bo);
1603     } else
1604         gen6_mfd_context->bitplane_read_buffer.bo = NULL;
1605 }
1606
1607 static void
1608 gen6_mfd_vc1_pic_state(VADriverContextP ctx,
1609                        struct decode_state *decode_state,
1610                        struct gen6_mfd_context *gen6_mfd_context)
1611 {
1612     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1613     VAPictureParameterBufferVC1 *pic_param;
1614     struct i965_driver_data *i965 = i965_driver_data(ctx);
1615     struct object_surface *obj_surface;
1616     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1617     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1618     int unified_mv_mode;
1619     int ref_field_pic_polarity = 0;
1620     int scale_factor = 0;
1621     int trans_ac_y = 0;
1622     int dmv_surface_valid = 0;
1623     int brfd = 0;
1624     int fcm = 0;
1625     int picture_type;
1626     int profile;
1627     int overlap;
1628
1629     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1630     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1631
1632     profile = va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile];
1633     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1634     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1635     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1636     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1637     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1638     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1639     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1640
1641     if (dquant == 0) {
1642         alt_pquant_config = 0;
1643         alt_pquant_edge_mask = 0;
1644     } else if (dquant == 2) {
1645         alt_pquant_config = 1;
1646         alt_pquant_edge_mask = 0xf;
1647     } else {
1648         assert(dquant == 1);
1649         if (dquantfrm == 0) {
1650             alt_pquant_config = 0;
1651             alt_pquant_edge_mask = 0;
1652             alt_pq = 0;
1653         } else {
1654             assert(dquantfrm == 1);
1655             alt_pquant_config = 1;
1656
1657             switch (dqprofile) {
1658             case 3:
1659                 if (dqbilevel == 0) {
1660                     alt_pquant_config = 2;
1661                     alt_pquant_edge_mask = 0;
1662                 } else {
1663                     assert(dqbilevel == 1);
1664                     alt_pquant_config = 3;
1665                     alt_pquant_edge_mask = 0;
1666                 }
1667                 break;
1668                 
1669             case 0:
1670                 alt_pquant_edge_mask = 0xf;
1671                 break;
1672
1673             case 1:
1674                 if (dqdbedge == 3)
1675                     alt_pquant_edge_mask = 0x9;
1676                 else
1677                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1678
1679                 break;
1680
1681             case 2:
1682                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1683                 break;
1684
1685             default:
1686                 assert(0);
1687             }
1688         }
1689     }
1690
1691     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1692         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1693         unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1694     } else {
1695         assert(pic_param->mv_fields.bits.mv_mode < 4);
1696         unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1697     }
1698
1699     if (pic_param->sequence_fields.bits.interlace == 1 &&
1700         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1701         /* FIXME: calculate reference field picture polarity */
1702         assert(0);
1703         ref_field_pic_polarity = 0;
1704     }
1705
1706     if (pic_param->b_picture_fraction < 21)
1707         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1708
1709     picture_type = va_to_gen6_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1710     
1711     if (profile == GEN6_VC1_ADVANCED_PROFILE && 
1712         picture_type == GEN6_VC1_I_PICTURE)
1713         picture_type = GEN6_VC1_BI_PICTURE;
1714
1715     if (picture_type == GEN6_VC1_I_PICTURE || picture_type == GEN6_VC1_BI_PICTURE) /* I picture */
1716         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1717     else
1718         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1719
1720
1721     if (picture_type == GEN6_VC1_B_PICTURE) {
1722         struct gen6_vc1_surface *gen6_vc1_surface = NULL;
1723
1724         obj_surface = SURFACE(pic_param->backward_reference_picture);
1725         assert(obj_surface);
1726         gen6_vc1_surface = obj_surface->private_data;
1727
1728         if (!gen6_vc1_surface || 
1729             (va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_I_PICTURE ||
1730              va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_BI_PICTURE))
1731             dmv_surface_valid = 0;
1732         else
1733             dmv_surface_valid = 1;
1734     }
1735
1736     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1737
1738     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1739         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1740     else {
1741         if (pic_param->picture_fields.bits.top_field_first)
1742             fcm = 2;
1743         else
1744             fcm = 3;
1745     }
1746
1747     if (pic_param->picture_fields.bits.picture_type == GEN6_VC1_B_PICTURE) { /* B picture */
1748         brfd = pic_param->reference_fields.bits.reference_distance;
1749         brfd = (scale_factor * brfd) >> 8;
1750         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1751
1752         if (brfd < 0)
1753             brfd = 0;
1754     }
1755
1756     overlap = pic_param->sequence_fields.bits.overlap;
1757     if (profile != GEN6_VC1_ADVANCED_PROFILE && pic_param->pic_quantizer_fields.bits.pic_quantizer_scale < 9)
1758         overlap = 0;
1759
1760     assert(pic_param->conditional_overlap_flag < 3);
1761     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1762
1763     BEGIN_BCS_BATCH(batch, 6);
1764     OUT_BCS_BATCH(batch, MFX_VC1_PIC_STATE | (6 - 2));
1765     OUT_BCS_BATCH(batch,
1766                   (ALIGN(pic_param->coded_height, 16) / 16) << 16 |
1767                   (ALIGN(pic_param->coded_width, 16) / 16));
1768     OUT_BCS_BATCH(batch,
1769                   pic_param->sequence_fields.bits.syncmarker << 31 |
1770                   1 << 29 | /* concealment */
1771                   alt_pq << 24 |
1772                   pic_param->entrypoint_fields.bits.loopfilter << 23 |
1773                   overlap << 22 |
1774                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 21 | /* implicit quantizer */
1775                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 16 |
1776                   alt_pquant_edge_mask << 12 |
1777                   alt_pquant_config << 10 |
1778                   pic_param->pic_quantizer_fields.bits.half_qp << 9 |
1779                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 8 |
1780                   va_to_gen6_vc1_condover[pic_param->conditional_overlap_flag] << 6 |
1781                   !pic_param->picture_fields.bits.is_first_field << 5 |
1782                   picture_type << 2 |
1783                   fcm << 0);
1784     OUT_BCS_BATCH(batch,
1785                   !!pic_param->bitplane_present.value << 23 |
1786                   !pic_param->bitplane_present.flags.bp_forward_mb << 22 |
1787                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 21 |
1788                   !pic_param->bitplane_present.flags.bp_skip_mb << 20 |
1789                   !pic_param->bitplane_present.flags.bp_direct_mb << 19 |
1790                   !pic_param->bitplane_present.flags.bp_overflags << 18 |
1791                   !pic_param->bitplane_present.flags.bp_ac_pred << 17 |
1792                   !pic_param->bitplane_present.flags.bp_field_tx << 16 |
1793                   pic_param->mv_fields.bits.extended_dmv_range << 14 |
1794                   pic_param->mv_fields.bits.extended_mv_range << 12 |
1795                   pic_param->mv_fields.bits.four_mv_switch << 11 |
1796                   pic_param->fast_uvmc_flag << 10 |
1797                   unified_mv_mode << 8 |
1798                   ref_field_pic_polarity << 6 |
1799                   pic_param->reference_fields.bits.num_reference_pictures << 5 |
1800                   pic_param->reference_fields.bits.reference_distance << 0);
1801     OUT_BCS_BATCH(batch,
1802                   scale_factor << 24 |
1803                   pic_param->mv_fields.bits.mv_table << 20 |
1804                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1805                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1806                   va_to_gen6_vc1_ttfrm[pic_param->transform_fields.bits.frame_level_transform_type] << 12 |
1807                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1808                   pic_param->mb_mode_table << 8 |
1809                   trans_ac_y << 6 |
1810                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1811                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1812                   pic_param->cbp_table << 0);
1813     OUT_BCS_BATCH(batch,
1814                   dmv_surface_valid << 13 |
1815                   brfd << 8 |
1816                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1));
1817     ADVANCE_BCS_BATCH(batch);
1818 }
1819
1820 static void
1821 gen6_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1822                              struct decode_state *decode_state,
1823                              struct gen6_mfd_context *gen6_mfd_context)
1824 {
1825     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1826     VAPictureParameterBufferVC1 *pic_param;
1827     int interpolation_mode = 0;
1828     int intensitycomp_single;
1829
1830     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1831     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1832
1833     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1834         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1835          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1836         interpolation_mode = 2; /* Half-pel bilinear */
1837     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1838              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1839               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1840         interpolation_mode = 0; /* Half-pel bicubic */
1841     else
1842         interpolation_mode = 1; /* Quarter-pel bicubic */
1843
1844     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1845     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1846     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1847
1848     BEGIN_BCS_BATCH(batch, 7);
1849     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (7 - 2));
1850     OUT_BCS_BATCH(batch,
1851                   0 << 8 | /* FIXME: interlace mode */
1852                   pic_param->rounding_control << 4 |
1853                   va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile] << 2);
1854     OUT_BCS_BATCH(batch,
1855                   pic_param->luma_shift << 16 |
1856                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1857     OUT_BCS_BATCH(batch, 0);
1858     OUT_BCS_BATCH(batch, 0);
1859     OUT_BCS_BATCH(batch, 0);
1860     OUT_BCS_BATCH(batch,
1861                   interpolation_mode << 19 |
1862                   pic_param->fast_uvmc_flag << 18 |
1863                   0 << 17 | /* FIXME: scale up or down ??? */
1864                   pic_param->range_reduction_frame << 16 |
1865                   0 << 6 | /* FIXME: double ??? */
1866                   0 << 4 |
1867                   intensitycomp_single << 2 |
1868                   intensitycomp_single << 0);
1869     ADVANCE_BCS_BATCH(batch);
1870 }
1871
1872
1873 static void
1874 gen6_mfd_vc1_directmode_state(VADriverContextP ctx,
1875                               struct decode_state *decode_state,
1876                               struct gen6_mfd_context *gen6_mfd_context)
1877 {
1878     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1879     VAPictureParameterBufferVC1 *pic_param;
1880     struct i965_driver_data *i965 = i965_driver_data(ctx);
1881     struct object_surface *obj_surface;
1882     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1883
1884     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1885     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1886
1887     obj_surface = SURFACE(decode_state->current_render_target);
1888
1889     if (obj_surface && obj_surface->private_data) {
1890         dmv_write_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
1891     }
1892
1893     obj_surface = SURFACE(pic_param->backward_reference_picture);
1894
1895     if (obj_surface && obj_surface->private_data) {
1896         dmv_read_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
1897     }
1898
1899     BEGIN_BCS_BATCH(batch, 3);
1900     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
1901
1902     if (dmv_write_buffer)
1903         OUT_BCS_RELOC(batch, dmv_write_buffer,
1904                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1905                       0);
1906     else
1907         OUT_BCS_BATCH(batch, 0);
1908
1909     if (dmv_read_buffer)
1910         OUT_BCS_RELOC(batch, dmv_read_buffer,
1911                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1912                       0);
1913     else
1914         OUT_BCS_BATCH(batch, 0);
1915                   
1916     ADVANCE_BCS_BATCH(batch);
1917 }
1918
/*
 * Convert the bit offset of the first macroblock into an offset within
 * the raw slice data.
 *
 * buf:                      start of the slice data.
 * in_slice_data_bit_offset: macroblock bit offset as supplied by the
 *                           caller.
 * profile:                  VA sequence profile value; only profile 3 is
 *                           adjusted, every other value returns the
 *                           input unchanged.
 *
 * For profile 3 the header is walked byte by byte.  Every
 * 0x00 0x00 0x03 sequence followed by a byte below 4 counts as two header
 * bytes but occupies three bytes of the raw data.  The result is the raw
 * byte position reached, in bits, plus the bit remainder of the input.
 *
 * The function has no length argument and looks up to three bytes past
 * the current position.
 *
 * NOTE(review): when such a sequence starts on the last header byte, the
 * full three bytes are still skipped -- confirm this is what the hardware
 * expects.
 */
static int
gen6_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    const int header_bytes = in_slice_data_bit_offset / 8;
    int counted = 0;    /* header bytes accounted for so far */
    int raw_pos = 0;    /* matching position in the raw data */

    if (profile != 3)
        return in_slice_data_bit_offset;

    while (counted < header_bytes) {
        if (buf[raw_pos] == 0 && buf[raw_pos + 1] == 0 &&
            buf[raw_pos + 2] == 3 && buf[raw_pos + 3] < 4) {
            counted += 2;
            raw_pos += 3;
        } else {
            counted += 1;
            raw_pos += 1;
        }
    }

    return 8 * raw_pos + in_slice_data_bit_offset % 8;
}
1940
1941 static void
1942 gen6_mfd_vc1_bsd_object(VADriverContextP ctx,
1943                         VAPictureParameterBufferVC1 *pic_param,
1944                         VASliceParameterBufferVC1 *slice_param,
1945                         VASliceParameterBufferVC1 *next_slice_param,
1946                         dri_bo *slice_data_bo,
1947                         struct gen6_mfd_context *gen6_mfd_context)
1948 {
1949     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1950     int next_slice_start_vert_pos;
1951     int macroblock_offset;
1952     uint8_t *slice_data = NULL;
1953
1954     dri_bo_map(slice_data_bo, 0);
1955     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1956     macroblock_offset = gen6_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1957                                                                slice_param->macroblock_offset,
1958                                                                pic_param->sequence_fields.bits.profile);
1959     dri_bo_unmap(slice_data_bo);
1960
1961     if (next_slice_param)
1962         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1963     else
1964         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1965
1966     BEGIN_BCS_BATCH(batch, 4);
1967     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (4 - 2));
1968     OUT_BCS_BATCH(batch, 
1969                   slice_param->slice_data_size - (macroblock_offset >> 3));
1970     OUT_BCS_BATCH(batch, 
1971                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1972     OUT_BCS_BATCH(batch,
1973                   slice_param->slice_vertical_position << 24 |
1974                   next_slice_start_vert_pos << 16 |
1975                   (macroblock_offset & 0x7));
1976     ADVANCE_BCS_BATCH(batch);
1977 }
1978
1979 static void
1980 gen6_mfd_vc1_decode_picture(VADriverContextP ctx,
1981                             struct decode_state *decode_state,
1982                             struct gen6_mfd_context *gen6_mfd_context)
1983 {
1984     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1985     VAPictureParameterBufferVC1 *pic_param;
1986     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1987     dri_bo *slice_data_bo;
1988     int i, j;
1989
1990     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1991     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1992
1993     gen6_mfd_vc1_decode_init(ctx, decode_state, gen6_mfd_context);
1994     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1995     intel_batchbuffer_emit_mi_flush(batch);
1996     gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1997     gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1998     gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1999     gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
2000     gen6_mfd_vc1_pic_state(ctx, decode_state, gen6_mfd_context);
2001     gen6_mfd_vc1_pred_pipe_state(ctx, decode_state, gen6_mfd_context);
2002     gen6_mfd_vc1_directmode_state(ctx, decode_state, gen6_mfd_context);
2003
2004     for (j = 0; j < decode_state->num_slice_params; j++) {
2005         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2006         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
2007         slice_data_bo = decode_state->slice_datas[j]->bo;
2008         gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen6_mfd_context);
2009
2010         if (j == decode_state->num_slice_params - 1)
2011             next_slice_group_param = NULL;
2012         else
2013             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
2014
2015         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2016             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2017
2018             if (i < decode_state->slice_params[j]->num_elements - 1)
2019                 next_slice_param = slice_param + 1;
2020             else
2021                 next_slice_param = next_slice_group_param;
2022
2023             gen6_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen6_mfd_context);
2024             slice_param++;
2025         }
2026     }
2027
2028     intel_batchbuffer_end_atomic(batch);
2029     intel_batchbuffer_flush(batch);
2030 }
2031
2032 static void 
2033 gen6_mfd_decode_picture(VADriverContextP ctx, 
2034                         VAProfile profile, 
2035                         union codec_state *codec_state,
2036                         struct hw_context *hw_context)
2037
2038 {
2039     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)hw_context;
2040     struct decode_state *decode_state = &codec_state->dec;
2041
2042     assert(gen6_mfd_context);
2043
2044     switch (profile) {
2045     case VAProfileMPEG2Simple:
2046     case VAProfileMPEG2Main:
2047         gen6_mfd_mpeg2_decode_picture(ctx, decode_state, gen6_mfd_context);
2048         break;
2049         
2050     case VAProfileH264Baseline:
2051     case VAProfileH264Main:
2052     case VAProfileH264High:
2053         gen6_mfd_avc_decode_picture(ctx, decode_state, gen6_mfd_context);
2054         break;
2055
2056     case VAProfileVC1Simple:
2057     case VAProfileVC1Main:
2058     case VAProfileVC1Advanced:
2059         gen6_mfd_vc1_decode_picture(ctx, decode_state, gen6_mfd_context);
2060         break;
2061
2062     default:
2063         assert(0);
2064         break;
2065     }
2066 }
2067
2068 static void
2069 gen6_mfd_context_destroy(void *hw_context)
2070 {
2071     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)hw_context;
2072
2073     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
2074     gen6_mfd_context->post_deblocking_output.bo = NULL;
2075
2076     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
2077     gen6_mfd_context->pre_deblocking_output.bo = NULL;
2078
2079     dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
2080     gen6_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2081
2082     dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2083     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2084
2085     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2086     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2087
2088     dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
2089     gen6_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2090
2091     dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
2092     gen6_mfd_context->bitplane_read_buffer.bo = NULL;
2093
2094     intel_batchbuffer_free(gen6_mfd_context->base.batch);
2095     free(gen6_mfd_context);
2096 }
2097
2098 struct hw_context *
2099 gen6_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
2100 {
2101     struct intel_driver_data *intel = intel_driver_data(ctx);
2102     struct gen6_mfd_context *gen6_mfd_context = calloc(1, sizeof(struct gen6_mfd_context));
2103     int i;
2104
2105     gen6_mfd_context->base.destroy = gen6_mfd_context_destroy;
2106     gen6_mfd_context->base.run = gen6_mfd_decode_picture;
2107     gen6_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
2108
2109     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
2110         gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
2111         gen6_mfd_context->reference_surface[i].frame_store_id = -1;
2112     }
2113     
2114     return (struct hw_context *)gen6_mfd_context;
2115 }