
jpeg/dec: gen8+ set correct fourcc for monochrome decode
[android-x86/hardware-intel-common-vaapi.git] / src / gen75_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui  <yakui.zhao@intel.com>
27  *
28  */
29 #include "sysdeps.h"
30
31 #include <va/va_dec_jpeg.h>
32
33 #include "intel_batchbuffer.h"
34 #include "intel_driver.h"
35 #include "i965_defines.h"
36 #include "i965_drv_video.h"
37 #include "i965_decoder_utils.h"
38 #include "gen7_mfd.h"
39 #include "intel_media.h"
40
41 #define B0_STEP_REV             2
42 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
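/*
 * Haswell B0+ steppings use longer layouts for several MFX state commands.
 * The state-setup helpers below therefore check IS_STEPPING_BPLUS() and
 * dispatch to a *_bplus variant that emits the larger command; earlier
 * steppings keep the original, shorter layout.
 */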
43
44 static const uint32_t zigzag_direct[64] = {
45     0,   1,  8, 16,  9,  2,  3, 10,
46     17, 24, 32, 25, 18, 11,  4,  5,
47     12, 19, 26, 33, 40, 48, 41, 34,
48     27, 20, 13,  6,  7, 14, 21, 28,
49     35, 42, 49, 56, 57, 50, 43, 36,
50     29, 22, 15, 23, 30, 37, 44, 51,
51     58, 59, 52, 45, 38, 31, 39, 46,
52     53, 60, 61, 54, 47, 55, 62, 63
53 };
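/*
 * zigzag_direct[i] is the raster-scan position of the i-th coefficient of
 * the standard 8x8 zigzag scan.  gen75_mfd_mpeg2_qm_state() below uses it
 * to reorder the MPEG-2 quantiser matrices from the VA-API buffer before
 * they are uploaded with MFX_QM_STATE.
 */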
54
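/*
 * Set up the per-surface private data needed for AVC decoding: a direct-MV
 * buffer for the frame/top field and, for field pictures decoded without
 * direct_8x8_inference, a separate direct-MV buffer for the bottom field.
 * Both buffers are sized at 128 bytes per macroblock of the frame.
 */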
55 static void
56 gen75_mfd_init_avc_surface(VADriverContextP ctx, 
57                           VAPictureParameterBufferH264 *pic_param,
58                           struct object_surface *obj_surface)
59 {
60     struct i965_driver_data *i965 = i965_driver_data(ctx);
61     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
62     int width_in_mbs, height_in_mbs;
63
64     obj_surface->free_private_data = gen_free_avc_surface;
65     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
66     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
67
68     if (!gen7_avc_surface) {
69         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
70         assert(gen7_avc_surface);
71         gen7_avc_surface->base.frame_store_id = -1;
72         assert((obj_surface->size & 0x3f) == 0);
73         obj_surface->private_data = gen7_avc_surface;
74     }
75
76     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
77                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
78
79     if (gen7_avc_surface->dmv_top == NULL) {
80         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
81                                                  "direct mv w/r buffer",
82                                                  width_in_mbs * height_in_mbs * 128,
83                                                  0x1000);
84         assert(gen7_avc_surface->dmv_top);
85     }
86
87     if (gen7_avc_surface->dmv_bottom_flag &&
88         gen7_avc_surface->dmv_bottom == NULL) {
89         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
90                                                     "direct mv w/r buffer",
91                                                     width_in_mbs * height_in_mbs * 128,
92                                                     0x1000);
93         assert(gen7_avc_surface->dmv_bottom);
94     }
95 }
96
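/*
 * MFX_PIPE_MODE_SELECT: put the MFX engine into VLD decode mode for the
 * selected standard (MPEG-2, AVC, VC-1 or JPEG), using the long slice
 * format, with the pre-/post-deblocking outputs enabled according to the
 * flags set up by the codec-specific init path.
 */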
97 static void
98 gen75_mfd_pipe_mode_select(VADriverContextP ctx,
99                           struct decode_state *decode_state,
100                           int standard_select,
101                           struct gen7_mfd_context *gen7_mfd_context)
102 {
103     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
104
105     assert(standard_select == MFX_FORMAT_MPEG2 ||
106            standard_select == MFX_FORMAT_AVC ||
107            standard_select == MFX_FORMAT_VC1 ||
108            standard_select == MFX_FORMAT_JPEG);
109
110     BEGIN_BCS_BATCH(batch, 5);
111     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
112     OUT_BCS_BATCH(batch,
113                   (MFX_LONG_MODE << 17) | /* currently only the long format is supported */
114                   (MFD_MODE_VLD << 15) | /* VLD mode */
115                   (0 << 10) | /* disable Stream-Out */
116                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
117                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
118                   (0 << 5)  | /* not in stitch mode */
119                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
120                   (standard_select << 0));
121     OUT_BCS_BATCH(batch,
122                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
123                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
124                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
125                   (0 << 1)  |
126                   (0 << 0));
127     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
128     OUT_BCS_BATCH(batch, 0); /* reserved */
129     ADVANCE_BCS_BATCH(batch);
130 }
131
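/*
 * MFX_SURFACE_STATE for the render target.  The surface format is derived
 * from the target's fourcc: Y800 is programmed as MFX_SURFACE_MONOCHROME
 * (e.g. grayscale JPEG), anything else as 8-bit planar 4:2:0.  Chroma is
 * interleaved (NV12) for the video codecs but not for JPEG, which is why
 * the V(Cr) offset is only meaningful in the JPEG case.
 */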
132 static void
133 gen75_mfd_surface_state(VADriverContextP ctx,
134                        struct decode_state *decode_state,
135                        int standard_select,
136                        struct gen7_mfd_context *gen7_mfd_context)
137 {
138     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
139     struct object_surface *obj_surface = decode_state->render_object;
140     unsigned int y_cb_offset;
141     unsigned int y_cr_offset;
142     unsigned int surface_format;
143
144     assert(obj_surface);
145
146     y_cb_offset = obj_surface->y_cb_offset;
147     y_cr_offset = obj_surface->y_cr_offset;
148
149     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
150         MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
151
152     BEGIN_BCS_BATCH(batch, 6);
153     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
154     OUT_BCS_BATCH(batch, 0);
155     OUT_BCS_BATCH(batch,
156                   ((obj_surface->orig_height - 1) << 18) |
157                   ((obj_surface->orig_width - 1) << 4));
158     OUT_BCS_BATCH(batch,
159                   (surface_format << 28) | /* 420 planar YUV surface */
160                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
161                   (0 << 22) | /* surface object control state, ignored */
162                   ((obj_surface->width - 1) << 3) | /* pitch */
163                   (0 << 2)  | /* must be 0 */
164                   (1 << 1)  | /* must be tiled */
165                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
166     OUT_BCS_BATCH(batch,
167                   (0 << 16) | /* X offset for U(Cb), must be 0 */
168                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
169     OUT_BCS_BATCH(batch,
170                   (0 << 16) | /* X offset for V(Cr), must be 0 */
171                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codecs, non-zero for JPEG */
172     ADVANCE_BCS_BATCH(batch);
173 }
174
175 static void
176 gen75_mfd_pipe_buf_addr_state_bplus(VADriverContextP ctx,
177                              struct decode_state *decode_state,
178                              int standard_select,
179                              struct gen7_mfd_context *gen7_mfd_context)
180 {
181     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
182     int i;
183
184     BEGIN_BCS_BATCH(batch, 61);
185     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
186         /* Pre-deblock 1-3 */
187     if (gen7_mfd_context->pre_deblocking_output.valid)
188         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
189                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
190                       0);
191     else
192         OUT_BCS_BATCH(batch, 0);
193
194         OUT_BCS_BATCH(batch, 0);
195         OUT_BCS_BATCH(batch, 0);
196         /* Post-deblocking 4-6 */
197     if (gen7_mfd_context->post_deblocking_output.valid)
198         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
199                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
200                       0);
201     else
202         OUT_BCS_BATCH(batch, 0);
203
204         OUT_BCS_BATCH(batch, 0);
205         OUT_BCS_BATCH(batch, 0);
206
207         /* uncompressed-video & stream out 7-12 */
208     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
209     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
210         OUT_BCS_BATCH(batch, 0);
211         OUT_BCS_BATCH(batch, 0);
212         OUT_BCS_BATCH(batch, 0);
213         OUT_BCS_BATCH(batch, 0);
214
215         /* intra row-store scratch 13-15 */
216     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
217         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
218                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
219                       0);
220     else
221         OUT_BCS_BATCH(batch, 0);
222
223         OUT_BCS_BATCH(batch, 0);
224         OUT_BCS_BATCH(batch, 0);
225         /* deblocking-filter-row-store 16-18 */
226     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
227         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
228                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
229                       0);
230     else
231         OUT_BCS_BATCH(batch, 0);
232         OUT_BCS_BATCH(batch, 0);
233         OUT_BCS_BATCH(batch, 0);
234
235     /* DW 19..50 */
236     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
237         struct object_surface *obj_surface;
238
239         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
240             gen7_mfd_context->reference_surface[i].obj_surface &&
241             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
242             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
243
244             OUT_BCS_RELOC(batch, obj_surface->bo,
245                           I915_GEM_DOMAIN_INSTRUCTION, 0,
246                           0);
247         } else {
248             OUT_BCS_BATCH(batch, 0);
249         }
250             OUT_BCS_BATCH(batch, 0);
251     }
252         /* reference property 51 */
253     OUT_BCS_BATCH(batch, 0);  
254         
255         /* Macroblock status & ILDB 52-57 */
256         OUT_BCS_BATCH(batch, 0);
257         OUT_BCS_BATCH(batch, 0);
258         OUT_BCS_BATCH(batch, 0);
259         OUT_BCS_BATCH(batch, 0);
260         OUT_BCS_BATCH(batch, 0);
261         OUT_BCS_BATCH(batch, 0);
262
263         /* the second Macroblock status 58-60 */        
264         OUT_BCS_BATCH(batch, 0);
265         OUT_BCS_BATCH(batch, 0);
266         OUT_BCS_BATCH(batch, 0);
267     ADVANCE_BCS_BATCH(batch);
268 }
269
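/*
 * MFX_PIPE_BUF_ADDR_STATE: program the pre-/post-deblocking outputs, the
 * row-store scratch buffers and the 16 reference picture addresses.  On
 * B+ steppings the command grows from 25 to 61 dwords, which is handled by
 * the _bplus variant above.
 */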
270 static void
271 gen75_mfd_pipe_buf_addr_state(VADriverContextP ctx,
272                              struct decode_state *decode_state,
273                              int standard_select,
274                              struct gen7_mfd_context *gen7_mfd_context)
275 {
276     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
277     struct i965_driver_data *i965 = i965_driver_data(ctx);
278     int i;
279
280     if (IS_STEPPING_BPLUS(i965)) {
281         gen75_mfd_pipe_buf_addr_state_bplus(ctx, decode_state,
282                                             standard_select, gen7_mfd_context);
283         return;
284     }
285
286     BEGIN_BCS_BATCH(batch, 25);
287     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
288     if (gen7_mfd_context->pre_deblocking_output.valid)
289         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
290                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
291                       0);
292     else
293         OUT_BCS_BATCH(batch, 0);
294
295     if (gen7_mfd_context->post_deblocking_output.valid)
296         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
297                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
298                       0);
299     else
300         OUT_BCS_BATCH(batch, 0);
301
302     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
303     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
304
305     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
306         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
307                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
308                       0);
309     else
310         OUT_BCS_BATCH(batch, 0);
311
312     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
313         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
314                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
315                       0);
316     else
317         OUT_BCS_BATCH(batch, 0);
318
319     /* DW 7..22 */
320     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
321         struct object_surface *obj_surface;
322
323         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
324             gen7_mfd_context->reference_surface[i].obj_surface &&
325             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
326             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
327
328             OUT_BCS_RELOC(batch, obj_surface->bo,
329                           I915_GEM_DOMAIN_INSTRUCTION, 0,
330                           0);
331         } else {
332             OUT_BCS_BATCH(batch, 0);
333         }
334     }
335
336     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
337     OUT_BCS_BATCH(batch, 0);   /* ignore DW24 for decoding */
338     ADVANCE_BCS_BATCH(batch);
339 }
340
341 static void
342 gen75_mfd_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
343                                  dri_bo *slice_data_bo,
344                                  int standard_select,
345                                  struct gen7_mfd_context *gen7_mfd_context)
346 {
347     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
348
349     BEGIN_BCS_BATCH(batch, 26);
350     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
351         /* MFX In BS 1-5 */
352     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
353     OUT_BCS_BATCH(batch, 0);
354     OUT_BCS_BATCH(batch, 0);
355         /* Upper bound 4-5 */   
356     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
357     OUT_BCS_BATCH(batch, 0);
358
359         /* MFX indirect MV 6-10 */
360     OUT_BCS_BATCH(batch, 0);
361     OUT_BCS_BATCH(batch, 0);
362     OUT_BCS_BATCH(batch, 0);
363     OUT_BCS_BATCH(batch, 0);
364     OUT_BCS_BATCH(batch, 0);
365         
366         /* MFX IT_COFF 11-15 */
367     OUT_BCS_BATCH(batch, 0);
368     OUT_BCS_BATCH(batch, 0);
369     OUT_BCS_BATCH(batch, 0);
370     OUT_BCS_BATCH(batch, 0);
371     OUT_BCS_BATCH(batch, 0);
372
373         /* MFX IT_DBLK 16-20 */
374     OUT_BCS_BATCH(batch, 0);
375     OUT_BCS_BATCH(batch, 0);
376     OUT_BCS_BATCH(batch, 0);
377     OUT_BCS_BATCH(batch, 0);
378     OUT_BCS_BATCH(batch, 0);
379
380         /* MFX PAK_BSE object for encoder 21-25 */
381     OUT_BCS_BATCH(batch, 0);
382     OUT_BCS_BATCH(batch, 0);
383     OUT_BCS_BATCH(batch, 0);
384     OUT_BCS_BATCH(batch, 0);
385     OUT_BCS_BATCH(batch, 0);
386
387     ADVANCE_BCS_BATCH(batch);
388 }
389
390 static void
391 gen75_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
392                                  dri_bo *slice_data_bo,
393                                  int standard_select,
394                                  struct gen7_mfd_context *gen7_mfd_context)
395 {
396     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
397     struct i965_driver_data *i965 = i965_driver_data(ctx);
398
399     if (IS_STEPPING_BPLUS(i965)) {
400         gen75_mfd_ind_obj_base_addr_state_bplus(ctx, slice_data_bo,
401                                                 standard_select, gen7_mfd_context);
402         return;
403     }
404
405     BEGIN_BCS_BATCH(batch, 11);
406     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
407     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
408     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
409     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
410     OUT_BCS_BATCH(batch, 0);
411     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
412     OUT_BCS_BATCH(batch, 0);
413     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
414     OUT_BCS_BATCH(batch, 0);
415     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
416     OUT_BCS_BATCH(batch, 0);
417     ADVANCE_BCS_BATCH(batch);
418 }
419
420 static void
421 gen75_mfd_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
422                                  struct decode_state *decode_state,
423                                  int standard_select,
424                                  struct gen7_mfd_context *gen7_mfd_context)
425 {
426     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
427
428     BEGIN_BCS_BATCH(batch, 10);
429     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
430
431     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
432         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
433                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
434                       0);
435     else
436         OUT_BCS_BATCH(batch, 0);
437
438     OUT_BCS_BATCH(batch, 0);
439     OUT_BCS_BATCH(batch, 0);
440         /* MPR Row Store Scratch buffer 4-6 */
441     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
442         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
443                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
444                       0);
445     else
446         OUT_BCS_BATCH(batch, 0);
447     OUT_BCS_BATCH(batch, 0);
448     OUT_BCS_BATCH(batch, 0);
449
450         /* Bitplane 7-9 */ 
451     if (gen7_mfd_context->bitplane_read_buffer.valid)
452         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
453                       I915_GEM_DOMAIN_INSTRUCTION, 0,
454                       0);
455     else
456         OUT_BCS_BATCH(batch, 0);
457     OUT_BCS_BATCH(batch, 0);
458     OUT_BCS_BATCH(batch, 0);
459
460     ADVANCE_BCS_BATCH(batch);
461 }
462
463 static void
464 gen75_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
465                                  struct decode_state *decode_state,
466                                  int standard_select,
467                                  struct gen7_mfd_context *gen7_mfd_context)
468 {
469     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
470     struct i965_driver_data *i965 = i965_driver_data(ctx);
471
472     if (IS_STEPPING_BPLUS(i965)) {
473         gen75_mfd_bsp_buf_base_addr_state_bplus(ctx, decode_state,
474                                                 standard_select, gen7_mfd_context);
475         return;
476     }
477
478     BEGIN_BCS_BATCH(batch, 4);
479     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
480
481     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
482         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
483                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
484                       0);
485     else
486         OUT_BCS_BATCH(batch, 0);
487
488     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
489         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
490                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
491                       0);
492     else
493         OUT_BCS_BATCH(batch, 0);
494
495     if (gen7_mfd_context->bitplane_read_buffer.valid)
496         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
497                       I915_GEM_DOMAIN_INSTRUCTION, 0,
498                       0);
499     else
500         OUT_BCS_BATCH(batch, 0);
501
502     ADVANCE_BCS_BATCH(batch);
503 }
504
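/*
 * MFX_QM_STATE: upload a single quantiser matrix.  The caller's matrix
 * (at most 64 bytes) is copied into a fixed 16-dword buffer so that the
 * command always carries a full 64-byte payload.
 */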
505 static void
506 gen75_mfd_qm_state(VADriverContextP ctx,
507                   int qm_type,
508                   unsigned char *qm,
509                   int qm_length,
510                   struct gen7_mfd_context *gen7_mfd_context)
511 {
512     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
513     unsigned int qm_buffer[16];
514
515     assert(qm_length <= 16 * 4);
516     memcpy(qm_buffer, qm, qm_length);
517
518     BEGIN_BCS_BATCH(batch, 18);
519     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
520     OUT_BCS_BATCH(batch, qm_type << 0);
521     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
522     ADVANCE_BCS_BATCH(batch);
523 }
524
525 static void
526 gen75_mfd_avc_img_state(VADriverContextP ctx,
527                        struct decode_state *decode_state,
528                        struct gen7_mfd_context *gen7_mfd_context)
529 {
530     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
531     int img_struct;
532     int mbaff_frame_flag;
533     unsigned int width_in_mbs, height_in_mbs;
534     VAPictureParameterBufferH264 *pic_param;
535
536     assert(decode_state->pic_param && decode_state->pic_param->buffer);
537     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
538
539     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
540
541     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
542         img_struct = 1;
543     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
544         img_struct = 3;
545     else
546         img_struct = 0;
547
548     if ((img_struct & 0x1) == 0x1) {
549         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
550     } else {
551         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
552     }
553
554     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
555         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
556         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
557     } else {
558         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
559     }
560
561     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
562                         !pic_param->pic_fields.bits.field_pic_flag);
563
564     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
565     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
566
567     /* The MFX unit doesn't support 4:2:2 and 4:4:4 pictures */
568     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
569            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
570     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
571
572     BEGIN_BCS_BATCH(batch, 17);
573     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
574     OUT_BCS_BATCH(batch, 
575                   (width_in_mbs * height_in_mbs - 1));
576     OUT_BCS_BATCH(batch, 
577                   ((height_in_mbs - 1) << 16) | 
578                   ((width_in_mbs - 1) << 0));
579     OUT_BCS_BATCH(batch, 
580                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
581                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
582                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
583                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
584                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
585                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
586                   (img_struct << 8));
587     OUT_BCS_BATCH(batch,
588                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
589                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
590                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
591                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
592                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
593                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
594                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
595                   (mbaff_frame_flag << 1) |
596                   (pic_param->pic_fields.bits.field_pic_flag << 0));
597     OUT_BCS_BATCH(batch, 0);
598     OUT_BCS_BATCH(batch, 0);
599     OUT_BCS_BATCH(batch, 0);
600     OUT_BCS_BATCH(batch, 0);
601     OUT_BCS_BATCH(batch, 0);
602     OUT_BCS_BATCH(batch, 0);
603     OUT_BCS_BATCH(batch, 0);
604     OUT_BCS_BATCH(batch, 0);
605     OUT_BCS_BATCH(batch, 0);
606     OUT_BCS_BATCH(batch, 0);
607     OUT_BCS_BATCH(batch, 0);
608     OUT_BCS_BATCH(batch, 0);
609     ADVANCE_BCS_BATCH(batch);
610 }
611
612 static void
613 gen75_mfd_avc_qm_state(VADriverContextP ctx,
614                       struct decode_state *decode_state,
615                       struct gen7_mfd_context *gen7_mfd_context)
616 {
617     VAIQMatrixBufferH264 *iq_matrix;
618     VAPictureParameterBufferH264 *pic_param;
619
620     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
621         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
622     else
623         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
624
625     assert(decode_state->pic_param && decode_state->pic_param->buffer);
626     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
627
628     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
629     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
630
631     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
632         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
633         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
634     }
635 }
636
637 static inline void
638 gen75_mfd_avc_picid_state(VADriverContextP ctx,
639                       struct decode_state *decode_state,
640                       struct gen7_mfd_context *gen7_mfd_context)
641 {
642     gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
643         gen7_mfd_context->reference_surface);
644 }
645
646 static void
647 gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx,
648                               struct decode_state *decode_state,
649                               VAPictureParameterBufferH264 *pic_param,
650                               VASliceParameterBufferH264 *slice_param,
651                               struct gen7_mfd_context *gen7_mfd_context)
652 {
653     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
654     struct object_surface *obj_surface;
655     GenAvcSurface *gen7_avc_surface;
656     VAPictureH264 *va_pic;
657     int i;
658
659     BEGIN_BCS_BATCH(batch, 71);
660     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
661
662     /* reference surfaces 0..15 */
663     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
664         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
665             gen7_mfd_context->reference_surface[i].obj_surface &&
666             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
667
668             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
669             gen7_avc_surface = obj_surface->private_data;
670             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
671                           I915_GEM_DOMAIN_INSTRUCTION, 0,
672                           0);
673             OUT_BCS_BATCH(batch, 0);
674         } else {
675             OUT_BCS_BATCH(batch, 0);
676             OUT_BCS_BATCH(batch, 0);
677         }
678     }
679
680     OUT_BCS_BATCH(batch, 0);
681
682     /* the current decoding frame/field */
683     va_pic = &pic_param->CurrPic;
684     obj_surface = decode_state->render_object;
685     assert(obj_surface->bo && obj_surface->private_data);
686     gen7_avc_surface = obj_surface->private_data;
687
688     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
689                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
690                   0);
691
692     OUT_BCS_BATCH(batch, 0);
693     OUT_BCS_BATCH(batch, 0);
694
695     /* POC List */
696     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
697         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
698
699         if (obj_surface) {
700             const VAPictureH264 * const va_pic = avc_find_picture(
701                 obj_surface->base.id, pic_param->ReferenceFrames,
702                 ARRAY_ELEMS(pic_param->ReferenceFrames));
703
704             assert(va_pic != NULL);
705             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
706             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
707         } else {
708             OUT_BCS_BATCH(batch, 0);
709             OUT_BCS_BATCH(batch, 0);
710         }
711     }
712
713     va_pic = &pic_param->CurrPic;
714     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
715     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
716
717     ADVANCE_BCS_BATCH(batch);
718 }
719
720 static void
721 gen75_mfd_avc_directmode_state(VADriverContextP ctx,
722                               struct decode_state *decode_state,
723                               VAPictureParameterBufferH264 *pic_param,
724                               VASliceParameterBufferH264 *slice_param,
725                               struct gen7_mfd_context *gen7_mfd_context)
726 {
727     struct i965_driver_data *i965 = i965_driver_data(ctx);
728     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
729     struct object_surface *obj_surface;
730     GenAvcSurface *gen7_avc_surface;
731     VAPictureH264 *va_pic;
732     int i;
733
734     if (IS_STEPPING_BPLUS(i965)) {
735         gen75_mfd_avc_directmode_state_bplus(ctx, decode_state, pic_param, slice_param,
736                                              gen7_mfd_context);
737
738         return;
739     }
740
741     BEGIN_BCS_BATCH(batch, 69);
742     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
743
744     /* reference surfaces 0..15 */
745     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
746         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
747             gen7_mfd_context->reference_surface[i].obj_surface &&
748             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
749
750             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
751             gen7_avc_surface = obj_surface->private_data;
752
753             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
754                           I915_GEM_DOMAIN_INSTRUCTION, 0,
755                           0);
756
757             if (gen7_avc_surface->dmv_bottom_flag == 1)
758                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
759                               I915_GEM_DOMAIN_INSTRUCTION, 0,
760                               0);
761             else
762                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
763                               I915_GEM_DOMAIN_INSTRUCTION, 0,
764                               0);
765         } else {
766             OUT_BCS_BATCH(batch, 0);
767             OUT_BCS_BATCH(batch, 0);
768         }
769     }
770
771     /* the current decoding frame/field */
772     va_pic = &pic_param->CurrPic;
773     obj_surface = decode_state->render_object;
774     assert(obj_surface->bo && obj_surface->private_data);
775     gen7_avc_surface = obj_surface->private_data;
776
777     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
778                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
779                   0);
780
781     if (gen7_avc_surface->dmv_bottom_flag == 1)
782         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
783                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
784                       0);
785     else
786         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
787                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
788                       0);
789
790     /* POC List */
791     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
792         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
793
794         if (obj_surface) {
795             const VAPictureH264 * const va_pic = avc_find_picture(
796                 obj_surface->base.id, pic_param->ReferenceFrames,
797                 ARRAY_ELEMS(pic_param->ReferenceFrames));
798
799             assert(va_pic != NULL);
800             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
801             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
802         } else {
803             OUT_BCS_BATCH(batch, 0);
804             OUT_BCS_BATCH(batch, 0);
805         }
806     }
807
808     va_pic = &pic_param->CurrPic;
809     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
810     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
811
812     ADVANCE_BCS_BATCH(batch);
813 }
814
815 static void
816 gen75_mfd_avc_phantom_slice_first(VADriverContextP ctx,
817                                  VAPictureParameterBufferH264 *pic_param,
818                                  VASliceParameterBufferH264 *next_slice_param,
819                                  struct gen7_mfd_context *gen7_mfd_context)
820 {
821     gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
822 }
823
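/*
 * MFX_AVC_SLICE_STATE: besides the reference list sizes, weight denominators
 * and deblocking parameters, this derives the macroblock (x, y) position of
 * the current and the next slice from first_mb_in_slice.  The row index is
 * doubled for MBAFF pictures; when there is no next slice, the "next"
 * position is set to the bottom of the picture, marking the last slice.
 */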
824 static void
825 gen75_mfd_avc_slice_state(VADriverContextP ctx,
826                          VAPictureParameterBufferH264 *pic_param,
827                          VASliceParameterBufferH264 *slice_param,
828                          VASliceParameterBufferH264 *next_slice_param,
829                          struct gen7_mfd_context *gen7_mfd_context)
830 {
831     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
832     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
833     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
834     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
835     int num_ref_idx_l0, num_ref_idx_l1;
836     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
837                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
838     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
839     int slice_type;
840
841     if (slice_param->slice_type == SLICE_TYPE_I ||
842         slice_param->slice_type == SLICE_TYPE_SI) {
843         slice_type = SLICE_TYPE_I;
844     } else if (slice_param->slice_type == SLICE_TYPE_P ||
845                slice_param->slice_type == SLICE_TYPE_SP) {
846         slice_type = SLICE_TYPE_P;
847     } else { 
848         assert(slice_param->slice_type == SLICE_TYPE_B);
849         slice_type = SLICE_TYPE_B;
850     }
851
852     if (slice_type == SLICE_TYPE_I) {
853         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
854         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
855         num_ref_idx_l0 = 0;
856         num_ref_idx_l1 = 0;
857     } else if (slice_type == SLICE_TYPE_P) {
858         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
859         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
860         num_ref_idx_l1 = 0;
861     } else {
862         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
863         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
864     }
865
866     first_mb_in_slice = slice_param->first_mb_in_slice;
867     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
868     slice_ver_pos = first_mb_in_slice / width_in_mbs;
869
870     if (mbaff_picture)
871         slice_ver_pos = slice_ver_pos << 1;
872
873     if (next_slice_param) {
874         first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
875         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
876         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
877
878         if (mbaff_picture)
879             next_slice_ver_pos = next_slice_ver_pos << 1;
880     } else {
881         next_slice_hor_pos = 0;
882         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
883     }
884
885     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
886     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
887     OUT_BCS_BATCH(batch, slice_type);
888     OUT_BCS_BATCH(batch, 
889                   (num_ref_idx_l1 << 24) |
890                   (num_ref_idx_l0 << 16) |
891                   (slice_param->chroma_log2_weight_denom << 8) |
892                   (slice_param->luma_log2_weight_denom << 0));
893     OUT_BCS_BATCH(batch, 
894                   (slice_param->direct_spatial_mv_pred_flag << 29) |
895                   (slice_param->disable_deblocking_filter_idc << 27) |
896                   (slice_param->cabac_init_idc << 24) |
897                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
898                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
899                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
900     OUT_BCS_BATCH(batch, 
901                   (slice_ver_pos << 24) |
902                   (slice_hor_pos << 16) | 
903                   (first_mb_in_slice << 0));
904     OUT_BCS_BATCH(batch,
905                   (next_slice_ver_pos << 16) |
906                   (next_slice_hor_pos << 0));
907     OUT_BCS_BATCH(batch, 
908                   (next_slice_param == NULL) << 19); /* last slice flag */
909     OUT_BCS_BATCH(batch, 0);
910     OUT_BCS_BATCH(batch, 0);
911     OUT_BCS_BATCH(batch, 0);
912     OUT_BCS_BATCH(batch, 0);
913     ADVANCE_BCS_BATCH(batch);
914 }
915
916 static inline void
917 gen75_mfd_avc_ref_idx_state(VADriverContextP ctx,
918                            VAPictureParameterBufferH264 *pic_param,
919                            VASliceParameterBufferH264 *slice_param,
920                            struct gen7_mfd_context *gen7_mfd_context)
921 {
922     gen6_send_avc_ref_idx_state(
923         gen7_mfd_context->base.batch,
924         slice_param,
925         gen7_mfd_context->reference_surface
926     );
927 }
928
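/*
 * MFX_AVC_WEIGHTOFFSET_STATE: emitted once (list 0) for P/SP slices with
 * weighted prediction and twice (list 0, then list 1) for B slices with
 * explicit weighted bi-prediction.  Each table packs 32 entries of
 * {luma weight, luma offset, Cb weight, Cb offset, Cr weight, Cr offset}
 * as 16-bit values, i.e. 96 dwords of payload.
 */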
929 static void
930 gen75_mfd_avc_weightoffset_state(VADriverContextP ctx,
931                                 VAPictureParameterBufferH264 *pic_param,
932                                 VASliceParameterBufferH264 *slice_param,
933                                 struct gen7_mfd_context *gen7_mfd_context)
934 {
935     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
936     int i, j, num_weight_offset_table = 0;
937     short weightoffsets[32 * 6];
938
939     if ((slice_param->slice_type == SLICE_TYPE_P ||
940          slice_param->slice_type == SLICE_TYPE_SP) &&
941         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
942         num_weight_offset_table = 1;
943     }
944     
945     if ((slice_param->slice_type == SLICE_TYPE_B) &&
946         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
947         num_weight_offset_table = 2;
948     }
949
950     for (i = 0; i < num_weight_offset_table; i++) {
951         BEGIN_BCS_BATCH(batch, 98);
952         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
953         OUT_BCS_BATCH(batch, i);
954
955         if (i == 0) {
956             for (j = 0; j < 32; j++) {
957                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
958                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
959                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
960                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
961                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
962                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
963             }
964         } else {
965             for (j = 0; j < 32; j++) {
966                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
967                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
968                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
969                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
970                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
971                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
972             }
973         }
974
975         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
976         ADVANCE_BCS_BATCH(batch);
977     }
978 }
979
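/*
 * MFD_AVC_BSD_OBJECT: kick off decoding of one slice.  The bit offset of
 * the first macroblock, returned by avc_get_first_mb_bit_offset(), is split
 * into a byte offset and a remaining 0..7 bit offset, and the LastSlice
 * flag is set when no further slice follows in the picture.
 */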
980 static void
981 gen75_mfd_avc_bsd_object(VADriverContextP ctx,
982                         VAPictureParameterBufferH264 *pic_param,
983                         VASliceParameterBufferH264 *slice_param,
984                         dri_bo *slice_data_bo,
985                         VASliceParameterBufferH264 *next_slice_param,
986                         struct gen7_mfd_context *gen7_mfd_context)
987 {
988     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
989     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
990                                                             slice_param,
991                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
992
993     /* the input bitstream format on GEN7 differs from GEN6 */
994     BEGIN_BCS_BATCH(batch, 6);
995     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
996     OUT_BCS_BATCH(batch, 
997                   (slice_param->slice_data_size - slice_param->slice_data_offset));
998     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
999     OUT_BCS_BATCH(batch,
1000                   (0 << 31) |
1001                   (0 << 14) |
1002                   (0 << 12) |
1003                   (0 << 10) |
1004                   (0 << 8));
1005     OUT_BCS_BATCH(batch,
1006                   ((slice_data_bit_offset >> 3) << 16) |
1007                   (1 << 7)  |
1008                   (0 << 5)  |
1009                   (0 << 4)  |
1010                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
1011                   (slice_data_bit_offset & 0x7));
1012     OUT_BCS_BATCH(batch, 0);
1013     ADVANCE_BCS_BATCH(batch);
1014 }
1015
1016 static inline void
1017 gen75_mfd_avc_context_init(
1018     VADriverContextP         ctx,
1019     struct gen7_mfd_context *gen7_mfd_context
1020 )
1021 {
1022     /* Initialize flat scaling lists */
1023     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
1024 }
1025
1026 static void
1027 gen75_mfd_avc_decode_init(VADriverContextP ctx,
1028                          struct decode_state *decode_state,
1029                          struct gen7_mfd_context *gen7_mfd_context)
1030 {
1031     VAPictureParameterBufferH264 *pic_param;
1032     VASliceParameterBufferH264 *slice_param;
1033     struct i965_driver_data *i965 = i965_driver_data(ctx);
1034     struct object_surface *obj_surface;
1035     dri_bo *bo;
1036     int i, j, enable_avc_ildb = 0;
1037     unsigned int width_in_mbs, height_in_mbs;
1038
1039     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
1040         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1041         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1042
1043         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1044             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1045             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1046                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1047                    (slice_param->slice_type == SLICE_TYPE_P) ||
1048                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1049                    (slice_param->slice_type == SLICE_TYPE_B));
1050
1051             if (slice_param->disable_deblocking_filter_idc != 1) {
1052                 enable_avc_ildb = 1;
1053                 break;
1054             }
1055
1056             slice_param++;
1057         }
1058     }
1059
1060     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1061     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1062     gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
1063         gen7_mfd_context->reference_surface);
1064     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
1065     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
1066     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
1067     assert(height_in_mbs > 0 && height_in_mbs <= 256);
1068
1069     /* Current decoded picture */
1070     obj_surface = decode_state->render_object;
1071     if (pic_param->pic_fields.bits.reference_pic_flag)
1072         obj_surface->flags |= SURFACE_REFERENCED;
1073     else
1074         obj_surface->flags &= ~SURFACE_REFERENCED;
1075
1076     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
1077     gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface);
1078
1079     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1080     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1081     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1082     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
1083
1084     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1085     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1086     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1087     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
1088
1089     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1090     bo = dri_bo_alloc(i965->intel.bufmgr,
1091                       "intra row store",
1092                       width_in_mbs * 64,
1093                       0x1000);
1094     assert(bo);
1095     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1096     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1097
1098     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1099     bo = dri_bo_alloc(i965->intel.bufmgr,
1100                       "deblocking filter row store",
1101                       width_in_mbs * 64 * 4,
1102                       0x1000);
1103     assert(bo);
1104     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1105     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1106
1107     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1108     bo = dri_bo_alloc(i965->intel.bufmgr,
1109                       "bsd mpc row store",
1110                       width_in_mbs * 64 * 2,
1111                       0x1000);
1112     assert(bo);
1113     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1114     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1115
1116     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
1117     bo = dri_bo_alloc(i965->intel.bufmgr,
1118                       "mpr row store",
1119                       width_in_mbs * 64 * 2,
1120                       0x1000);
1121     assert(bo);
1122     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
1123     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
1124
1125     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1126 }
1127
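/*
 * Top-level AVC decode path: after per-picture init, the batch emits an MI
 * flush, pipe mode select, surface/pipe-buffer/BSP state, the QM, PicID and
 * image state, then per slice the direct-mode, ref-idx, weight/offset,
 * slice-state and BSD-object commands.  A phantom slice is inserted first
 * when the initial slice does not start at macroblock 0.
 */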
1128 static void
1129 gen75_mfd_avc_decode_picture(VADriverContextP ctx,
1130                             struct decode_state *decode_state,
1131                             struct gen7_mfd_context *gen7_mfd_context)
1132 {
1133     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1134     VAPictureParameterBufferH264 *pic_param;
1135     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
1136     dri_bo *slice_data_bo;
1137     int i, j;
1138
1139     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1140     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1141     gen75_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
1142
1143     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1144     intel_batchbuffer_emit_mi_flush(batch);
1145     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1146     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1147     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1148     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1149     gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
1150     gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
1151     gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
1152
1153     for (j = 0; j < decode_state->num_slice_params; j++) {
1154         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1155         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1156         slice_data_bo = decode_state->slice_datas[j]->bo;
1157         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
1158
1159         if (j == decode_state->num_slice_params - 1)
1160             next_slice_group_param = NULL;
1161         else
1162             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
1163
1164         if (j == 0 && slice_param->first_mb_in_slice)
1165             gen75_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context); 
1166
1167         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1168             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1169             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1170                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1171                    (slice_param->slice_type == SLICE_TYPE_P) ||
1172                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1173                    (slice_param->slice_type == SLICE_TYPE_B));
1174
1175             if (i < decode_state->slice_params[j]->num_elements - 1)
1176                 next_slice_param = slice_param + 1;
1177             else
1178                 next_slice_param = next_slice_group_param;
1179
1180             gen75_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
1181             gen75_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
1182             gen75_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
1183             gen75_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1184             gen75_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
1185             slice_param++;
1186         }
1187     }
1188
1189     intel_batchbuffer_end_atomic(batch);
1190     intel_batchbuffer_flush(batch);
1191 }
1192
1193 static void
1194 gen75_mfd_mpeg2_decode_init(VADriverContextP ctx,
1195                            struct decode_state *decode_state,
1196                            struct gen7_mfd_context *gen7_mfd_context)
1197 {
1198     VAPictureParameterBufferMPEG2 *pic_param;
1199     struct i965_driver_data *i965 = i965_driver_data(ctx);
1200     struct object_surface *obj_surface;
1201     dri_bo *bo;
1202     unsigned int width_in_mbs;
1203
1204     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1205     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1206     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1207
1208     mpeg2_set_reference_surfaces(
1209         ctx,
1210         gen7_mfd_context->reference_surface,
1211         decode_state,
1212         pic_param
1213     );
1214
1215     /* Current decoded picture */
1216     obj_surface = decode_state->render_object;
1217     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1218
1219     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1220     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1221     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1222     gen7_mfd_context->pre_deblocking_output.valid = 1;
1223
1224     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1225     bo = dri_bo_alloc(i965->intel.bufmgr,
1226                       "bsd mpc row store",
1227                       width_in_mbs * 96,
1228                       0x1000);
1229     assert(bo);
1230     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1231     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1232
1233     gen7_mfd_context->post_deblocking_output.valid = 0;
1234     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1235     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1236     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1237     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1238 }
1239
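/*
 * MFX_MPEG2_PIC_STATE.  The four MPEG-2 f_code values arrive packed into
 * the 16-bit pic_param->f_code field and are unpacked one nibble at a time
 * (see the per-field comments below); the picture size is programmed in
 * macroblocks and slice concealment is always disabled here.
 */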
1240 static void
1241 gen75_mfd_mpeg2_pic_state(VADriverContextP ctx,
1242                          struct decode_state *decode_state,
1243                          struct gen7_mfd_context *gen7_mfd_context)
1244 {
1245     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1246     VAPictureParameterBufferMPEG2 *pic_param;
1247     unsigned int slice_concealment_disable_bit = 0;
1248
1249     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1250     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1251
1252     slice_concealment_disable_bit = 1;
1253
1254     BEGIN_BCS_BATCH(batch, 13);
1255     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1256     OUT_BCS_BATCH(batch,
1257                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1258                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1259                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1260                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1261                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1262                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1263                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1264                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1265                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1266                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1267                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1268                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1269     OUT_BCS_BATCH(batch,
1270                   pic_param->picture_coding_type << 9);
1271     OUT_BCS_BATCH(batch,
1272                   (slice_concealment_disable_bit << 31) |
1273                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1274                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1275     OUT_BCS_BATCH(batch, 0);
1276     OUT_BCS_BATCH(batch, 0);
1277     OUT_BCS_BATCH(batch, 0);
1278     OUT_BCS_BATCH(batch, 0);
1279     OUT_BCS_BATCH(batch, 0);
1280     OUT_BCS_BATCH(batch, 0);
1281     OUT_BCS_BATCH(batch, 0);
1282     OUT_BCS_BATCH(batch, 0);
1283     OUT_BCS_BATCH(batch, 0);
1284     ADVANCE_BCS_BATCH(batch);
1285 }
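/*
 * Editor's note (illustrative, not part of the driver): the 16-bit
 * VAPictureParameterBufferMPEG2 f_code word is consumed above as four
 * 4-bit values, with f_code[0][0] in the most significant nibble and
 * f_code[1][1] in the least significant one.  A minimal sketch of that
 * unpacking, using a hypothetical helper name:
 */
#if 0
static inline int
mpeg2_f_code(unsigned int f_code_word, int s, int t)
{
    /* s: 0 = forward, 1 = backward; t: 0 = horizontal, 1 = vertical */
    return (f_code_word >> (12 - 4 * (2 * s + t))) & 0xf;
}
#endif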
1286
1287 static void
1288 gen75_mfd_mpeg2_qm_state(VADriverContextP ctx,
1289                         struct decode_state *decode_state,
1290                         struct gen7_mfd_context *gen7_mfd_context)
1291 {
1292     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1293     int i, j;
1294
1295     /* Update internal QM state */
1296     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1297         VAIQMatrixBufferMPEG2 * const iq_matrix =
1298             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1299
1300         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1301             iq_matrix->load_intra_quantiser_matrix) {
1302             gen_iq_matrix->load_intra_quantiser_matrix =
1303                 iq_matrix->load_intra_quantiser_matrix;
1304             if (iq_matrix->load_intra_quantiser_matrix) {
1305                 for (j = 0; j < 64; j++)
1306                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1307                         iq_matrix->intra_quantiser_matrix[j];
1308             }
1309         }
1310
1311         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1312             iq_matrix->load_non_intra_quantiser_matrix) {
1313             gen_iq_matrix->load_non_intra_quantiser_matrix =
1314                 iq_matrix->load_non_intra_quantiser_matrix;
1315             if (iq_matrix->load_non_intra_quantiser_matrix) {
1316                 for (j = 0; j < 64; j++)
1317                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1318                         iq_matrix->non_intra_quantiser_matrix[j];
1319             }
1320         }
1321     }
1322
1323     /* Commit QM state to HW */
1324     for (i = 0; i < 2; i++) {
1325         unsigned char *qm = NULL;
1326         int qm_type;
1327
1328         if (i == 0) {
1329             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1330                 qm = gen_iq_matrix->intra_quantiser_matrix;
1331                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1332             }
1333         } else {
1334             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1335                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1336                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1337             }
1338         }
1339
1340         if (!qm)
1341             continue;
1342
1343         gen75_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1344     }
1345 }
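/*
 * Editor's note (illustrative, not part of the driver): the loops above
 * treat the matrices handed in through VAIQMatrixBufferMPEG2 as being in
 * zig-zag scan order and cache them in raster order for MFX_QM_STATE.
 * A standalone sketch of the same remapping, with a hypothetical helper
 * name:
 */
#if 0
static void
qm_zigzag_to_raster(const unsigned char zz[64], unsigned char raster[64])
{
    int j;

    for (j = 0; j < 64; j++)
        raster[zigzag_direct[j]] = zz[j]; /* e.g. zz[2] lands at raster[8] */
}
#endif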
1346
1347 static void
1348 gen75_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1349                           VAPictureParameterBufferMPEG2 *pic_param,
1350                           VASliceParameterBufferMPEG2 *slice_param,
1351                           VASliceParameterBufferMPEG2 *next_slice_param,
1352                           struct gen7_mfd_context *gen7_mfd_context)
1353 {
1354     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1355     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1356     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1357
1358     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1359         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1360         is_field_pic = 1;
1361     is_field_pic_wa = is_field_pic &&
1362         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1363
1364     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1365     hpos0 = slice_param->slice_horizontal_position;
1366
1367     if (next_slice_param == NULL) {
1368         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1369         hpos1 = 0;
1370     } else {
1371         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1372         hpos1 = next_slice_param->slice_horizontal_position;
1373     }
1374
1375     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1376
1377     BEGIN_BCS_BATCH(batch, 5);
1378     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1379     OUT_BCS_BATCH(batch, 
1380                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1381     OUT_BCS_BATCH(batch, 
1382                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1383     OUT_BCS_BATCH(batch,
1384                   hpos0 << 24 |
1385                   vpos0 << 16 |
1386                   mb_count << 8 |
1387                   (next_slice_param == NULL) << 5 |
1388                   (next_slice_param == NULL) << 3 |
1389                   (slice_param->macroblock_offset & 0x7));
1390     OUT_BCS_BATCH(batch,
1391                   (slice_param->quantiser_scale_code << 24) |
1392                   (vpos1 << 8 | hpos1));
1393     ADVANCE_BCS_BATCH(batch);
1394 }
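/*
 * Editor's note: mb_count above is the distance, in macroblocks, between
 * this slice's start and the next slice's start in raster-scan order.
 * Worked example (hypothetical stream): with a 720-pixel-wide picture
 * (width_in_mbs = 45), a slice starting at (vpos0 = 2, hpos0 = 0) whose
 * successor starts at (vpos1 = 3, hpos1 = 0) covers
 * (3 * 45 + 0) - (2 * 45 + 0) = 45 macroblocks, i.e. one full row.
 */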
1395
1396 static void
1397 gen75_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1398                               struct decode_state *decode_state,
1399                               struct gen7_mfd_context *gen7_mfd_context)
1400 {
1401     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1402     VAPictureParameterBufferMPEG2 *pic_param;
1403     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1404     dri_bo *slice_data_bo;
1405     int i, j;
1406
1407     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1408     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1409
1410     gen75_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1411     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1412     intel_batchbuffer_emit_mi_flush(batch);
1413     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1414     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1415     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1416     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1417     gen75_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1418     gen75_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1419
1420     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1421         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1422             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1423
1424     for (j = 0; j < decode_state->num_slice_params; j++) {
1425         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1426         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1427         slice_data_bo = decode_state->slice_datas[j]->bo;
1428         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1429
1430         if (j == decode_state->num_slice_params - 1)
1431             next_slice_group_param = NULL;
1432         else
1433             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1434
1435         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1436             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1437
1438             if (i < decode_state->slice_params[j]->num_elements - 1)
1439                 next_slice_param = slice_param + 1;
1440             else
1441                 next_slice_param = next_slice_group_param;
1442
1443             gen75_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1444             slice_param++;
1445         }
1446     }
1447
1448     intel_batchbuffer_end_atomic(batch);
1449     intel_batchbuffer_flush(batch);
1450 }
1451
1452 static const int va_to_gen7_vc1_pic_type[5] = {
1453     GEN7_VC1_I_PICTURE,
1454     GEN7_VC1_P_PICTURE,
1455     GEN7_VC1_B_PICTURE,
1456     GEN7_VC1_BI_PICTURE,
1457     GEN7_VC1_P_PICTURE,
1458 };
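/*
 * Editor's note: the table above is indexed by the VA-API VC-1
 * picture_fields.bits.picture_type value (0 = I, 1 = P, 2 = B, 3 = BI,
 * 4 = skipped); the last entry deliberately programs skipped pictures as
 * P pictures.
 */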
1459
1460 static const int va_to_gen7_vc1_mv[4] = {
1461     1, /* 1-MV */
1462     2, /* 1-MV half-pel */
1463     3, /* 1-MV half-pel bilinear */
1464     0, /* Mixed MV */
1465 };
1466
1467 static const int b_picture_scale_factor[21] = {
1468     128, 85,  170, 64,  192,
1469     51,  102, 153, 204, 43,
1470     215, 37,  74,  111, 148,
1471     185, 222, 32,  96,  160, 
1472     224,
1473 };
1474
1475 static const int va_to_gen7_vc1_condover[3] = {
1476     0,
1477     2,
1478     3
1479 };
1480
1481 static const int va_to_gen7_vc1_profile[4] = {
1482     GEN7_VC1_SIMPLE_PROFILE,
1483     GEN7_VC1_MAIN_PROFILE,
1484     GEN7_VC1_RESERVED_PROFILE,
1485     GEN7_VC1_ADVANCED_PROFILE
1486 };
1487
1488 static void 
1489 gen75_mfd_free_vc1_surface(void **data)
1490 {
1491     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1492
1493     if (!gen7_vc1_surface)
1494         return;
1495
1496     dri_bo_unreference(gen7_vc1_surface->dmv);
1497     free(gen7_vc1_surface);
1498     *data = NULL;
1499 }
1500
1501 static void
1502 gen75_mfd_init_vc1_surface(VADriverContextP ctx, 
1503                           VAPictureParameterBufferVC1 *pic_param,
1504                           struct object_surface *obj_surface)
1505 {
1506     struct i965_driver_data *i965 = i965_driver_data(ctx);
1507     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1508     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1509     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1510
1511     obj_surface->free_private_data = gen75_mfd_free_vc1_surface;
1512
1513     if (!gen7_vc1_surface) {
1514         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1515         assert(gen7_vc1_surface);
1516         assert((obj_surface->size & 0x3f) == 0);
1517         obj_surface->private_data = gen7_vc1_surface;
1518     }
1519
1520     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1521
1522     if (gen7_vc1_surface->dmv == NULL) {
1523         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1524                                              "direct mv w/r buffer",
1525                                              width_in_mbs * height_in_mbs * 64,
1526                                              0x1000);
1527     }
1528 }
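/*
 * Editor's note: the direct-MV buffer above is sized at 64 bytes per
 * macroblock.  Worked example (hypothetical stream): a 1920x1080 picture
 * is 120 x 68 macroblocks, so the allocation is 120 * 68 * 64 = 522240
 * bytes (roughly 510 KiB).
 */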
1529
1530 static void
1531 gen75_mfd_vc1_decode_init(VADriverContextP ctx,
1532                          struct decode_state *decode_state,
1533                          struct gen7_mfd_context *gen7_mfd_context)
1534 {
1535     VAPictureParameterBufferVC1 *pic_param;
1536     struct i965_driver_data *i965 = i965_driver_data(ctx);
1537     struct object_surface *obj_surface;
1538     dri_bo *bo;
1539     int width_in_mbs;
1540     int picture_type;
1541
1542     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1543     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1544     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1545     picture_type = pic_param->picture_fields.bits.picture_type;
1546  
1547     intel_update_vc1_frame_store_index(ctx,
1548                                        decode_state,
1549                                        pic_param,
1550                                        gen7_mfd_context->reference_surface);
1551
1552     /* Current decoded picture */
1553     obj_surface = decode_state->render_object;
1554     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1555     gen75_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1556
1557     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1558     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1559     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1560     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1561
1562     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1563     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1564     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1565     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1566
1567     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1568     bo = dri_bo_alloc(i965->intel.bufmgr,
1569                       "intra row store",
1570                       width_in_mbs * 64,
1571                       0x1000);
1572     assert(bo);
1573     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1574     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1575
1576     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1577     bo = dri_bo_alloc(i965->intel.bufmgr,
1578                       "deblocking filter row store",
1579                       width_in_mbs * 7 * 64,
1580                       0x1000);
1581     assert(bo);
1582     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1583     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1584
1585     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1586     bo = dri_bo_alloc(i965->intel.bufmgr,
1587                       "bsd mpc row store",
1588                       width_in_mbs * 96,
1589                       0x1000);
1590     assert(bo);
1591     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1592     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1593
1594     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1595
1596     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1597     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1598     
1599     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1600         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1601         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1602         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1603         int src_w, src_h;
1604         uint8_t *src = NULL, *dst = NULL;
1605
1606         assert(decode_state->bit_plane->buffer);
1607         src = decode_state->bit_plane->buffer;
1608
1609         bo = dri_bo_alloc(i965->intel.bufmgr,
1610                           "VC-1 Bitplane",
1611                           bitplane_width * height_in_mbs,
1612                           0x1000);
1613         assert(bo);
1614         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1615
1616         dri_bo_map(bo, True);
1617         assert(bo->virtual);
1618         dst = bo->virtual;
1619
1620         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1621             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1622                 int src_index, dst_index;
1623                 int src_shift;
1624                 uint8_t src_value;
1625
1626                 src_index = (src_h * width_in_mbs + src_w) / 2;
1627                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1628                 src_value = ((src[src_index] >> src_shift) & 0xf);
1629
1630                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1631                     src_value |= 0x2;
1632                 }
1633
1634                 dst_index = src_w / 2;
1635                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1636             }
1637
1638             if (src_w & 1)
1639                 dst[src_w / 2] >>= 4;
1640
1641             dst += bitplane_width;
1642         }
1643
1644         dri_bo_unmap(bo);
1645     } else
1646         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1647 }
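/*
 * Editor's note on the bitplane repacking above: the VA bit_plane buffer
 * packs two macroblocks per byte, with the even-indexed macroblock of a
 * pair read from the high nibble and the odd-indexed one from the low
 * nibble.  The buffer written for the hardware uses one row of
 * bitplane_width bytes per macroblock row, with the first macroblock of a
 * pair in the low nibble and the second in the high nibble; for an odd
 * width, the trailing ">>= 4" moves the lone nibble of the last byte into
 * its low half.  For skipped pictures, bit 0x2 (presumably the
 * per-macroblock skip flag) is forced on for every macroblock.
 */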
1648
1649 static void
1650 gen75_mfd_vc1_pic_state(VADriverContextP ctx,
1651                        struct decode_state *decode_state,
1652                        struct gen7_mfd_context *gen7_mfd_context)
1653 {
1654     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1655     VAPictureParameterBufferVC1 *pic_param;
1656     struct object_surface *obj_surface;
1657     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1658     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1659     int unified_mv_mode;
1660     int ref_field_pic_polarity = 0;
1661     int scale_factor = 0;
1662     int trans_ac_y = 0;
1663     int dmv_surface_valid = 0;
1664     int brfd = 0;
1665     int fcm = 0;
1666     int picture_type;
1667     int profile;
1668     int overlap;
1669     int interpolation_mode = 0;
1670
1671     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1672     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1673
1674     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1675     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1676     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1677     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1678     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1679     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1680     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1681     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1682
1683     if (dquant == 0) {
1684         alt_pquant_config = 0;
1685         alt_pquant_edge_mask = 0;
1686     } else if (dquant == 2) {
1687         alt_pquant_config = 1;
1688         alt_pquant_edge_mask = 0xf;
1689     } else {
1690         assert(dquant == 1);
1691         if (dquantfrm == 0) {
1692             alt_pquant_config = 0;
1693             alt_pquant_edge_mask = 0;
1694             alt_pq = 0;
1695         } else {
1696             assert(dquantfrm == 1);
1697             alt_pquant_config = 1;
1698
1699             switch (dqprofile) {
1700             case 3:
1701                 if (dqbilevel == 0) {
1702                     alt_pquant_config = 2;
1703                     alt_pquant_edge_mask = 0;
1704                 } else {
1705                     assert(dqbilevel == 1);
1706                     alt_pquant_config = 3;
1707                     alt_pquant_edge_mask = 0;
1708                 }
1709                 break;
1710                 
1711             case 0:
1712                 alt_pquant_edge_mask = 0xf;
1713                 break;
1714
1715             case 1:
1716                 if (dqdbedge == 3)
1717                     alt_pquant_edge_mask = 0x9;
1718                 else
1719                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1720
1721                 break;
1722
1723             case 2:
1724                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1725                 break;
1726
1727             default:
1728                 assert(0);
1729             }
1730         }
1731     }
1732
1733     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1734         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1735         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1736     } else {
1737         assert(pic_param->mv_fields.bits.mv_mode < 4);
1738         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1739     }
1740
1741     if (pic_param->sequence_fields.bits.interlace == 1 &&
1742         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1743         /* FIXME: calculate reference field picture polarity */
1744         assert(0);
1745         ref_field_pic_polarity = 0;
1746     }
1747
1748     if (pic_param->b_picture_fraction < 21)
1749         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1750
1751     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1752     
1753     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1754         picture_type == GEN7_VC1_I_PICTURE)
1755         picture_type = GEN7_VC1_BI_PICTURE;
1756
1757     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1758         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1759     else {
1760         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1761
1762         /*
1763          * 8.3.6.2.1 Transform Type Selection
1764          * If variable-sized transform coding is not enabled,
1765          * then the 8x8 transform shall be used for all blocks.
1766          * It is also an MFX_VC1_PIC_STATE requirement.
1767          */
1768         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1769             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1770             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1771         }
1772     }
1773
1774     if (picture_type == GEN7_VC1_B_PICTURE) {
1775         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1776
1777         obj_surface = decode_state->reference_objects[1];
1778
1779         if (obj_surface)
1780             gen7_vc1_surface = obj_surface->private_data;
1781
1782         if (!gen7_vc1_surface || 
1783             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1784              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1785             dmv_surface_valid = 0;
1786         else
1787             dmv_surface_valid = 1;
1788     }
1789
1790     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1791
1792     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1793         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1794     else {
1795         if (pic_param->picture_fields.bits.top_field_first)
1796             fcm = 2;
1797         else
1798             fcm = 3;
1799     }
1800
1801     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1802         brfd = pic_param->reference_fields.bits.reference_distance;
1803         brfd = (scale_factor * brfd) >> 8;
1804         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1805
1806         if (brfd < 0)
1807             brfd = 0;
1808     }
1809
1810     overlap = pic_param->sequence_fields.bits.overlap;
1811
1812     if (overlap) {
1813         overlap = 0;
1814         if (profile != GEN7_VC1_ADVANCED_PROFILE){
1815             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1816                 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1817                 overlap = 1;
1818             }
1819         }else {
1820             if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1821                 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1822                 overlap = 1;
1823             }
1824             if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1825                 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
1826                 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1827                     overlap = 1;
1828                 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1829                            va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1830                     overlap = 1;
1831                 }
1832             }
1833         }
1834     } 
1835
1836     assert(pic_param->conditional_overlap_flag < 3);
1837     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1838
1839     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1840         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1841          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1842         interpolation_mode = 9; /* Half-pel bilinear */
1843     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1844              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1845               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1846         interpolation_mode = 1; /* Half-pel bicubic */
1847     else
1848         interpolation_mode = 0; /* Quarter-pel bicubic */
1849
1850     BEGIN_BCS_BATCH(batch, 6);
1851     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1852     OUT_BCS_BATCH(batch,
1853                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1854                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1855     OUT_BCS_BATCH(batch,
1856                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1857                   dmv_surface_valid << 15 |
1858                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1859                   pic_param->rounding_control << 13 |
1860                   pic_param->sequence_fields.bits.syncmarker << 12 |
1861                   interpolation_mode << 8 |
1862                   0 << 7 | /* FIXME: scale up or down ??? */
1863                   pic_param->range_reduction_frame << 6 |
1864                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1865                   overlap << 4 |
1866                   !pic_param->picture_fields.bits.is_first_field << 3 |
1867                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1868     OUT_BCS_BATCH(batch,
1869                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1870                   picture_type << 26 |
1871                   fcm << 24 |
1872                   alt_pq << 16 |
1873                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1874                   scale_factor << 0);
1875     OUT_BCS_BATCH(batch,
1876                   unified_mv_mode << 28 |
1877                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1878                   pic_param->fast_uvmc_flag << 26 |
1879                   ref_field_pic_polarity << 25 |
1880                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1881                   pic_param->reference_fields.bits.reference_distance << 20 |
1882                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1883                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1884                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1885                   alt_pquant_edge_mask << 4 |
1886                   alt_pquant_config << 2 |
1887                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1888                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1889     OUT_BCS_BATCH(batch,
1890                   !!pic_param->bitplane_present.value << 31 |
1891                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1892                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1893                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1894                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1895                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1896                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1897                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1898                   pic_param->mv_fields.bits.mv_table << 20 |
1899                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1900                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1901                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1902                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1903                   pic_param->mb_mode_table << 8 |
1904                   trans_ac_y << 6 |
1905                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1906                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1907                   pic_param->cbp_table << 0);
1908     ADVANCE_BCS_BATCH(batch);
1909 }
1910
1911 static void
1912 gen75_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1913                              struct decode_state *decode_state,
1914                              struct gen7_mfd_context *gen7_mfd_context)
1915 {
1916     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1917     VAPictureParameterBufferVC1 *pic_param;
1918     int intensitycomp_single;
1919
1920     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1921     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1922     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1923
1924     BEGIN_BCS_BATCH(batch, 6);
1925     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1926     OUT_BCS_BATCH(batch,
1927                   0 << 14 | /* FIXME: double ??? */
1928                   0 << 12 |
1929                   intensitycomp_single << 10 |
1930                   intensitycomp_single << 8 |
1931                   0 << 4 | /* FIXME: interlace mode */
1932                   0);
1933     OUT_BCS_BATCH(batch,
1934                   pic_param->luma_shift << 16 |
1935                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1936     OUT_BCS_BATCH(batch, 0);
1937     OUT_BCS_BATCH(batch, 0);
1938     OUT_BCS_BATCH(batch, 0);
1939     ADVANCE_BCS_BATCH(batch);
1940 }
1941
1942 static void
1943 gen75_mfd_vc1_directmode_state_bplus(VADriverContextP ctx,
1944                               struct decode_state *decode_state,
1945                               struct gen7_mfd_context *gen7_mfd_context)
1946 {
1947     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1948     struct object_surface *obj_surface;
1949     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1950
1951     obj_surface = decode_state->render_object;
1952
1953     if (obj_surface && obj_surface->private_data) {
1954         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1955     }
1956
1957     obj_surface = decode_state->reference_objects[1];
1958
1959     if (obj_surface && obj_surface->private_data) {
1960         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1961     }
1962
1963     BEGIN_BCS_BATCH(batch, 7);
1964     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1965
1966     if (dmv_write_buffer)
1967         OUT_BCS_RELOC(batch, dmv_write_buffer,
1968                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1969                       0);
1970     else
1971         OUT_BCS_BATCH(batch, 0);
1972
1973     OUT_BCS_BATCH(batch, 0);
1974     OUT_BCS_BATCH(batch, 0);
1975
1976     if (dmv_read_buffer)
1977         OUT_BCS_RELOC(batch, dmv_read_buffer,
1978                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1979                       0);
1980     else
1981         OUT_BCS_BATCH(batch, 0);
1982     OUT_BCS_BATCH(batch, 0);
1983     OUT_BCS_BATCH(batch, 0);
1984
1985     ADVANCE_BCS_BATCH(batch);
1986 }
1987
1988 static void
1989 gen75_mfd_vc1_directmode_state(VADriverContextP ctx,
1990                               struct decode_state *decode_state,
1991                               struct gen7_mfd_context *gen7_mfd_context)
1992 {
1993     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1994     struct i965_driver_data *i965 = i965_driver_data(ctx);
1995     struct object_surface *obj_surface;
1996     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1997
1998     if (IS_STEPPING_BPLUS(i965)) {
1999         gen75_mfd_vc1_directmode_state_bplus(ctx, decode_state, gen7_mfd_context);
2000         return;
2001     }
2002
2003     obj_surface = decode_state->render_object;
2004
2005     if (obj_surface && obj_surface->private_data) {
2006         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2007     }
2008
2009     obj_surface = decode_state->reference_objects[1];
2010
2011     if (obj_surface && obj_surface->private_data) {
2012         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2013     }
2014
2015     BEGIN_BCS_BATCH(batch, 3);
2016     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
2017
2018     if (dmv_write_buffer)
2019         OUT_BCS_RELOC(batch, dmv_write_buffer,
2020                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2021                       0);
2022     else
2023         OUT_BCS_BATCH(batch, 0);
2024
2025     if (dmv_read_buffer)
2026         OUT_BCS_RELOC(batch, dmv_read_buffer,
2027                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2028                       0);
2029     else
2030         OUT_BCS_BATCH(batch, 0);
2031                   
2032     ADVANCE_BCS_BATCH(batch);
2033 }
2034
2035 static int
2036 gen75_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
2037 {
2038     int out_slice_data_bit_offset;
2039     int slice_header_size = in_slice_data_bit_offset / 8;
2040     int i, j;
2041
2042     if (profile != 3)
2043         out_slice_data_bit_offset = in_slice_data_bit_offset;
2044     else {
2045         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
2046             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
2047                 i++, j += 2;
2048             }
2049         }
2050
2051         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
2052     }
2053
2054     return out_slice_data_bit_offset;
2055 }
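/*
 * Editor's note: for Advanced profile (profile == 3) the macroblock data
 * offset supplied by the application appears to count bits in the
 * unescaped bitstream, so the loop above re-adds the 0x03
 * emulation-prevention bytes found in 0x00 0x00 0x03 0x0n sequences.
 * Worked example (hypothetical bytes): with
 * buf = { 0x00, 0x00, 0x03, 0x00, 0x88, ... } and
 * in_slice_data_bit_offset = 32, the four unescaped header bytes span
 * five escaped bytes, so the function returns 8 * 5 + 0 = 40.
 */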
2056
2057 static void
2058 gen75_mfd_vc1_bsd_object(VADriverContextP ctx,
2059                         VAPictureParameterBufferVC1 *pic_param,
2060                         VASliceParameterBufferVC1 *slice_param,
2061                         VASliceParameterBufferVC1 *next_slice_param,
2062                         dri_bo *slice_data_bo,
2063                         struct gen7_mfd_context *gen7_mfd_context)
2064 {
2065     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2066     int next_slice_start_vert_pos;
2067     int macroblock_offset;
2068     uint8_t *slice_data = NULL;
2069
2070     dri_bo_map(slice_data_bo, 0);
2071     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
2072     macroblock_offset = gen75_mfd_vc1_get_macroblock_bit_offset(slice_data, 
2073                                                                slice_param->macroblock_offset,
2074                                                                pic_param->sequence_fields.bits.profile);
2075     dri_bo_unmap(slice_data_bo);
2076
2077     if (next_slice_param)
2078         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
2079     else
2080         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
2081
2082     BEGIN_BCS_BATCH(batch, 5);
2083     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
2084     OUT_BCS_BATCH(batch, 
2085                   slice_param->slice_data_size - (macroblock_offset >> 3));
2086     OUT_BCS_BATCH(batch, 
2087                   slice_param->slice_data_offset + (macroblock_offset >> 3));
2088     OUT_BCS_BATCH(batch,
2089                   slice_param->slice_vertical_position << 16 |
2090                   next_slice_start_vert_pos << 0);
2091     OUT_BCS_BATCH(batch,
2092                   (macroblock_offset & 0x7));
2093     ADVANCE_BCS_BATCH(batch);
2094 }
2095
2096 static void
2097 gen75_mfd_vc1_decode_picture(VADriverContextP ctx,
2098                             struct decode_state *decode_state,
2099                             struct gen7_mfd_context *gen7_mfd_context)
2100 {
2101     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2102     VAPictureParameterBufferVC1 *pic_param;
2103     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
2104     dri_bo *slice_data_bo;
2105     int i, j;
2106
2107     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2108     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2109
2110     gen75_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
2111     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2112     intel_batchbuffer_emit_mi_flush(batch);
2113     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2114     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2115     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2116     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2117     gen75_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
2118     gen75_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
2119     gen75_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
2120
2121     for (j = 0; j < decode_state->num_slice_params; j++) {
2122         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2123         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
2124         slice_data_bo = decode_state->slice_datas[j]->bo;
2125         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
2126
2127         if (j == decode_state->num_slice_params - 1)
2128             next_slice_group_param = NULL;
2129         else
2130             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
2131
2132         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2133             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2134
2135             if (i < decode_state->slice_params[j]->num_elements - 1)
2136                 next_slice_param = slice_param + 1;
2137             else
2138                 next_slice_param = next_slice_group_param;
2139
2140             gen75_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2141             slice_param++;
2142         }
2143     }
2144
2145     intel_batchbuffer_end_atomic(batch);
2146     intel_batchbuffer_flush(batch);
2147 }
2148
2149 static void
2150 gen75_mfd_jpeg_decode_init(VADriverContextP ctx,
2151                           struct decode_state *decode_state,
2152                           struct gen7_mfd_context *gen7_mfd_context)
2153 {
2154     struct object_surface *obj_surface;
2155     VAPictureParameterBufferJPEGBaseline *pic_param;
2156     int subsampling = SUBSAMPLE_YUV420;
2157     int fourcc = VA_FOURCC_IMC3;
2158
2159     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2160
2161     if (pic_param->num_components == 1) {
2162         subsampling = SUBSAMPLE_YUV400;
2163         fourcc = VA_FOURCC_Y800;
2164     } else if (pic_param->num_components == 3) {
2165         int h1 = pic_param->components[0].h_sampling_factor;
2166         int h2 = pic_param->components[1].h_sampling_factor;
2167         int h3 = pic_param->components[2].h_sampling_factor;
2168         int v1 = pic_param->components[0].v_sampling_factor;
2169         int v2 = pic_param->components[1].v_sampling_factor;
2170         int v3 = pic_param->components[2].v_sampling_factor;
2171
2172         if (h1 == 2 && h2 == 1 && h3 == 1 &&
2173             v1 == 2 && v2 == 1 && v3 == 1) {
2174             subsampling = SUBSAMPLE_YUV420;
2175             fourcc = VA_FOURCC_IMC3;
2176         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2177                    v1 == 1 && v2 == 1 && v3 == 1) {
2178             subsampling = SUBSAMPLE_YUV422H;
2179             fourcc = VA_FOURCC_422H;
2180         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2181                    v1 == 1 && v2 == 1 && v3 == 1) {
2182             subsampling = SUBSAMPLE_YUV444;
2183             fourcc = VA_FOURCC_444P;
2184         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2185                    v1 == 1 && v2 == 1 && v3 == 1) {
2186             subsampling = SUBSAMPLE_YUV411;
2187             fourcc = VA_FOURCC_411P;
2188         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2189                    v1 == 2 && v2 == 1 && v3 == 1) {
2190             subsampling = SUBSAMPLE_YUV422V;
2191             fourcc = VA_FOURCC_422V;
2192         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2193                    v1 == 2 && v2 == 2 && v3 == 2) {
2194             subsampling = SUBSAMPLE_YUV422H;
2195             fourcc = VA_FOURCC_422H;
2196         } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2197                    v1 == 2 && v2 == 1 && v3 == 1) {
2198             subsampling = SUBSAMPLE_YUV422V;
2199             fourcc = VA_FOURCC_422V;
2200         } else
2201             assert(0);
2202     } else {
2203         assert(0);
2204     }
2205
2206     /* Current decoded picture */
2207     obj_surface = decode_state->render_object;
2208     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
2209
2210     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2211     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2212     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2213     gen7_mfd_context->pre_deblocking_output.valid = 1;
2214
2215     gen7_mfd_context->post_deblocking_output.bo = NULL;
2216     gen7_mfd_context->post_deblocking_output.valid = 0;
2217
2218     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2219     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
2220
2221     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2222     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
2223
2224     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2225     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
2226
2227     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2228     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
2229
2230     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2231     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2232 }
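/*
 * Editor's note: unlike the MPEG-2 and VC-1 init paths above, which
 * always render into NV12, the JPEG path (re)allocates the render target
 * with a fourcc matching the stream's own chroma subsampling, and all of
 * the row-store/bitplane scratch buffers are left unused and marked
 * invalid.
 */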
2233
2234 static const int va_to_gen7_jpeg_rotation[4] = {
2235     GEN7_JPEG_ROTATION_0,
2236     GEN7_JPEG_ROTATION_90,
2237     GEN7_JPEG_ROTATION_180,
2238     GEN7_JPEG_ROTATION_270
2239 };
2240
2241 static void
2242 gen75_mfd_jpeg_pic_state(VADriverContextP ctx,
2243                         struct decode_state *decode_state,
2244                         struct gen7_mfd_context *gen7_mfd_context)
2245 {
2246     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2247     VAPictureParameterBufferJPEGBaseline *pic_param;
2248     int chroma_type = GEN7_YUV420;
2249     int frame_width_in_blks;
2250     int frame_height_in_blks;
2251
2252     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2253     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2254
2255     if (pic_param->num_components == 1)
2256         chroma_type = GEN7_YUV400;
2257     else if (pic_param->num_components == 3) {
2258         int h1 = pic_param->components[0].h_sampling_factor;
2259         int h2 = pic_param->components[1].h_sampling_factor;
2260         int h3 = pic_param->components[2].h_sampling_factor;
2261         int v1 = pic_param->components[0].v_sampling_factor;
2262         int v2 = pic_param->components[1].v_sampling_factor;
2263         int v3 = pic_param->components[2].v_sampling_factor;
2264
2265         if (h1 == 2 && h2 == 1 && h3 == 1 &&
2266             v1 == 2 && v2 == 1 && v3 == 1)
2267             chroma_type = GEN7_YUV420;
2268         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2269                  v1 == 1 && v2 == 1 && v3 == 1)
2270             chroma_type = GEN7_YUV422H_2Y;
2271         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2272                  v1 == 1 && v2 == 1 && v3 == 1)
2273             chroma_type = GEN7_YUV444;
2274         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2275                  v1 == 1 && v2 == 1 && v3 == 1)
2276             chroma_type = GEN7_YUV411;
2277         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2278                  v1 == 2 && v2 == 1 && v3 == 1)
2279             chroma_type = GEN7_YUV422V_2Y;
2280         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2281                  v1 == 2 && v2 == 2 && v3 == 2)
2282             chroma_type = GEN7_YUV422H_4Y;
2283         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2284                  v1 == 2 && v2 == 1 && v3 == 1)
2285             chroma_type = GEN7_YUV422V_4Y;
2286         else
2287             assert(0);
2288     }
2289
2290     if (chroma_type == GEN7_YUV400 ||
2291         chroma_type == GEN7_YUV444 ||
2292         chroma_type == GEN7_YUV422V_2Y) {
2293         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2294         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2295     } else if (chroma_type == GEN7_YUV411) {
2296         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2297         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2298     } else {
2299         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2300         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2301     }
2302
2303     BEGIN_BCS_BATCH(batch, 3);
2304     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2305     OUT_BCS_BATCH(batch,
2306                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2307                   (chroma_type << 0));
2308     OUT_BCS_BATCH(batch,
2309                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2310                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2311     ADVANCE_BCS_BATCH(batch);
2312 }
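/*
 * Editor's note: the last dword above stores the block counts minus one.
 * Worked example (hypothetical picture): a 1920x1080 4:2:0 JPEG takes the
 * final branch, giving frame_width_in_blks = ((1920 + 15) / 16) * 2 = 240
 * and frame_height_in_blks = ((1080 + 15) / 16) * 2 = 136, so the dword
 * holds (135 << 16) | 239.
 */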
2313
2314 static const int va_to_gen7_jpeg_hufftable[2] = {
2315     MFX_HUFFTABLE_ID_Y,
2316     MFX_HUFFTABLE_ID_UV
2317 };
2318
2319 static void
2320 gen75_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2321                                struct decode_state *decode_state,
2322                                struct gen7_mfd_context *gen7_mfd_context,
2323                                int num_tables)
2324 {
2325     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2326     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2327     int index;
2328
2329     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2330         return;
2331
2332     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2333
2334     for (index = 0; index < num_tables; index++) {
2335         int id = va_to_gen7_jpeg_hufftable[index];
2336
2337         if (!huffman_table->load_huffman_table[index])
2338             continue;
2339
2340         BEGIN_BCS_BATCH(batch, 53);
2341         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2342         OUT_BCS_BATCH(batch, id);
2343         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2344         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2345         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2346         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2347         ADVANCE_BCS_BATCH(batch);
2348     }
2349 }
2350
2351 static const int va_to_gen7_jpeg_qm[5] = {
2352     -1,
2353     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2354     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2355     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2356     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2357 };
2358
2359 static void
2360 gen75_mfd_jpeg_qm_state(VADriverContextP ctx,
2361                        struct decode_state *decode_state,
2362                        struct gen7_mfd_context *gen7_mfd_context)
2363 {
2364     VAPictureParameterBufferJPEGBaseline *pic_param;
2365     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2366     int index;
2367
2368     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2369         return;
2370
2371     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2372     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2373
2374     assert(pic_param->num_components <= 3);
2375
2376     for (index = 0; index < pic_param->num_components; index++) {
2377         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2378         int qm_type;
2379         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2380         unsigned char raster_qm[64];
2381         int j;
2382
2383         if (id > 4 || id < 1)
2384             continue;
2385
2386         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2387             continue;
2388
2389         qm_type = va_to_gen7_jpeg_qm[id];
2390
2391         for (j = 0; j < 64; j++)
2392             raster_qm[zigzag_direct[j]] = qm[j];
2393
2394         gen75_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2395     }
2396 }
2397
2398 static void
2399 gen75_mfd_jpeg_bsd_object(VADriverContextP ctx,
2400                          VAPictureParameterBufferJPEGBaseline *pic_param,
2401                          VASliceParameterBufferJPEGBaseline *slice_param,
2402                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2403                          dri_bo *slice_data_bo,
2404                          struct gen7_mfd_context *gen7_mfd_context)
2405 {
2406     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2407     int scan_component_mask = 0;
2408     int i;
2409
2410     assert(slice_param->num_components > 0);
2411     assert(slice_param->num_components < 4);
2412     assert(slice_param->num_components <= pic_param->num_components);
2413
2414     for (i = 0; i < slice_param->num_components; i++) {
2415         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2416         case 1:
2417             scan_component_mask |= (1 << 0);
2418             break;
2419         case 2:
2420             scan_component_mask |= (1 << 1);
2421             break;
2422         case 3:
2423             scan_component_mask |= (1 << 2);
2424             break;
2425         default:
2426             assert(0);
2427             break;
2428         }
2429     }
2430
2431     BEGIN_BCS_BATCH(batch, 6);
2432     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2433     OUT_BCS_BATCH(batch, 
2434                   slice_param->slice_data_size);
2435     OUT_BCS_BATCH(batch, 
2436                   slice_param->slice_data_offset);
2437     OUT_BCS_BATCH(batch,
2438                   slice_param->slice_horizontal_position << 16 |
2439                   slice_param->slice_vertical_position << 0);
2440     OUT_BCS_BATCH(batch,
2441                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2442                   (scan_component_mask << 27) |                 /* scan components */
2443                   (0 << 26) |   /* disable interrupt allowed */
2444                   (slice_param->num_mcus << 0));                /* MCU count */
2445     OUT_BCS_BATCH(batch,
2446                   (slice_param->restart_interval << 0));    /* RestartInterval */
2447     ADVANCE_BCS_BATCH(batch);
2448 }
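/*
 * Editor's note: scan_component_mask above carries one bit per component
 * present in this scan, derived from component_selector relative to the
 * first frame component (for a typical Y/Cb/Cr frame header: bit 0 = Y,
 * bit 1 = Cb, bit 2 = Cr).  Scans with a single component are also marked
 * as non-interleaved via bit 30 of the following dword.
 */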
2449
2450 /* Workaround for JPEG decoding on Ivybridge: used by the JPEG decode path to run a dummy VLD decode of the tiny hard-coded 16x16 AVC clip below before the real JPEG decode. */
2451
2452 static struct {
2453     int width;
2454     int height;
2455     unsigned char data[32];
2456     int data_size;
2457     int data_bit_offset;
2458     int qp;
2459 } gen7_jpeg_wa_clip = {
2460     16,     /* width */
2461     16,     /* height */
2462     {
2463         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2464         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2465     },
2466     14,     /* data_size */
2467     40,     /* data_bit_offset */
2468     28,     /* qp */
2469 };
2470
2471 static void
2472 gen75_jpeg_wa_init(VADriverContextP ctx,
2473                   struct gen7_mfd_context *gen7_mfd_context)
2474 {
2475     struct i965_driver_data *i965 = i965_driver_data(ctx);
2476     VAStatus status;
2477     struct object_surface *obj_surface;
2478
2479     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2480         i965_DestroySurfaces(ctx,
2481                              &gen7_mfd_context->jpeg_wa_surface_id,
2482                              1);
2483
2484     status = i965_CreateSurfaces(ctx,
2485                                  gen7_jpeg_wa_clip.width,
2486                                  gen7_jpeg_wa_clip.height,
2487                                  VA_RT_FORMAT_YUV420,
2488                                  1,
2489                                  &gen7_mfd_context->jpeg_wa_surface_id);
2490     assert(status == VA_STATUS_SUCCESS);
2491
2492     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2493     assert(obj_surface);
2494     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2495     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2496
2497     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2498         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2499                                                                "JPEG WA data",
2500                                                                0x1000,
2501                                                                0x1000);
2502         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2503                        0,
2504                        gen7_jpeg_wa_clip.data_size,
2505                        gen7_jpeg_wa_clip.data);
2506     }
2507 }
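/*
 * Editor's note: gen75_jpeg_wa_init above (re)creates a 16x16 NV12
 * surface as the target of the workaround decode and, on first use,
 * allocates a 4 KiB BO holding the 14-byte bitstream of the hard-coded
 * clip; the BO is allocated once and reused on subsequent calls.
 */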
2508
2509 static void
2510 gen75_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2511                               struct gen7_mfd_context *gen7_mfd_context)
2512 {
2513     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2514
2515     BEGIN_BCS_BATCH(batch, 5);
2516     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2517     OUT_BCS_BATCH(batch,
2518                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2519                   (MFD_MODE_VLD << 15) | /* VLD mode */
2520                   (0 << 10) | /* disable Stream-Out */
2521                   (0 << 9)  | /* Post Deblocking Output */
2522                   (1 << 8)  | /* Pre Deblocking Output */
2523                   (0 << 5)  | /* not in stitch mode */
2524                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2525                   (MFX_FORMAT_AVC << 0));
2526     OUT_BCS_BATCH(batch,
2527                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2528                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2529                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2530                   (0 << 1)  |
2531                   (0 << 0));
2532     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2533     OUT_BCS_BATCH(batch, 0); /* reserved */
2534     ADVANCE_BCS_BATCH(batch);
2535 }
2536
2537 static void
2538 gen75_jpeg_wa_surface_state(VADriverContextP ctx,
2539                            struct gen7_mfd_context *gen7_mfd_context)
2540 {
2541     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2542     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2543
2544     BEGIN_BCS_BATCH(batch, 6);
2545     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2546     OUT_BCS_BATCH(batch, 0);
2547     OUT_BCS_BATCH(batch,
2548                   ((obj_surface->orig_width - 1) << 18) |
2549                   ((obj_surface->orig_height - 1) << 4));
2550     OUT_BCS_BATCH(batch,
2551                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2552                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2553                   (0 << 22) | /* surface object control state, ignored */
2554                   ((obj_surface->width - 1) << 3) | /* pitch */
2555                   (0 << 2)  | /* must be 0 */
2556                   (1 << 1)  | /* must be tiled */
2557                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2558     OUT_BCS_BATCH(batch,
2559                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2560                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2561     OUT_BCS_BATCH(batch,
2562                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2563                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2564     ADVANCE_BCS_BATCH(batch);
2565 }
2566
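/* MFX_PIPE_BUF_ADDR_STATE, B0+ stepping layout (61 dwords): every buffer
 * address takes three dwords here, versus one in the pre-B0 layout below.
 * Only the pre-deblocking output (the scratch surface) and a temporary intra
 * row store buffer are programmed; all other addresses stay zero. */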
2567 static void
2568 gen75_jpeg_wa_pipe_buf_addr_state_bplus(VADriverContextP ctx,
2569                                  struct gen7_mfd_context *gen7_mfd_context)
2570 {
2571     struct i965_driver_data *i965 = i965_driver_data(ctx);
2572     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2573     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2574     dri_bo *intra_bo;
2575     int i;
2576
2577     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2578                             "intra row store",
2579                             128 * 64,
2580                             0x1000);
2581
2582     BEGIN_BCS_BATCH(batch, 61);
2583     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2584     OUT_BCS_RELOC(batch,
2585                   obj_surface->bo,
2586                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2587                   0);
2588     OUT_BCS_BATCH(batch, 0);
2589     OUT_BCS_BATCH(batch, 0);
2590
2591
2592     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2593     OUT_BCS_BATCH(batch, 0);
2594     OUT_BCS_BATCH(batch, 0);
2595
2596     /* uncompressed-video & stream out 7-12 */
2597     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2598     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2599     OUT_BCS_BATCH(batch, 0);
2600     OUT_BCS_BATCH(batch, 0);
2601     OUT_BCS_BATCH(batch, 0);
2602     OUT_BCS_BATCH(batch, 0);
2603
2604     /* DW 13-15 are for the intra row store scratch buffer */
2605     OUT_BCS_RELOC(batch,
2606                   intra_bo,
2607                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2608                   0);
2609     OUT_BCS_BATCH(batch, 0);
2610     OUT_BCS_BATCH(batch, 0);
2611
2612     /* DW 16-18 are for the deblocking filter */
2613     OUT_BCS_BATCH(batch, 0);
2614     OUT_BCS_BATCH(batch, 0);
2615     OUT_BCS_BATCH(batch, 0);
2616
2617     /* DW 19..50 */
2618     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2619         OUT_BCS_BATCH(batch, 0);
2620         OUT_BCS_BATCH(batch, 0);
2621     }
2622     OUT_BCS_BATCH(batch, 0);
2623
2624     /* DW 52-54 are for the MB status buffer address */
2625     OUT_BCS_BATCH(batch, 0);
2626     OUT_BCS_BATCH(batch, 0);
2627     OUT_BCS_BATCH(batch, 0);
2628     /* DW 55-60 are for the ILDB & second ILDB addresses */
2629     OUT_BCS_BATCH(batch, 0);
2630     OUT_BCS_BATCH(batch, 0);
2631     OUT_BCS_BATCH(batch, 0);
2632     OUT_BCS_BATCH(batch, 0);
2633     OUT_BCS_BATCH(batch, 0);
2634     OUT_BCS_BATCH(batch, 0);
2635
2636     ADVANCE_BCS_BATCH(batch);
2637
2638     dri_bo_unreference(intra_bo);
2639 }
2640
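/* Dispatch on stepping: B0 and later use the wider layout above, earlier
 * steppings fall through to the 25-dword layout below. */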
2641 static void
2642 gen75_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2643                                  struct gen7_mfd_context *gen7_mfd_context)
2644 {
2645     struct i965_driver_data *i965 = i965_driver_data(ctx);
2646     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2647     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2648     dri_bo *intra_bo;
2649     int i;
2650
2651     if (IS_STEPPING_BPLUS(i965)) {
2652         gen75_jpeg_wa_pipe_buf_addr_state_bplus(ctx, gen7_mfd_context);
2653         return;
2654     }
2655
2656     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2657                             "intra row store",
2658                             128 * 64,
2659                             0x1000);
2660
2661     BEGIN_BCS_BATCH(batch, 25);
2662     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
2663     OUT_BCS_RELOC(batch,
2664                   obj_surface->bo,
2665                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2666                   0);
2667     
2668     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2669
2670     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2671     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2672
2673     OUT_BCS_RELOC(batch,
2674                   intra_bo,
2675                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2676                   0);
2677
2678     OUT_BCS_BATCH(batch, 0);
2679
2680     /* DW 7..22 */
2681     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2682         OUT_BCS_BATCH(batch, 0);
2683     }
2684
2685     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
2686     OUT_BCS_BATCH(batch, 0);
2687     ADVANCE_BCS_BATCH(batch);
2688
2689     dri_bo_unreference(intra_bo);
2690 }
2691
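/* MFX_BSP_BUF_BASE_ADDR_STATE: temporary BSD/MPC and MPR row store buffers for
 * the dummy pass, allocated per call and unreferenced once their relocations
 * have been emitted. */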
2692 static void
2693 gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
2694                                      struct gen7_mfd_context *gen7_mfd_context)
2695 {
2696     struct i965_driver_data *i965 = i965_driver_data(ctx);
2697     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2698     dri_bo *bsd_mpc_bo, *mpr_bo;
2699
2700     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2701                               "bsd mpc row store",
2702                               11520, /* 1.5 * 120 * 64 */
2703                               0x1000);
2704
2705     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2706                           "mpr row store",
2707                           7680, /* 1.0 * 120 * 64 */
2708                           0x1000);
2709
2710     BEGIN_BCS_BATCH(batch, 10);
2711     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2712
2713     OUT_BCS_RELOC(batch,
2714                   bsd_mpc_bo,
2715                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2716                   0);
2717
2718     OUT_BCS_BATCH(batch, 0);
2719     OUT_BCS_BATCH(batch, 0);
2720
2721     OUT_BCS_RELOC(batch,
2722                   mpr_bo,
2723                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2724                   0);
2725     OUT_BCS_BATCH(batch, 0);
2726     OUT_BCS_BATCH(batch, 0);
2727
2728     OUT_BCS_BATCH(batch, 0);
2729     OUT_BCS_BATCH(batch, 0);
2730     OUT_BCS_BATCH(batch, 0);
2731
2732     ADVANCE_BCS_BATCH(batch);
2733
2734     dri_bo_unreference(bsd_mpc_bo);
2735     dri_bo_unreference(mpr_bo);
2736 }
2737
2738 static void
2739 gen75_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2740                                      struct gen7_mfd_context *gen7_mfd_context)
2741 {
2742     struct i965_driver_data *i965 = i965_driver_data(ctx);
2743     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2744     dri_bo *bsd_mpc_bo, *mpr_bo;
2745
2746     if (IS_STEPPING_BPLUS(i965)) {
2747         gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(ctx, gen7_mfd_context);
2748         return;
2749     }
2750
2751     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2752                               "bsd mpc row store",
2753                               11520, /* 1.5 * 120 * 64 */
2754                               0x1000);
2755
2756     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2757                           "mpr row store",
2758                           7680, /* 1.0 * 120 * 64 */
2759                           0x1000);
2760
2761     BEGIN_BCS_BATCH(batch, 4);
2762     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
2763
2764     OUT_BCS_RELOC(batch,
2765                   bsd_mpc_bo,
2766                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2767                   0);
2768
2769     OUT_BCS_RELOC(batch,
2770                   mpr_bo,
2771                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2772                   0);
2773     OUT_BCS_BATCH(batch, 0);
2774
2775     ADVANCE_BCS_BATCH(batch);
2776
2777     dri_bo_unreference(bsd_mpc_bo);
2778     dri_bo_unreference(mpr_bo);
2779 }
2780
2781 static void
2782 gen75_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2783                           struct gen7_mfd_context *gen7_mfd_context)
2784 {
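    /* Intentionally a no-op: no MFX_QM_STATE is emitted for the dummy AVC pass. */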
2785
2786 }
2787
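/* MFX_AVC_IMG_STATE for the dummy clip: a single 1x1-macroblock frame picture,
 * 4:2:0, CABAC, no MBAFF. */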
2788 static void
2789 gen75_jpeg_wa_avc_img_state(VADriverContextP ctx,
2790                            struct gen7_mfd_context *gen7_mfd_context)
2791 {
2792     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2793     int img_struct = 0;
2794     int mbaff_frame_flag = 0;
2795     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2796
2797     BEGIN_BCS_BATCH(batch, 16);
2798     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2799     OUT_BCS_BATCH(batch, 
2800                   (width_in_mbs * height_in_mbs - 1));
2801     OUT_BCS_BATCH(batch, 
2802                   ((height_in_mbs - 1) << 16) | 
2803                   ((width_in_mbs - 1) << 0));
2804     OUT_BCS_BATCH(batch, 
2805                   (0 << 24) |
2806                   (0 << 16) |
2807                   (0 << 14) |
2808                   (0 << 13) |
2809                   (0 << 12) | /* differ from GEN6 */
2810                   (0 << 10) |
2811                   (img_struct << 8));
2812     OUT_BCS_BATCH(batch,
2813                   (1 << 10) | /* 4:2:0 */
2814                   (1 << 7) |  /* CABAC */
2815                   (0 << 6) |
2816                   (0 << 5) |
2817                   (0 << 4) |
2818                   (0 << 3) |
2819                   (1 << 2) |
2820                   (mbaff_frame_flag << 1) |
2821                   (0 << 0));
2822     OUT_BCS_BATCH(batch, 0);
2823     OUT_BCS_BATCH(batch, 0);
2824     OUT_BCS_BATCH(batch, 0);
2825     OUT_BCS_BATCH(batch, 0);
2826     OUT_BCS_BATCH(batch, 0);
2827     OUT_BCS_BATCH(batch, 0);
2828     OUT_BCS_BATCH(batch, 0);
2829     OUT_BCS_BATCH(batch, 0);
2830     OUT_BCS_BATCH(batch, 0);
2831     OUT_BCS_BATCH(batch, 0);
2832     OUT_BCS_BATCH(batch, 0);
2833     ADVANCE_BCS_BATCH(batch);
2834 }
2835
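/* MFX_AVC_DIRECTMODE_STATE with all reference and POC entries zeroed; the
 * dummy clip is a single intra slice, so no direct-mode data is referenced. */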
2836 static void
2837 gen75_jpeg_wa_avc_directmode_state_bplus(VADriverContextP ctx,
2838                                   struct gen7_mfd_context *gen7_mfd_context)
2839 {
2840     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2841     int i;
2842
2843     BEGIN_BCS_BATCH(batch, 71);
2844     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2845
2846     /* reference surfaces 0..15 */
2847     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2848         OUT_BCS_BATCH(batch, 0); /* top */
2849         OUT_BCS_BATCH(batch, 0); /* bottom */
2850     }
2851
2852     OUT_BCS_BATCH(batch, 0);
2853
2854     /* the current decoding frame/field */
2855     OUT_BCS_BATCH(batch, 0); /* top */
2856     OUT_BCS_BATCH(batch, 0);
2857     OUT_BCS_BATCH(batch, 0);
2858
2859     /* POC List */
2860     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2861         OUT_BCS_BATCH(batch, 0);
2862         OUT_BCS_BATCH(batch, 0);
2863     }
2864
2865     OUT_BCS_BATCH(batch, 0);
2866     OUT_BCS_BATCH(batch, 0);
2867
2868     ADVANCE_BCS_BATCH(batch);
2869 }
2870
2871 static void
2872 gen75_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2873                                   struct gen7_mfd_context *gen7_mfd_context)
2874 {
2875     struct i965_driver_data *i965 = i965_driver_data(ctx);
2876     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2877     int i;
2878
2879     if (IS_STEPPING_BPLUS(i965)) {
2880         gen75_jpeg_wa_avc_directmode_state_bplus(ctx, gen7_mfd_context);
2881         return;
2882     }   
2883
2884     BEGIN_BCS_BATCH(batch, 69);
2885     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
2886
2887     /* reference surfaces 0..15 */
2888     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2889         OUT_BCS_BATCH(batch, 0); /* top */
2890         OUT_BCS_BATCH(batch, 0); /* bottom */
2891     }
2892
2893     /* the current decoding frame/field */
2894     OUT_BCS_BATCH(batch, 0); /* top */
2895     OUT_BCS_BATCH(batch, 0); /* bottom */
2896
2897     /* POC List */
2898     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2899         OUT_BCS_BATCH(batch, 0);
2900         OUT_BCS_BATCH(batch, 0);
2901     }
2902
2903     OUT_BCS_BATCH(batch, 0);
2904     OUT_BCS_BATCH(batch, 0);
2905
2906     ADVANCE_BCS_BATCH(batch);
2907 }
2908
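/* MFX_IND_OBJ_BASE_ADDR_STATE: point the bitstream indirect object base at
 * jpeg_wa_slice_data_bo, which holds the embedded gen7_jpeg_wa_clip data. */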
2909 static void 
2910 gen75_jpeg_wa_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
2911                                      struct gen7_mfd_context *gen7_mfd_context)
2912 {
2913     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2914
2915     BEGIN_BCS_BATCH(batch, 11);
2916     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2917     OUT_BCS_RELOC(batch,
2918                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2919                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2920                   0);
2921     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2922     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2923     OUT_BCS_BATCH(batch, 0);
2924     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2925     OUT_BCS_BATCH(batch, 0);
2926     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2927     OUT_BCS_BATCH(batch, 0);
2928     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2929     OUT_BCS_BATCH(batch, 0);
2930     ADVANCE_BCS_BATCH(batch);
2931 }
2932
2933 static void
2934 gen75_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2935                                      struct gen7_mfd_context *gen7_mfd_context)
2936 {
2937     struct i965_driver_data *i965 = i965_driver_data(ctx);
2938     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2939
2940     if (IS_STEPPING_BPLUS(i965)) {
2941         gen75_jpeg_wa_ind_obj_base_addr_state_bplus(ctx, gen7_mfd_context);
2942         return;
2943     }   
2944
2945     BEGIN_BCS_BATCH(batch, 11);
2946     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2947     OUT_BCS_RELOC(batch,
2948                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2949                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2950                   0);
2951     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2952     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2953     OUT_BCS_BATCH(batch, 0);
2954     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2955     OUT_BCS_BATCH(batch, 0);
2956     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2957     OUT_BCS_BATCH(batch, 0);
2958     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2959     OUT_BCS_BATCH(batch, 0);
2960     ADVANCE_BCS_BATCH(batch);
2961 }
2962
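/* MFD_AVC_BSD_OBJECT: start decoding the clip's single slice, using the
 * byte/bit offset from gen7_jpeg_wa_clip and with the LastSlice flag set. */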
2963 static void
2964 gen75_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2965                             struct gen7_mfd_context *gen7_mfd_context)
2966 {
2967     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2968
2969     /* the input bitstream format on GEN7 differs from GEN6 */
2970     BEGIN_BCS_BATCH(batch, 6);
2971     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2972     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2973     OUT_BCS_BATCH(batch, 0);
2974     OUT_BCS_BATCH(batch,
2975                   (0 << 31) |
2976                   (0 << 14) |
2977                   (0 << 12) |
2978                   (0 << 10) |
2979                   (0 << 8));
2980     OUT_BCS_BATCH(batch,
2981                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2982                   (0 << 5)  |
2983                   (0 << 4)  |
2984                   (1 << 3) | /* LastSlice Flag */
2985                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2986     OUT_BCS_BATCH(batch, 0);
2987     ADVANCE_BCS_BATCH(batch);
2988 }
2989
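/* MFX_AVC_SLICE_STATE for the dummy clip: one intra slice covering the single
 * macroblock, deblocking disabled, QP taken from gen7_jpeg_wa_clip. */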
2990 static void
2991 gen75_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2992                              struct gen7_mfd_context *gen7_mfd_context)
2993 {
2994     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2995     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2996     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2997     int first_mb_in_slice = 0;
2998     int slice_type = SLICE_TYPE_I;
2999
3000     BEGIN_BCS_BATCH(batch, 11);
3001     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
3002     OUT_BCS_BATCH(batch, slice_type);
3003     OUT_BCS_BATCH(batch, 
3004                   (num_ref_idx_l1 << 24) |
3005                   (num_ref_idx_l0 << 16) |
3006                   (0 << 8) |
3007                   (0 << 0));
3008     OUT_BCS_BATCH(batch, 
3009                   (0 << 29) |
3010                   (1 << 27) |   /* disable Deblocking */
3011                   (0 << 24) |
3012                   (gen7_jpeg_wa_clip.qp << 16) |
3013                   (0 << 8) |
3014                   (0 << 0));
3015     OUT_BCS_BATCH(batch, 
3016                   (slice_ver_pos << 24) |
3017                   (slice_hor_pos << 16) | 
3018                   (first_mb_in_slice << 0));
3019     OUT_BCS_BATCH(batch,
3020                   (next_slice_ver_pos << 16) |
3021                   (next_slice_hor_pos << 0));
3022     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
3023     OUT_BCS_BATCH(batch, 0);
3024     OUT_BCS_BATCH(batch, 0);
3025     OUT_BCS_BATCH(batch, 0);
3026     OUT_BCS_BATCH(batch, 0);
3027     ADVANCE_BCS_BATCH(batch);
3028 }
3029
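/* Emit the whole dummy AVC decode: (re)create the scratch surface and slice
 * data, then program pipe mode, surface, buffer/BSP addresses, QM, image,
 * indirect object, direct-mode, slice and BSD object state in that order. */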
3030 static void
3031 gen75_mfd_jpeg_wa(VADriverContextP ctx,
3032                  struct gen7_mfd_context *gen7_mfd_context)
3033 {
3034     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3035     gen75_jpeg_wa_init(ctx, gen7_mfd_context);
3036     intel_batchbuffer_emit_mi_flush(batch);
3037     gen75_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
3038     gen75_jpeg_wa_surface_state(ctx, gen7_mfd_context);
3039     gen75_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
3040     gen75_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
3041     gen75_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
3042     gen75_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
3043     gen75_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
3044
3045     gen75_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
3046     gen75_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
3047     gen75_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
3048 }
3049
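/*
 * Decode one JPEG baseline picture.  The AVC workaround above is emitted
 * first, followed by the real MFX_FORMAT_JPEG pipeline setup.  The slice
 * parameters are walked twice: the first pass only finds the largest Huffman
 * table selector so that the right number of tables is loaded, the second
 * pass emits one JPEG BSD object per slice element.
 */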
3050 void
3051 gen75_mfd_jpeg_decode_picture(VADriverContextP ctx,
3052                              struct decode_state *decode_state,
3053                              struct gen7_mfd_context *gen7_mfd_context)
3054 {
3055     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3056     VAPictureParameterBufferJPEGBaseline *pic_param;
3057     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
3058     dri_bo *slice_data_bo;
3059     int i, j, max_selector = 0;
3060
3061     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3062     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
3063
3064     /* Currently only baseline DCT is supported */
3065     gen75_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
3066     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3067     gen75_mfd_jpeg_wa(ctx, gen7_mfd_context);
3068     intel_batchbuffer_emit_mi_flush(batch);
3069     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3070     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3071     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3072     gen75_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
3073     gen75_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
3074
3075     for (j = 0; j < decode_state->num_slice_params; j++) {
3076         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3077         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3078         slice_data_bo = decode_state->slice_datas[j]->bo;
3079         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3080
3081         if (j == decode_state->num_slice_params - 1)
3082             next_slice_group_param = NULL;
3083         else
3084             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3085
3086         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3087             int component;
3088
3089             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3090
3091             if (i < decode_state->slice_params[j]->num_elements - 1)
3092                 next_slice_param = slice_param + 1;
3093             else
3094                 next_slice_param = next_slice_group_param;
3095
3096             for (component = 0; component < slice_param->num_components; component++) {
3097                 if (max_selector < slice_param->components[component].dc_table_selector)
3098                     max_selector = slice_param->components[component].dc_table_selector;
3099
3100                 if (max_selector < slice_param->components[component].ac_table_selector)
3101                     max_selector = slice_param->components[component].ac_table_selector;
3102             }
3103
3104             slice_param++;
3105         }
3106     }
3107
3108     assert(max_selector < 2);
3109     gen75_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
3110
3111     for (j = 0; j < decode_state->num_slice_params; j++) {
3112         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3113         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3114         slice_data_bo = decode_state->slice_datas[j]->bo;
3115         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3116
3117         if (j == decode_state->num_slice_params - 1)
3118             next_slice_group_param = NULL;
3119         else
3120             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3121
3122         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3123             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3124
3125             if (i < decode_state->slice_params[j]->num_elements - 1)
3126                 next_slice_param = slice_param + 1;
3127             else
3128                 next_slice_param = next_slice_group_param;
3129
3130             gen75_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
3131             slice_param++;
3132         }
3133     }
3134
3135     intel_batchbuffer_end_atomic(batch);
3136     intel_batchbuffer_flush(batch);
3137 }
3138
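/* Top-level decode entry point: sanity-check the input buffers, then dispatch
 * to the per-codec decode function based on the profile. */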
3139 static VAStatus
3140 gen75_mfd_decode_picture(VADriverContextP ctx, 
3141                         VAProfile profile, 
3142                         union codec_state *codec_state,
3143                         struct hw_context *hw_context)
3145 {
3146     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3147     struct decode_state *decode_state = &codec_state->decode;
3148     VAStatus vaStatus;
3149
3150     assert(gen7_mfd_context);
3151
3152     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3153
3154     if (vaStatus != VA_STATUS_SUCCESS)
3155         goto out;
3156
3157     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3158
3159     switch (profile) {
3160     case VAProfileMPEG2Simple:
3161     case VAProfileMPEG2Main:
3162         gen75_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3163         break;
3164         
3165     case VAProfileH264ConstrainedBaseline:
3166     case VAProfileH264Main:
3167     case VAProfileH264High:
3168     case VAProfileH264StereoHigh:
3169     case VAProfileH264MultiviewHigh:
3170         gen75_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3171         break;
3172
3173     case VAProfileVC1Simple:
3174     case VAProfileVC1Main:
3175     case VAProfileVC1Advanced:
3176         gen75_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3177         break;
3178
3179     case VAProfileJPEGBaseline:
3180         gen75_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3181         break;
3182
3183     default:
3184         assert(0);
3185         break;
3186     }
3187
3188     vaStatus = VA_STATUS_SUCCESS;
3189
3190 out:
3191     return vaStatus;
3192 }
3193
3194 static void
3195 gen75_mfd_context_destroy(void *hw_context)
3196 {
3197     VADriverContextP ctx;
3198     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3199
3200     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3201
3202     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3203     gen7_mfd_context->post_deblocking_output.bo = NULL;
3204
3205     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3206     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3207
3208     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3209     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3210
3211     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3212     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3213
3214     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3215     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3216
3217     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3218     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3219
3220     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3221     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3222
3223     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3224
3225     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3226         i965_DestroySurfaces(ctx,
3227                              &gen7_mfd_context->jpeg_wa_surface_id,
3228                              1);
3229         gen7_mfd_context->jpeg_wa_surface_object = NULL;
3230     }
3231
3232     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3233     free(gen7_mfd_context);
3234 }
3235
3236 static void gen75_mfd_mpeg2_context_init(VADriverContextP ctx,
3237                                     struct gen7_mfd_context *gen7_mfd_context)
3238 {
3239     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3240     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3241     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3242     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3243 }
3244
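/* Create a gen7.5 decoder context: allocate the batch buffer, reset the
 * reference surface slots, mark the JPEG workaround surface as invalid, and
 * run per-codec init for the MPEG-2 and AVC profiles. */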
3245 struct hw_context *
3246 gen75_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3247 {
3248     struct intel_driver_data *intel = intel_driver_data(ctx);
3249     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3250     int i;
3251
3252     assert(gen7_mfd_context);
3253     gen7_mfd_context->base.destroy = gen75_mfd_context_destroy;
3254     gen7_mfd_context->base.run = gen75_mfd_decode_picture;
3255     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3256
3257     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3258         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3259         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3260         gen7_mfd_context->reference_surface[i].obj_surface = NULL;
3261     }
3262
3263     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3264     gen7_mfd_context->jpeg_wa_surface_object = NULL;
3265
3266     switch (obj_config->profile) {
3267     case VAProfileMPEG2Simple:
3268     case VAProfileMPEG2Main:
3269         gen75_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3270         break;
3271
3272     case VAProfileH264ConstrainedBaseline:
3273     case VAProfileH264Main:
3274     case VAProfileH264High:
3275     case VAProfileH264StereoHigh:
3276     case VAProfileH264MultiviewHigh:
3277         gen75_mfd_avc_context_init(ctx, gen7_mfd_context);
3278         break;
3279     default:
3280         break;
3281     }
3282
3283     gen7_mfd_context->driver_context = ctx;
3284     return (struct hw_context *)gen7_mfd_context;
3285 }