
decoder: h264: factor out look ups for VA/H264 picture info.
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV             2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
51 static const uint32_t zigzag_direct[64] = {
52     0,   1,  8, 16,  9,  2,  3, 10,
53     17, 24, 32, 25, 18, 11,  4,  5,
54     12, 19, 26, 33, 40, 48, 41, 34,
55     27, 20, 13,  6,  7, 14, 21, 28,
56     35, 42, 49, 56, 57, 50, 43, 36,
57     29, 22, 15, 23, 30, 37, 44, 51,
58     58, 59, 52, 45, 38, 31, 39, 46,
59     53, 60, 61, 54, 47, 55, 62, 63
60 };
61
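/* Allocate the per-surface AVC private data (GenAvcSurface) on first use and
   the whole-frame direct-MV write/read buffer (128 bytes per macroblock). */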
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx, 
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77         assert((obj_surface->size & 0x3f) == 0);
78         obj_surface->private_data = gen7_avc_surface;
79     }
80
81     /* DMV buffers now relate to the whole frame, irrespective of
82        field coding modes */
83     if (gen7_avc_surface->dmv_top == NULL) {
84         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
85                                                  "direct mv w/r buffer",
86                                                  width_in_mbs * height_in_mbs * 128,
87                                                  0x1000);
88         assert(gen7_avc_surface->dmv_top);
89     }
90 }
91
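/* Emit MFX_PIPE_MODE_SELECT: long-format VLD decoding for the selected codec,
   with the pre-/post-deblocking outputs enabled as configured in the context. */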
92 static void
93 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
94                           struct decode_state *decode_state,
95                           int standard_select,
96                           struct gen7_mfd_context *gen7_mfd_context)
97 {
98     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
99
100     assert(standard_select == MFX_FORMAT_MPEG2 ||
101            standard_select == MFX_FORMAT_AVC ||
102            standard_select == MFX_FORMAT_VC1 ||
103            standard_select == MFX_FORMAT_JPEG ||
104            standard_select == MFX_FORMAT_VP8);
105
106     BEGIN_BCS_BATCH(batch, 5);
107     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
108     OUT_BCS_BATCH(batch,
109                   (MFX_LONG_MODE << 17) | /* Currently only the long format is supported */
110                   (MFD_MODE_VLD << 15) | /* VLD mode */
111                   (0 << 10) | /* disable Stream-Out */
112                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
113                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
114                   (0 << 5)  | /* not in stitch mode */
115                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
116                   (standard_select << 0));
117     OUT_BCS_BATCH(batch,
118                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
119                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
120                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
121                   (0 << 1)  |
122                   (0 << 0));
123     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
124     OUT_BCS_BATCH(batch, 0); /* reserved */
125     ADVANCE_BCS_BATCH(batch);
126 }
127
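/* Emit MFX_SURFACE_STATE for the destination surface: dimensions, pitch,
   tiling and the Cb/Cr plane offsets (planar 4:2:0 or monochrome). */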
128 static void
129 gen8_mfd_surface_state(VADriverContextP ctx,
130                        struct decode_state *decode_state,
131                        int standard_select,
132                        struct gen7_mfd_context *gen7_mfd_context)
133 {
134     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
135     struct object_surface *obj_surface = decode_state->render_object;
136     unsigned int y_cb_offset;
137     unsigned int y_cr_offset;
138     unsigned int surface_format;
139
140     assert(obj_surface);
141
142     y_cb_offset = obj_surface->y_cb_offset;
143     y_cr_offset = obj_surface->y_cr_offset;
144
145     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
146         MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
147
148     BEGIN_BCS_BATCH(batch, 6);
149     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
150     OUT_BCS_BATCH(batch, 0);
151     OUT_BCS_BATCH(batch,
152                   ((obj_surface->orig_height - 1) << 18) |
153                   ((obj_surface->orig_width - 1) << 4));
154     OUT_BCS_BATCH(batch,
155                   (surface_format << 28) | /* 420 planar YUV surface */
156                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
157                   (0 << 22) | /* surface object control state, ignored */
158                   ((obj_surface->width - 1) << 3) | /* pitch */
159                   (0 << 2)  | /* must be 0 */
160                   (1 << 1)  | /* must be tiled */
161                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
162     OUT_BCS_BATCH(batch,
163                   (0 << 16) | /* X offset for U(Cb), must be 0 */
164                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
165     OUT_BCS_BATCH(batch,
166                   (0 << 16) | /* X offset for V(Cr), must be 0 */
167                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
168     ADVANCE_BCS_BATCH(batch);
169 }
170
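/* Emit MFX_PIPE_BUF_ADDR_STATE: pre-/post-deblocking destinations, row-store
   scratch buffers and the reference picture addresses (DW 19..50). */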
171 static void
172 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
173                              struct decode_state *decode_state,
174                              int standard_select,
175                              struct gen7_mfd_context *gen7_mfd_context)
176 {
177     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
178     int i;
179
180     BEGIN_BCS_BATCH(batch, 61);
181     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
182         /* Pre-deblock 1-3 */
183     if (gen7_mfd_context->pre_deblocking_output.valid)
184         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
185                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
186                       0);
187     else
188         OUT_BCS_BATCH(batch, 0);
189
190         OUT_BCS_BATCH(batch, 0);
191         OUT_BCS_BATCH(batch, 0);
192         /* Post-deblocking 4-6 */
193     if (gen7_mfd_context->post_deblocking_output.valid)
194         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
195                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
196                       0);
197     else
198         OUT_BCS_BATCH(batch, 0);
199
200         OUT_BCS_BATCH(batch, 0);
201         OUT_BCS_BATCH(batch, 0);
202
203         /* uncompressed-video & stream out 7-12 */
204     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
205     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
206         OUT_BCS_BATCH(batch, 0);
207         OUT_BCS_BATCH(batch, 0);
208         OUT_BCS_BATCH(batch, 0);
209         OUT_BCS_BATCH(batch, 0);
210
211         /* intra row-store scratch 13-15 */
212     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
213         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
214                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
215                       0);
216     else
217         OUT_BCS_BATCH(batch, 0);
218
219         OUT_BCS_BATCH(batch, 0);
220         OUT_BCS_BATCH(batch, 0);
221         /* deblocking-filter-row-store 16-18 */
222     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
223         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
224                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
225                       0);
226     else
227         OUT_BCS_BATCH(batch, 0);
228         OUT_BCS_BATCH(batch, 0);
229         OUT_BCS_BATCH(batch, 0);
230
231     /* DW 19..50 */
232     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
233         struct object_surface *obj_surface;
234
235         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
236             gen7_mfd_context->reference_surface[i].obj_surface &&
237             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
238             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
239
240             OUT_BCS_RELOC(batch, obj_surface->bo,
241                           I915_GEM_DOMAIN_INSTRUCTION, 0,
242                           0);
243         } else {
244             OUT_BCS_BATCH(batch, 0);
245         }
246         
247         OUT_BCS_BATCH(batch, 0);
248     }
249     
250     /* reference property 51 */
251     OUT_BCS_BATCH(batch, 0);  
252         
253     /* Macroblock status & ILDB 52-57 */
254     OUT_BCS_BATCH(batch, 0);
255     OUT_BCS_BATCH(batch, 0);
256     OUT_BCS_BATCH(batch, 0);
257     OUT_BCS_BATCH(batch, 0);
258     OUT_BCS_BATCH(batch, 0);
259     OUT_BCS_BATCH(batch, 0);
260
261     /* the second Macroblock status 58-60 */    
262     OUT_BCS_BATCH(batch, 0);
263     OUT_BCS_BATCH(batch, 0);
264     OUT_BCS_BATCH(batch, 0);
265
266     ADVANCE_BCS_BATCH(batch);
267 }
268
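/* Emit MFX_IND_OBJ_BASE_ADDR_STATE pointing the bitstream fetch at the slice
   data buffer; the remaining indirect objects (MV, IT, PAK-BSE) are left zero. */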
269 static void
270 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
271                                  dri_bo *slice_data_bo,
272                                  int standard_select,
273                                  struct gen7_mfd_context *gen7_mfd_context)
274 {
275     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
276
277     BEGIN_BCS_BATCH(batch, 26);
278     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
279         /* MFX In BS 1-5 */
280     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
281     OUT_BCS_BATCH(batch, 0);
282     OUT_BCS_BATCH(batch, 0);
283         /* Upper bound 4-5 */   
284     OUT_BCS_BATCH(batch, 0x80000000); /* must be set, up to 2G */
285     OUT_BCS_BATCH(batch, 0);
286
287         /* MFX indirect MV 6-10 */
288     OUT_BCS_BATCH(batch, 0);
289     OUT_BCS_BATCH(batch, 0);
290     OUT_BCS_BATCH(batch, 0);
291     OUT_BCS_BATCH(batch, 0);
292     OUT_BCS_BATCH(batch, 0);
293         
294         /* MFX IT_COFF 11-15 */
295     OUT_BCS_BATCH(batch, 0);
296     OUT_BCS_BATCH(batch, 0);
297     OUT_BCS_BATCH(batch, 0);
298     OUT_BCS_BATCH(batch, 0);
299     OUT_BCS_BATCH(batch, 0);
300
301         /* MFX IT_DBLK 16-20 */
302     OUT_BCS_BATCH(batch, 0);
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305     OUT_BCS_BATCH(batch, 0);
306     OUT_BCS_BATCH(batch, 0);
307
308         /* MFX PAK_BSE object for encoder 21-25 */
309     OUT_BCS_BATCH(batch, 0);
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312     OUT_BCS_BATCH(batch, 0);
313     OUT_BCS_BATCH(batch, 0);
314
315     ADVANCE_BCS_BATCH(batch);
316 }
317
318 static void
319 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
320                                  struct decode_state *decode_state,
321                                  int standard_select,
322                                  struct gen7_mfd_context *gen7_mfd_context)
323 {
324     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
325
326     BEGIN_BCS_BATCH(batch, 10);
327     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
328
329     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
330         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
331                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
332                       0);
333     else
334         OUT_BCS_BATCH(batch, 0);
335
336     OUT_BCS_BATCH(batch, 0);
337     OUT_BCS_BATCH(batch, 0);
338         /* MPR Row Store Scratch buffer 4-6 */
339     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
340         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
341                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
342                       0);
343     else
344         OUT_BCS_BATCH(batch, 0);
345
346     OUT_BCS_BATCH(batch, 0);
347     OUT_BCS_BATCH(batch, 0);
348
349         /* Bitplane 7-9 */ 
350     if (gen7_mfd_context->bitplane_read_buffer.valid)
351         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
352                       I915_GEM_DOMAIN_INSTRUCTION, 0,
353                       0);
354     else
355         OUT_BCS_BATCH(batch, 0);
356     OUT_BCS_BATCH(batch, 0);
357     OUT_BCS_BATCH(batch, 0);
358     ADVANCE_BCS_BATCH(batch);
359 }
360
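/* Load a single quantizer matrix (up to 64 bytes) into the MFX unit via MFX_QM_STATE. */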
361 static void
362 gen8_mfd_qm_state(VADriverContextP ctx,
363                   int qm_type,
364                   unsigned char *qm,
365                   int qm_length,
366                   struct gen7_mfd_context *gen7_mfd_context)
367 {
368     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
369     unsigned int qm_buffer[16];
370
371     assert(qm_length <= 16 * 4);
372     memcpy(qm_buffer, qm, qm_length);
373
374     BEGIN_BCS_BATCH(batch, 18);
375     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
376     OUT_BCS_BATCH(batch, qm_type << 0);
377     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
378     ADVANCE_BCS_BATCH(batch);
379 }
380
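/* Emit MFX_AVC_IMG_STATE from the VA picture parameters: frame size in
   macroblocks, chroma QP offsets, field/MBAFF structure and coding flags. */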
381 static void
382 gen8_mfd_avc_img_state(VADriverContextP ctx,
383                        struct decode_state *decode_state,
384                        struct gen7_mfd_context *gen7_mfd_context)
385 {
386     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
387     int img_struct;
388     int mbaff_frame_flag;
389     unsigned int width_in_mbs, height_in_mbs;
390     VAPictureParameterBufferH264 *pic_param;
391
392     assert(decode_state->pic_param && decode_state->pic_param->buffer);
393     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
394     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
395
396     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
397         img_struct = 1;
398     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
399         img_struct = 3;
400     else
401         img_struct = 0;
402
403     if ((img_struct & 0x1) == 0x1) {
404         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
405     } else {
406         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
407     }
408
409     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
410         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
411         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
412     } else {
413         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
414     }
415
416     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
417                         !pic_param->pic_fields.bits.field_pic_flag);
418
419     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
420     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
421
422     /* MFX unit doesn't support 4:2:2 and 4:4:4 pictures */
423     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
424            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
425     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
426
427     BEGIN_BCS_BATCH(batch, 17);
428     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
429     OUT_BCS_BATCH(batch, 
430                   (width_in_mbs * height_in_mbs - 1));
431     OUT_BCS_BATCH(batch, 
432                   ((height_in_mbs - 1) << 16) | 
433                   ((width_in_mbs - 1) << 0));
434     OUT_BCS_BATCH(batch, 
435                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
436                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
437                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
438                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
439                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
440                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
441                   (img_struct << 8));
442     OUT_BCS_BATCH(batch,
443                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
444                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
445                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
446                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
447                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
448                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
449                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
450                   (mbaff_frame_flag << 1) |
451                   (pic_param->pic_fields.bits.field_pic_flag << 0));
452     OUT_BCS_BATCH(batch, 0);
453     OUT_BCS_BATCH(batch, 0);
454     OUT_BCS_BATCH(batch, 0);
455     OUT_BCS_BATCH(batch, 0);
456     OUT_BCS_BATCH(batch, 0);
457     OUT_BCS_BATCH(batch, 0);
458     OUT_BCS_BATCH(batch, 0);
459     OUT_BCS_BATCH(batch, 0);
460     OUT_BCS_BATCH(batch, 0);
461     OUT_BCS_BATCH(batch, 0);
462     OUT_BCS_BATCH(batch, 0);
463     OUT_BCS_BATCH(batch, 0);
464     ADVANCE_BCS_BATCH(batch);
465 }
466
467 static void
468 gen8_mfd_avc_qm_state(VADriverContextP ctx,
469                       struct decode_state *decode_state,
470                       struct gen7_mfd_context *gen7_mfd_context)
471 {
472     VAIQMatrixBufferH264 *iq_matrix;
473     VAPictureParameterBufferH264 *pic_param;
474
475     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
476         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
477     else
478         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
479
480     assert(decode_state->pic_param && decode_state->pic_param->buffer);
481     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
482
483     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
484     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
485
486     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
487         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
488         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
489     }
490 }
491
492 static void
493 gen8_mfd_avc_picid_state(VADriverContextP ctx,
494                       struct decode_state *decode_state,
495                       struct gen7_mfd_context *gen7_mfd_context)
496 {
497     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
498
499     BEGIN_BCS_BATCH(batch, 10);
500     OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
501     OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
502     OUT_BCS_BATCH(batch, 0);
503     OUT_BCS_BATCH(batch, 0);
504     OUT_BCS_BATCH(batch, 0);
505     OUT_BCS_BATCH(batch, 0);
506     OUT_BCS_BATCH(batch, 0);
507     OUT_BCS_BATCH(batch, 0);
508     OUT_BCS_BATCH(batch, 0);
509     OUT_BCS_BATCH(batch, 0);
510     ADVANCE_BCS_BATCH(batch);
511 }
512
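/* Emit MFX_AVC_DIRECTMODE_STATE: direct-MV buffers and POC pairs for the
   reference frames and for the current picture. */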
513 static void
514 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
515                               struct decode_state *decode_state,
516                               VAPictureParameterBufferH264 *pic_param,
517                               VASliceParameterBufferH264 *slice_param,
518                               struct gen7_mfd_context *gen7_mfd_context)
519 {
520     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
521     struct object_surface *obj_surface;
522     GenAvcSurface *gen7_avc_surface;
523     VAPictureH264 *va_pic;
524     int i;
525
526     BEGIN_BCS_BATCH(batch, 71);
527     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
528
529     /* reference surfaces 0..15 */
530     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
531         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
532             gen7_mfd_context->reference_surface[i].obj_surface &&
533             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
534
535             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
536             gen7_avc_surface = obj_surface->private_data;
537
538             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
539                           I915_GEM_DOMAIN_INSTRUCTION, 0,
540                           0);
541             OUT_BCS_BATCH(batch, 0);
542         } else {
543             OUT_BCS_BATCH(batch, 0);
544             OUT_BCS_BATCH(batch, 0);
545         }
546     }
547     
548     OUT_BCS_BATCH(batch, 0);
549
550     /* the current decoding frame/field */
551     va_pic = &pic_param->CurrPic;
552     obj_surface = decode_state->render_object;
553     assert(obj_surface->bo && obj_surface->private_data);
554     gen7_avc_surface = obj_surface->private_data;
555
556     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
557                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
558                   0);
559
560     OUT_BCS_BATCH(batch, 0);
561     OUT_BCS_BATCH(batch, 0);
562
563     /* POC List */
564     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
565         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
566
567         if (obj_surface) {
568             const VAPictureH264 * const va_pic = avc_find_picture(
569                 obj_surface->base.id, pic_param->ReferenceFrames,
570                 ARRAY_ELEMS(pic_param->ReferenceFrames));
571
572             assert(va_pic != NULL);
573             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
574             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
575         } else {
576             OUT_BCS_BATCH(batch, 0);
577             OUT_BCS_BATCH(batch, 0);
578         }
579     }
580
581     va_pic = &pic_param->CurrPic;
582     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
583     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
584
585     ADVANCE_BCS_BATCH(batch);
586 }
587
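/* Emit MFX_AVC_SLICE_STATE for one slice: slice type, active reference counts,
   QP/deblocking parameters and the first-MB positions of this and the next slice. */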
588 static void
589 gen8_mfd_avc_slice_state(VADriverContextP ctx,
590                          VAPictureParameterBufferH264 *pic_param,
591                          VASliceParameterBufferH264 *slice_param,
592                          VASliceParameterBufferH264 *next_slice_param,
593                          struct gen7_mfd_context *gen7_mfd_context)
594 {
595     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
596     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
597     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
598     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
599     int num_ref_idx_l0, num_ref_idx_l1;
600     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
601                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
602     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
603     int slice_type;
604
605     if (slice_param->slice_type == SLICE_TYPE_I ||
606         slice_param->slice_type == SLICE_TYPE_SI) {
607         slice_type = SLICE_TYPE_I;
608     } else if (slice_param->slice_type == SLICE_TYPE_P ||
609                slice_param->slice_type == SLICE_TYPE_SP) {
610         slice_type = SLICE_TYPE_P;
611     } else { 
612         assert(slice_param->slice_type == SLICE_TYPE_B);
613         slice_type = SLICE_TYPE_B;
614     }
615
616     if (slice_type == SLICE_TYPE_I) {
617         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
618         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
619         num_ref_idx_l0 = 0;
620         num_ref_idx_l1 = 0;
621     } else if (slice_type == SLICE_TYPE_P) {
622         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
623         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
624         num_ref_idx_l1 = 0;
625     } else {
626         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
627         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
628     }
629
630     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
631     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
632     slice_ver_pos = first_mb_in_slice / width_in_mbs;
633
634     if (next_slice_param) {
635         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
636         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
637         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
638     } else {
639         next_slice_hor_pos = 0;
640         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
641     }
642
643     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
644     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
645     OUT_BCS_BATCH(batch, slice_type);
646     OUT_BCS_BATCH(batch, 
647                   (num_ref_idx_l1 << 24) |
648                   (num_ref_idx_l0 << 16) |
649                   (slice_param->chroma_log2_weight_denom << 8) |
650                   (slice_param->luma_log2_weight_denom << 0));
651     OUT_BCS_BATCH(batch, 
652                   (slice_param->direct_spatial_mv_pred_flag << 29) |
653                   (slice_param->disable_deblocking_filter_idc << 27) |
654                   (slice_param->cabac_init_idc << 24) |
655                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
656                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
657                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
658     OUT_BCS_BATCH(batch, 
659                   (slice_ver_pos << 24) |
660                   (slice_hor_pos << 16) | 
661                   (first_mb_in_slice << 0));
662     OUT_BCS_BATCH(batch,
663                   (next_slice_ver_pos << 16) |
664                   (next_slice_hor_pos << 0));
665     OUT_BCS_BATCH(batch, 
666                   (next_slice_param == NULL) << 19); /* last slice flag */
667     OUT_BCS_BATCH(batch, 0);
668     OUT_BCS_BATCH(batch, 0);
669     OUT_BCS_BATCH(batch, 0);
670     OUT_BCS_BATCH(batch, 0);
671     ADVANCE_BCS_BATCH(batch);
672 }
673
674 static inline void
675 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
676                            VAPictureParameterBufferH264 *pic_param,
677                            VASliceParameterBufferH264 *slice_param,
678                            struct gen7_mfd_context *gen7_mfd_context)
679 {
680     gen6_send_avc_ref_idx_state(
681         gen7_mfd_context->base.batch,
682         slice_param,
683         gen7_mfd_context->reference_surface
684     );
685 }
686
687 static void
688 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
689                                 VAPictureParameterBufferH264 *pic_param,
690                                 VASliceParameterBufferH264 *slice_param,
691                                 struct gen7_mfd_context *gen7_mfd_context)
692 {
693     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
694     int i, j, num_weight_offset_table = 0;
695     short weightoffsets[32 * 6];
696
697     if ((slice_param->slice_type == SLICE_TYPE_P ||
698          slice_param->slice_type == SLICE_TYPE_SP) &&
699         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
700         num_weight_offset_table = 1;
701     }
702     
703     if ((slice_param->slice_type == SLICE_TYPE_B) &&
704         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
705         num_weight_offset_table = 2;
706     }
707
708     for (i = 0; i < num_weight_offset_table; i++) {
709         BEGIN_BCS_BATCH(batch, 98);
710         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
711         OUT_BCS_BATCH(batch, i);
712
713         if (i == 0) {
714             for (j = 0; j < 32; j++) {
715                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
716                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
717                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
718                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
719                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
720                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
721             }
722         } else {
723             for (j = 0; j < 32; j++) {
724                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
725                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
726                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
727                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
728                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
729                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
730             }
731         }
732
733         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
734         ADVANCE_BCS_BATCH(batch);
735     }
736 }
737
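/* Emit MFD_AVC_BSD_OBJECT describing the slice bitstream: size, offset and the
   bit position of the first macroblock after the slice header. */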
738 static void
739 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
740                         VAPictureParameterBufferH264 *pic_param,
741                         VASliceParameterBufferH264 *slice_param,
742                         dri_bo *slice_data_bo,
743                         VASliceParameterBufferH264 *next_slice_param,
744                         struct gen7_mfd_context *gen7_mfd_context)
745 {
746     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
747     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
748                                                             slice_param,
749                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
750
751     /* the input bitstream format on GEN7 differs from GEN6 */
752     BEGIN_BCS_BATCH(batch, 6);
753     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
754     OUT_BCS_BATCH(batch, 
755                   (slice_param->slice_data_size));
756     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
757     OUT_BCS_BATCH(batch,
758                   (0 << 31) |
759                   (0 << 14) |
760                   (0 << 12) |
761                   (0 << 10) |
762                   (0 << 8));
763     OUT_BCS_BATCH(batch,
764                   ((slice_data_bit_offset >> 3) << 16) |
765                   (1 << 7)  |
766                   (0 << 5)  |
767                   (0 << 4)  |
768                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
769                   (slice_data_bit_offset & 0x7));
770     OUT_BCS_BATCH(batch, 0);
771     ADVANCE_BCS_BATCH(batch);
772 }
773
774 static inline void
775 gen8_mfd_avc_context_init(
776     VADriverContextP         ctx,
777     struct gen7_mfd_context *gen7_mfd_context
778 )
779 {
780     /* Initialize flat scaling lists */
781     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
782 }
783
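/* Per-picture AVC setup: detect whether in-loop deblocking is needed, update the
   reference frame store, ensure the render surface and its DMV buffer exist, and
   (re)allocate the row-store scratch buffers sized from the frame width. */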
784 static void
785 gen8_mfd_avc_decode_init(VADriverContextP ctx,
786                          struct decode_state *decode_state,
787                          struct gen7_mfd_context *gen7_mfd_context)
788 {
789     VAPictureParameterBufferH264 *pic_param;
790     VASliceParameterBufferH264 *slice_param;
791     struct i965_driver_data *i965 = i965_driver_data(ctx);
792     struct object_surface *obj_surface;
793     dri_bo *bo;
794     int i, j, enable_avc_ildb = 0;
795     unsigned int width_in_mbs, height_in_mbs;
796
797     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
798         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
799         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
800
801         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
802             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
803             assert((slice_param->slice_type == SLICE_TYPE_I) ||
804                    (slice_param->slice_type == SLICE_TYPE_SI) ||
805                    (slice_param->slice_type == SLICE_TYPE_P) ||
806                    (slice_param->slice_type == SLICE_TYPE_SP) ||
807                    (slice_param->slice_type == SLICE_TYPE_B));
808
809             if (slice_param->disable_deblocking_filter_idc != 1) {
810                 enable_avc_ildb = 1;
811                 break;
812             }
813
814             slice_param++;
815         }
816     }
817
818     assert(decode_state->pic_param && decode_state->pic_param->buffer);
819     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
820     intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
821     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
822     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
823     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
824     assert(height_in_mbs > 0 && height_in_mbs <= 256);
825
826     /* Current decoded picture */
827     obj_surface = decode_state->render_object;
828     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
829     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
830
831     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
832     gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
833
834     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
835     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
836     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
837     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
838
839     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
840     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
841     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
842     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
843
844     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
845     bo = dri_bo_alloc(i965->intel.bufmgr,
846                       "intra row store",
847                       width_in_mbs * 64,
848                       0x1000);
849     assert(bo);
850     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
851     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
852
853     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
854     bo = dri_bo_alloc(i965->intel.bufmgr,
855                       "deblocking filter row store",
856                       width_in_mbs * 64 * 4,
857                       0x1000);
858     assert(bo);
859     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
860     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
861
862     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
863     bo = dri_bo_alloc(i965->intel.bufmgr,
864                       "bsd mpc row store",
865                       width_in_mbs * 64 * 2,
866                       0x1000);
867     assert(bo);
868     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
869     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
870
871     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
872     bo = dri_bo_alloc(i965->intel.bufmgr,
873                       "mpr row store",
874                       width_in_mbs * 64 * 2,
875                       0x1000);
876     assert(bo);
877     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
878     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
879
880     gen7_mfd_context->bitplane_read_buffer.valid = 0;
881 }
882
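/* Top-level AVC decode: program the per-picture state, then emit per-slice
   state and BSD objects for every slice in every slice parameter buffer. */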
883 static void
884 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
885                             struct decode_state *decode_state,
886                             struct gen7_mfd_context *gen7_mfd_context)
887 {
888     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
889     VAPictureParameterBufferH264 *pic_param;
890     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
891     dri_bo *slice_data_bo;
892     int i, j;
893
894     assert(decode_state->pic_param && decode_state->pic_param->buffer);
895     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
896     gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
897
898     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
899     intel_batchbuffer_emit_mi_flush(batch);
900     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
901     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
902     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
903     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
904     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
905     gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
906     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
907
908     for (j = 0; j < decode_state->num_slice_params; j++) {
909         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
910         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
911         slice_data_bo = decode_state->slice_datas[j]->bo;
912         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
913
914         if (j == decode_state->num_slice_params - 1)
915             next_slice_group_param = NULL;
916         else
917             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
918
919         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
920             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
921             assert((slice_param->slice_type == SLICE_TYPE_I) ||
922                    (slice_param->slice_type == SLICE_TYPE_SI) ||
923                    (slice_param->slice_type == SLICE_TYPE_P) ||
924                    (slice_param->slice_type == SLICE_TYPE_SP) ||
925                    (slice_param->slice_type == SLICE_TYPE_B));
926
927             if (i < decode_state->slice_params[j]->num_elements - 1)
928                 next_slice_param = slice_param + 1;
929             else
930                 next_slice_param = next_slice_group_param;
931
932             gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
933             gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
934             gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
935             gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
936             gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
937             slice_param++;
938         }
939     }
940
941     intel_batchbuffer_end_atomic(batch);
942     intel_batchbuffer_flush(batch);
943 }
944
945 static void
946 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
947                            struct decode_state *decode_state,
948                            struct gen7_mfd_context *gen7_mfd_context)
949 {
950     VAPictureParameterBufferMPEG2 *pic_param;
951     struct i965_driver_data *i965 = i965_driver_data(ctx);
952     struct object_surface *obj_surface;
953     dri_bo *bo;
954     unsigned int width_in_mbs;
955
956     assert(decode_state->pic_param && decode_state->pic_param->buffer);
957     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
958     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
959
960     mpeg2_set_reference_surfaces(
961         ctx,
962         gen7_mfd_context->reference_surface,
963         decode_state,
964         pic_param
965     );
966
967     /* Current decoded picture */
968     obj_surface = decode_state->render_object;
969     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
970
971     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
972     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
973     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
974     gen7_mfd_context->pre_deblocking_output.valid = 1;
975
976     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
977     bo = dri_bo_alloc(i965->intel.bufmgr,
978                       "bsd mpc row store",
979                       width_in_mbs * 96,
980                       0x1000);
981     assert(bo);
982     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
983     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
984
985     gen7_mfd_context->post_deblocking_output.valid = 0;
986     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
987     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
988     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
989     gen7_mfd_context->bitplane_read_buffer.valid = 0;
990 }
991
992 static void
993 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
994                          struct decode_state *decode_state,
995                          struct gen7_mfd_context *gen7_mfd_context)
996 {
997     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
998     VAPictureParameterBufferMPEG2 *pic_param;
999     unsigned int slice_concealment_disable_bit = 0;
1000
1001     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1002     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1003
1004     slice_concealment_disable_bit = 1;
1005
1006     BEGIN_BCS_BATCH(batch, 13);
1007     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1008     OUT_BCS_BATCH(batch,
1009                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1010                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1011                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1012                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1013                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1014                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1015                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1016                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1017                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1018                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1019                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1020                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1021     OUT_BCS_BATCH(batch,
1022                   pic_param->picture_coding_type << 9);
1023     OUT_BCS_BATCH(batch,
1024                   (slice_concealment_disable_bit << 31) |
1025                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1026                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1027     OUT_BCS_BATCH(batch, 0);
1028     OUT_BCS_BATCH(batch, 0);
1029     OUT_BCS_BATCH(batch, 0);
1030     OUT_BCS_BATCH(batch, 0);
1031     OUT_BCS_BATCH(batch, 0);
1032     OUT_BCS_BATCH(batch, 0);
1033     OUT_BCS_BATCH(batch, 0);
1034     OUT_BCS_BATCH(batch, 0);
1035     OUT_BCS_BATCH(batch, 0);
1036     ADVANCE_BCS_BATCH(batch);
1037 }
1038
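/* Cache the application-supplied MPEG-2 quantizer matrices in the context
   (reordered through the zigzag table) and load the valid ones into the MFX unit. */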
1039 static void
1040 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1041                         struct decode_state *decode_state,
1042                         struct gen7_mfd_context *gen7_mfd_context)
1043 {
1044     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1045     int i, j;
1046
1047     /* Update internal QM state */
1048     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1049         VAIQMatrixBufferMPEG2 * const iq_matrix =
1050             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1051
1052         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1053             iq_matrix->load_intra_quantiser_matrix) {
1054             gen_iq_matrix->load_intra_quantiser_matrix =
1055                 iq_matrix->load_intra_quantiser_matrix;
1056             if (iq_matrix->load_intra_quantiser_matrix) {
1057                 for (j = 0; j < 64; j++)
1058                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1059                         iq_matrix->intra_quantiser_matrix[j];
1060             }
1061         }
1062
1063         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1064             iq_matrix->load_non_intra_quantiser_matrix) {
1065             gen_iq_matrix->load_non_intra_quantiser_matrix =
1066                 iq_matrix->load_non_intra_quantiser_matrix;
1067             if (iq_matrix->load_non_intra_quantiser_matrix) {
1068                 for (j = 0; j < 64; j++)
1069                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1070                         iq_matrix->non_intra_quantiser_matrix[j];
1071             }
1072         }
1073     }
1074
1075     /* Commit QM state to HW */
1076     for (i = 0; i < 2; i++) {
1077         unsigned char *qm = NULL;
1078         int qm_type;
1079
1080         if (i == 0) {
1081             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1082                 qm = gen_iq_matrix->intra_quantiser_matrix;
1083                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1084             }
1085         } else {
1086             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1087                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1088                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1089             }
1090         }
1091
1092         if (!qm)
1093             continue;
1094
1095         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1096     }
1097 }
1098
1099 static void
1100 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1101                           VAPictureParameterBufferMPEG2 *pic_param,
1102                           VASliceParameterBufferMPEG2 *slice_param,
1103                           VASliceParameterBufferMPEG2 *next_slice_param,
1104                           struct gen7_mfd_context *gen7_mfd_context)
1105 {
1106     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1107     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1108     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1109
1110     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1111         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1112         is_field_pic = 1;
1113     is_field_pic_wa = is_field_pic &&
1114         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1115
1116     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1117     hpos0 = slice_param->slice_horizontal_position;
1118
1119     if (next_slice_param == NULL) {
1120         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1121         hpos1 = 0;
1122     } else {
1123         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1124         hpos1 = next_slice_param->slice_horizontal_position;
1125     }
1126
1127     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1128
1129     BEGIN_BCS_BATCH(batch, 5);
1130     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1131     OUT_BCS_BATCH(batch, 
1132                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1133     OUT_BCS_BATCH(batch, 
1134                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1135     OUT_BCS_BATCH(batch,
1136                   hpos0 << 24 |
1137                   vpos0 << 16 |
1138                   mb_count << 8 |
1139                   (next_slice_param == NULL) << 5 |
1140                   (next_slice_param == NULL) << 3 |
1141                   (slice_param->macroblock_offset & 0x7));
1142     OUT_BCS_BATCH(batch,
1143                   (slice_param->quantiser_scale_code << 24) |
1144                   (vpos1 << 8 | hpos1));
1145     ADVANCE_BCS_BATCH(batch);
1146 }
1147
1148 static void
1149 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1150                               struct decode_state *decode_state,
1151                               struct gen7_mfd_context *gen7_mfd_context)
1152 {
1153     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1154     VAPictureParameterBufferMPEG2 *pic_param;
1155     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1156     dri_bo *slice_data_bo;
1157     int i, j;
1158
1159     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1160     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1161
1162     gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1163     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1164     intel_batchbuffer_emit_mi_flush(batch);
1165     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1166     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1167     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1168     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1169     gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1170     gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1171
1172     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1173         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1174             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1175
1176     for (j = 0; j < decode_state->num_slice_params; j++) {
1177         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1178         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1179         slice_data_bo = decode_state->slice_datas[j]->bo;
1180         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1181
1182         if (j == decode_state->num_slice_params - 1)
1183             next_slice_group_param = NULL;
1184         else
1185             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1186
1187         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1188             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1189
1190             if (i < decode_state->slice_params[j]->num_elements - 1)
1191                 next_slice_param = slice_param + 1;
1192             else
1193                 next_slice_param = next_slice_group_param;
1194
1195             gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1196             slice_param++;
1197         }
1198     }
1199
1200     intel_batchbuffer_end_atomic(batch);
1201     intel_batchbuffer_flush(batch);
1202 }
1203
1204 static const int va_to_gen7_vc1_pic_type[5] = {
1205     GEN7_VC1_I_PICTURE,
1206     GEN7_VC1_P_PICTURE,
1207     GEN7_VC1_B_PICTURE,
1208     GEN7_VC1_BI_PICTURE,
1209     GEN7_VC1_P_PICTURE,
1210 };
1211
1212 static const int va_to_gen7_vc1_mv[4] = {
1213     1, /* 1-MV */
1214     2, /* 1-MV half-pel */
1215     3, /* 1-MV half-pel bilinear */
1216     0, /* Mixed MV */
1217 };
1218
1219 static const int b_picture_scale_factor[21] = {
1220     128, 85,  170, 64,  192,
1221     51,  102, 153, 204, 43,
1222     215, 37,  74,  111, 148,
1223     185, 222, 32,  96,  160, 
1224     224,
1225 };
1226
1227 static const int va_to_gen7_vc1_condover[3] = {
1228     0,
1229     2,
1230     3
1231 };
1232
1233 static const int va_to_gen7_vc1_profile[4] = {
1234     GEN7_VC1_SIMPLE_PROFILE,
1235     GEN7_VC1_MAIN_PROFILE,
1236     GEN7_VC1_RESERVED_PROFILE,
1237     GEN7_VC1_ADVANCED_PROFILE
1238 };
1239
1240 static void 
1241 gen8_mfd_free_vc1_surface(void **data)
1242 {
1243     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1244
1245     if (!gen7_vc1_surface)
1246         return;
1247
1248     dri_bo_unreference(gen7_vc1_surface->dmv);
1249     free(gen7_vc1_surface);
1250     *data = NULL;
1251 }
1252
1253 static void
1254 gen8_mfd_init_vc1_surface(VADriverContextP ctx, 
1255                           VAPictureParameterBufferVC1 *pic_param,
1256                           struct object_surface *obj_surface)
1257 {
1258     struct i965_driver_data *i965 = i965_driver_data(ctx);
1259     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1260     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1261     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1262
1263     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1264
1265     if (!gen7_vc1_surface) {
1266         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1267         assert((obj_surface->size & 0x3f) == 0);
1268         obj_surface->private_data = gen7_vc1_surface;
1269     }
1270
1271     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1272
1273     if (gen7_vc1_surface->dmv == NULL) {
1274         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1275                                              "direct mv w/r buffer",
1276                                              width_in_mbs * height_in_mbs * 64,
1277                                              0x1000);
1278     }
1279 }
1280
1281 static void
1282 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1283                          struct decode_state *decode_state,
1284                          struct gen7_mfd_context *gen7_mfd_context)
1285 {
1286     VAPictureParameterBufferVC1 *pic_param;
1287     struct i965_driver_data *i965 = i965_driver_data(ctx);
1288     struct object_surface *obj_surface;
1289     dri_bo *bo;
1290     int width_in_mbs;
1291     int picture_type;
1292
1293     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1294     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1295     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1296     picture_type = pic_param->picture_fields.bits.picture_type;
1297  
1298     intel_update_vc1_frame_store_index(ctx,
1299                                        decode_state,
1300                                        pic_param,
1301                                        gen7_mfd_context->reference_surface);
1302
1303     /* Current decoded picture */
1304     obj_surface = decode_state->render_object;
1305     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1306     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1307
1308     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1309     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1310     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1311     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1312
1313     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1314     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1315     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1316     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1317
1318     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1319     bo = dri_bo_alloc(i965->intel.bufmgr,
1320                       "intra row store",
1321                       width_in_mbs * 64,
1322                       0x1000);
1323     assert(bo);
1324     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1325     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1326
1327     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1328     bo = dri_bo_alloc(i965->intel.bufmgr,
1329                       "deblocking filter row store",
1330                       width_in_mbs * 7 * 64,
1331                       0x1000);
1332     assert(bo);
1333     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1334     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1335
1336     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1337     bo = dri_bo_alloc(i965->intel.bufmgr,
1338                       "bsd mpc row store",
1339                       width_in_mbs * 96,
1340                       0x1000);
1341     assert(bo);
1342     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1343     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1344
1345     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1346
1347     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1348     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1349     
1350     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1351         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1352         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1353         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1354         int src_w, src_h;
1355         uint8_t *src = NULL, *dst = NULL;
1356
1357         assert(decode_state->bit_plane->buffer);
1358         src = decode_state->bit_plane->buffer;
1359
1360         bo = dri_bo_alloc(i965->intel.bufmgr,
1361                           "VC-1 Bitplane",
1362                           bitplane_width * height_in_mbs,
1363                           0x1000);
1364         assert(bo);
1365         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1366
1367         dri_bo_map(bo, True);
1368         assert(bo->virtual);
1369         dst = bo->virtual;
1370
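        /*
         * Repack the VA bitplane buffer: the source packs two macroblocks
         * per byte (4 bits each, raster order over the whole picture); the
         * destination keeps one row of macroblocks per bitplane_width
         * bytes. For skipped pictures the 0x2 bit is forced on for every
         * macroblock.
         */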
1371         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1372             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1373                 int src_index, dst_index;
1374                 int src_shift;
1375                 uint8_t src_value;
1376
1377                 src_index = (src_h * width_in_mbs + src_w) / 2;
1378                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1379                 src_value = ((src[src_index] >> src_shift) & 0xf);
1380
1381                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1382                     src_value |= 0x2;
1383                 }
1384
1385                 dst_index = src_w / 2;
1386                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1387             }
1388
1389             if (src_w & 1)
1390                 dst[src_w / 2] >>= 4;
1391
1392             dst += bitplane_width;
1393         }
1394
1395         dri_bo_unmap(bo);
1396     } else
1397         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1398 }
1399
1400 static void
1401 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1402                        struct decode_state *decode_state,
1403                        struct gen7_mfd_context *gen7_mfd_context)
1404 {
1405     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1406     VAPictureParameterBufferVC1 *pic_param;
1407     struct object_surface *obj_surface;
1408     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1409     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1410     int unified_mv_mode;
1411     int ref_field_pic_polarity = 0;
1412     int scale_factor = 0;
1413     int trans_ac_y = 0;
1414     int dmv_surface_valid = 0;
1415     int brfd = 0;
1416     int fcm = 0;
1417     int picture_type;
1418     int profile;
1419     int overlap;
1420     int interpolation_mode = 0;
1421
1422     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1423     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1424
1425     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1426     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1427     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1428     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1429     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1430     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1431     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1432     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1433
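    /*
     * Fold the VC-1 DQUANT syntax elements (dquant, dq_frame, dq_profile,
     * dq_db_edge/dq_sb_edge, dq_binary_level) into the two values passed to
     * MFD_VC1_LONG_PIC_STATE below: alt_pquant_config and
     * alt_pquant_edge_mask, which appear to select how and on which picture
     * edges the alternate quantizer (alt_pq) is applied.
     */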
1434     if (dquant == 0) {
1435         alt_pquant_config = 0;
1436         alt_pquant_edge_mask = 0;
1437     } else if (dquant == 2) {
1438         alt_pquant_config = 1;
1439         alt_pquant_edge_mask = 0xf;
1440     } else {
1441         assert(dquant == 1);
1442         if (dquantfrm == 0) {
1443             alt_pquant_config = 0;
1444             alt_pquant_edge_mask = 0;
1445             alt_pq = 0;
1446         } else {
1447             assert(dquantfrm == 1);
1448             alt_pquant_config = 1;
1449
1450             switch (dqprofile) {
1451             case 3:
1452                 if (dqbilevel == 0) {
1453                     alt_pquant_config = 2;
1454                     alt_pquant_edge_mask = 0;
1455                 } else {
1456                     assert(dqbilevel == 1);
1457                     alt_pquant_config = 3;
1458                     alt_pquant_edge_mask = 0;
1459                 }
1460                 break;
1461                 
1462             case 0:
1463                 alt_pquant_edge_mask = 0xf;
1464                 break;
1465
1466             case 1:
1467                 if (dqdbedge == 3)
1468                     alt_pquant_edge_mask = 0x9;
1469                 else
1470                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1471
1472                 break;
1473
1474             case 2:
1475                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1476                 break;
1477
1478             default:
1479                 assert(0);
1480             }
1481         }
1482     }
1483
1484     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1485         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1486         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1487     } else {
1488         assert(pic_param->mv_fields.bits.mv_mode < 4);
1489         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1490     }
1491
1492     if (pic_param->sequence_fields.bits.interlace == 1 &&
1493         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1494         /* FIXME: calculate reference field picture polarity */
1495         assert(0);
1496         ref_field_pic_polarity = 0;
1497     }
1498
1499     if (pic_param->b_picture_fraction < 21)
1500         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1501
1502     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1503     
1504     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1505         picture_type == GEN7_VC1_I_PICTURE)
1506         picture_type = GEN7_VC1_BI_PICTURE;
1507
1508     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1509         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1510     else {
1511         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1512
1513         /*
1514          * 8.3.6.2.1 Transform Type Selection
1515          * If variable-sized transform coding is not enabled,
1516          * then the 8x8 transform shall be used for all blocks.
1517          * It is also an MFX_VC1_PIC_STATE requirement.
1518          */
1519         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1520             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1521             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1522         }
1523     }
1524
1525     if (picture_type == GEN7_VC1_B_PICTURE) {
1526         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1527
1528         obj_surface = decode_state->reference_objects[1];
1529
1530         if (obj_surface)
1531             gen7_vc1_surface = obj_surface->private_data;
1532
1533         if (!gen7_vc1_surface || 
1534             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1535              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1536             dmv_surface_valid = 0;
1537         else
1538             dmv_surface_valid = 1;
1539     }
1540
1541     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1542
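    /*
     * Frame coding mode for the pic state: 0 (progressive) and
     * 1 (frame-interlace) map straight through; field-interlace pictures
     * become 2 or 3 depending on top-field-first, which appears to be the
     * encoding the hardware expects.
     */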
1543     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1544         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1545     else {
1546         if (pic_param->picture_fields.bits.top_field_first)
1547             fcm = 2;
1548         else
1549             fcm = 3;
1550     }
1551
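    /*
     * For B pictures, derive the backward reference frame distance from the
     * reference distance and the BFRACTION scale factor, clamped at zero.
     */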
1552     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1553         brfd = pic_param->reference_fields.bits.reference_distance;
1554         brfd = (scale_factor * brfd) >> 8;
1555         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1556
1557         if (brfd < 0)
1558             brfd = 0;
1559     }
1560
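    /*
     * Overlap smoothing: for simple/main profile it is implied by
     * PQUANT >= 9 on non-B pictures; for advanced profile it also depends
     * on CONDOVER for I/BI pictures (see va_to_gen7_vc1_condover).
     */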
1561     overlap = 0;
1562     if (profile != GEN7_VC1_ADVANCED_PROFILE) {
1563         if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1564             pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1565             overlap = 1;
1566         }
1567     } else {
1568         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1569             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1570             overlap = 1;
1571         }
1572         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1573             pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
1574             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1575                 overlap = 1;
1576             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1577                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1578                 overlap = 1;
1579             }
1580         }
1581     }
1582
1583     assert(pic_param->conditional_overlap_flag < 3);
1584     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1585
1586     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1587         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1588          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1589         interpolation_mode = 9; /* Half-pel bilinear */
1590     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1591              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1592               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1593         interpolation_mode = 1; /* Half-pel bicubic */
1594     else
1595         interpolation_mode = 0; /* Quarter-pel bicubic */
1596
1597     BEGIN_BCS_BATCH(batch, 6);
1598     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1599     OUT_BCS_BATCH(batch,
1600                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1601                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1602     OUT_BCS_BATCH(batch,
1603                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1604                   dmv_surface_valid << 15 |
1605                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1606                   pic_param->rounding_control << 13 |
1607                   pic_param->sequence_fields.bits.syncmarker << 12 |
1608                   interpolation_mode << 8 |
1609                   0 << 7 | /* FIXME: scale up or down ??? */
1610                   pic_param->range_reduction_frame << 6 |
1611                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1612                   overlap << 4 |
1613                   !pic_param->picture_fields.bits.is_first_field << 3 |
1614                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1615     OUT_BCS_BATCH(batch,
1616                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1617                   picture_type << 26 |
1618                   fcm << 24 |
1619                   alt_pq << 16 |
1620                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1621                   scale_factor << 0);
1622     OUT_BCS_BATCH(batch,
1623                   unified_mv_mode << 28 |
1624                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1625                   pic_param->fast_uvmc_flag << 26 |
1626                   ref_field_pic_polarity << 25 |
1627                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1628                   pic_param->reference_fields.bits.reference_distance << 20 |
1629                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1630                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1631                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1632                   alt_pquant_edge_mask << 4 |
1633                   alt_pquant_config << 2 |
1634                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1635                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1636     OUT_BCS_BATCH(batch,
1637                   !!pic_param->bitplane_present.value << 31 |
1638                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1639                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1640                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1641                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1642                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1643                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1644                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1645                   pic_param->mv_fields.bits.mv_table << 20 |
1646                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1647                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1648                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1649                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1650                   pic_param->mb_mode_table << 8 |
1651                   trans_ac_y << 6 |
1652                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1653                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1654                   pic_param->cbp_table << 0);
1655     ADVANCE_BCS_BATCH(batch);
1656 }
1657
1658 static void
1659 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1660                              struct decode_state *decode_state,
1661                              struct gen7_mfd_context *gen7_mfd_context)
1662 {
1663     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1664     VAPictureParameterBufferVC1 *pic_param;
1665     int intensitycomp_single;
1666
1667     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1668     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1669
1672     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1673
1674     BEGIN_BCS_BATCH(batch, 6);
1675     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1676     OUT_BCS_BATCH(batch,
1677                   0 << 14 | /* FIXME: double ??? */
1678                   0 << 12 |
1679                   intensitycomp_single << 10 |
1680                   intensitycomp_single << 8 |
1681                   0 << 4 | /* FIXME: interlace mode */
1682                   0);
1683     OUT_BCS_BATCH(batch,
1684                   pic_param->luma_shift << 16 |
1685                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1686     OUT_BCS_BATCH(batch, 0);
1687     OUT_BCS_BATCH(batch, 0);
1688     OUT_BCS_BATCH(batch, 0);
1689     ADVANCE_BCS_BATCH(batch);
1690 }
1691
1692 static void
1693 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1694                               struct decode_state *decode_state,
1695                               struct gen7_mfd_context *gen7_mfd_context)
1696 {
1697     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1698     struct object_surface *obj_surface;
1699     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1700
1701     obj_surface = decode_state->render_object;
1702
1703     if (obj_surface && obj_surface->private_data) {
1704         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1705     }
1706
1707     obj_surface = decode_state->reference_objects[1];
1708
1709     if (obj_surface && obj_surface->private_data) {
1710         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1711     }
1712
1713     BEGIN_BCS_BATCH(batch, 7);
1714     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1715
1716     if (dmv_write_buffer)
1717         OUT_BCS_RELOC(batch, dmv_write_buffer,
1718                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1719                       0);
1720     else
1721         OUT_BCS_BATCH(batch, 0);
1722
1723     OUT_BCS_BATCH(batch, 0);
1724     OUT_BCS_BATCH(batch, 0);
1725
1726     if (dmv_read_buffer)
1727         OUT_BCS_RELOC(batch, dmv_read_buffer,
1728                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1729                       0);
1730     else
1731         OUT_BCS_BATCH(batch, 0);
1732     
1733     OUT_BCS_BATCH(batch, 0);
1734     OUT_BCS_BATCH(batch, 0);
1735                   
1736     ADVANCE_BCS_BATCH(batch);
1737 }
1738
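/*
 * For advanced profile bitstreams the macroblock data bit offset from
 * VASliceParameterBufferVC1 appears not to account for start-code emulation
 * prevention bytes (0x00 0x00 0x03) in the slice header, so walk the header
 * and grow the offset by the escape bytes found; other profiles use the
 * offset as-is.
 */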
1739 static int
1740 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1741 {
1742     int out_slice_data_bit_offset;
1743     int slice_header_size = in_slice_data_bit_offset / 8;
1744     int i, j;
1745
1746     if (profile != 3)
1747         out_slice_data_bit_offset = in_slice_data_bit_offset;
1748     else {
1749         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1750             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1751                 i++, j += 2;
1752             }
1753         }
1754
1755         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1756     }
1757
1758     return out_slice_data_bit_offset;
1759 }
1760
1761 static void
1762 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1763                         VAPictureParameterBufferVC1 *pic_param,
1764                         VASliceParameterBufferVC1 *slice_param,
1765                         VASliceParameterBufferVC1 *next_slice_param,
1766                         dri_bo *slice_data_bo,
1767                         struct gen7_mfd_context *gen7_mfd_context)
1768 {
1769     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1770     int next_slice_start_vert_pos;
1771     int macroblock_offset;
1772     uint8_t *slice_data = NULL;
1773
1774     dri_bo_map(slice_data_bo, 0);
1775     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1776     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1777                                                                slice_param->macroblock_offset,
1778                                                                pic_param->sequence_fields.bits.profile);
1779     dri_bo_unmap(slice_data_bo);
1780
1781     if (next_slice_param)
1782         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1783     else
1784         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1785
1786     BEGIN_BCS_BATCH(batch, 5);
1787     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1788     OUT_BCS_BATCH(batch, 
1789                   slice_param->slice_data_size - (macroblock_offset >> 3));
1790     OUT_BCS_BATCH(batch, 
1791                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1792     OUT_BCS_BATCH(batch,
1793                   slice_param->slice_vertical_position << 16 |
1794                   next_slice_start_vert_pos << 0);
1795     OUT_BCS_BATCH(batch,
1796                   (macroblock_offset & 0x7));
1797     ADVANCE_BCS_BATCH(batch);
1798 }
1799
1800 static void
1801 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1802                             struct decode_state *decode_state,
1803                             struct gen7_mfd_context *gen7_mfd_context)
1804 {
1805     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1806     VAPictureParameterBufferVC1 *pic_param;
1807     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1808     dri_bo *slice_data_bo;
1809     int i, j;
1810
1811     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1812     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1813
1814     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1815     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1816     intel_batchbuffer_emit_mi_flush(batch);
1817     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1818     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1819     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1820     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1821     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1822     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1823     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1824
1825     for (j = 0; j < decode_state->num_slice_params; j++) {
1826         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1827         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1828         slice_data_bo = decode_state->slice_datas[j]->bo;
1829         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1830
1831         if (j == decode_state->num_slice_params - 1)
1832             next_slice_group_param = NULL;
1833         else
1834             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1835
1836         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1837             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1838
1839             if (i < decode_state->slice_params[j]->num_elements - 1)
1840                 next_slice_param = slice_param + 1;
1841             else
1842                 next_slice_param = next_slice_group_param;
1843
1844             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1845             slice_param++;
1846         }
1847     }
1848
1849     intel_batchbuffer_end_atomic(batch);
1850     intel_batchbuffer_flush(batch);
1851 }
1852
1853 static void
1854 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1855                           struct decode_state *decode_state,
1856                           struct gen7_mfd_context *gen7_mfd_context)
1857 {
1858     struct object_surface *obj_surface;
1859     VAPictureParameterBufferJPEGBaseline *pic_param;
1860     int subsampling = SUBSAMPLE_YUV420;
1861     int fourcc = VA_FOURCC_IMC3;
1862
1863     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1864
1865     if (pic_param->num_components == 1)
1866         subsampling = SUBSAMPLE_YUV400;
1867     else if (pic_param->num_components == 3) {
1868         int h1 = pic_param->components[0].h_sampling_factor;
1869         int h2 = pic_param->components[1].h_sampling_factor;
1870         int h3 = pic_param->components[2].h_sampling_factor;
1871         int v1 = pic_param->components[0].v_sampling_factor;
1872         int v2 = pic_param->components[1].v_sampling_factor;
1873         int v3 = pic_param->components[2].v_sampling_factor;
1874
1875         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1876             v1 == 2 && v2 == 1 && v3 == 1) {
1877             subsampling = SUBSAMPLE_YUV420;
1878             fourcc = VA_FOURCC_IMC3;
1879         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1880                    v1 == 1 && v2 == 1 && v3 == 1) {
1881             subsampling = SUBSAMPLE_YUV422H;
1882             fourcc = VA_FOURCC_422H;
1883         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1884                    v1 == 1 && v2 == 1 && v3 == 1) {
1885             subsampling = SUBSAMPLE_YUV444;
1886             fourcc = VA_FOURCC_444P;
1887         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1888                    v1 == 1 && v2 == 1 && v3 == 1) {
1889             subsampling = SUBSAMPLE_YUV411;
1890             fourcc = VA_FOURCC_411P;
1891         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1892                    v1 == 2 && v2 == 1 && v3 == 1) {
1893             subsampling = SUBSAMPLE_YUV422V;
1894             fourcc = VA_FOURCC_422V;
1895         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1896                    v1 == 2 && v2 == 2 && v3 == 2) {
1897             subsampling = SUBSAMPLE_YUV422H;
1898             fourcc = VA_FOURCC_422H;
1899         } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1900                    v1 == 2 && v2 == 1 && v3 == 1) {
1901             subsampling = SUBSAMPLE_YUV422V;
1902             fourcc = VA_FOURCC_422V;
1903         } else
1904             assert(0);
1905     }
1906     else {
1907         assert(0);
1908     }
1909
1910     /* Current decoded picture */
1911     obj_surface = decode_state->render_object;
1912     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1913
1914     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1915     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1916     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1917     gen7_mfd_context->pre_deblocking_output.valid = 1;
1918
1919     gen7_mfd_context->post_deblocking_output.bo = NULL;
1920     gen7_mfd_context->post_deblocking_output.valid = 0;
1921
1922     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1923     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1924
1925     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1926     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1927
1928     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1929     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1930
1931     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1932     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1933
1934     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1935     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1936 }
1937
1938 static const int va_to_gen7_jpeg_rotation[4] = {
1939     GEN7_JPEG_ROTATION_0,
1940     GEN7_JPEG_ROTATION_90,
1941     GEN7_JPEG_ROTATION_180,
1942     GEN7_JPEG_ROTATION_270
1943 };
1944
1945 static void
1946 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1947                         struct decode_state *decode_state,
1948                         struct gen7_mfd_context *gen7_mfd_context)
1949 {
1950     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1951     VAPictureParameterBufferJPEGBaseline *pic_param;
1952     int chroma_type = GEN7_YUV420;
1953     int frame_width_in_blks;
1954     int frame_height_in_blks;
1955
1956     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1957     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1958
1959     if (pic_param->num_components == 1)
1960         chroma_type = GEN7_YUV400;
1961     else if (pic_param->num_components == 3) {
1962         int h1 = pic_param->components[0].h_sampling_factor;
1963         int h2 = pic_param->components[1].h_sampling_factor;
1964         int h3 = pic_param->components[2].h_sampling_factor;
1965         int v1 = pic_param->components[0].v_sampling_factor;
1966         int v2 = pic_param->components[1].v_sampling_factor;
1967         int v3 = pic_param->components[2].v_sampling_factor;
1968
1969         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1970             v1 == 2 && v2 == 1 && v3 == 1)
1971             chroma_type = GEN7_YUV420;
1972         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1973                  v1 == 1 && v2 == 1 && v3 == 1)
1974             chroma_type = GEN7_YUV422H_2Y;
1975         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1976                  v1 == 1 && v2 == 1 && v3 == 1)
1977             chroma_type = GEN7_YUV444;
1978         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1979                  v1 == 1 && v2 == 1 && v3 == 1)
1980             chroma_type = GEN7_YUV411;
1981         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1982                  v1 == 2 && v2 == 1 && v3 == 1)
1983             chroma_type = GEN7_YUV422V_2Y;
1984         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1985                  v1 == 2 && v2 == 2 && v3 == 2)
1986             chroma_type = GEN7_YUV422H_4Y;
1987         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1988                  v1 == 2 && v2 == 1 && v3 == 1)
1989             chroma_type = GEN7_YUV422V_4Y;
1990         else
1991             assert(0);
1992     }
1993
1994     if (chroma_type == GEN7_YUV400 ||
1995         chroma_type == GEN7_YUV444 ||
1996         chroma_type == GEN7_YUV422V_2Y) {
1997         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
1998         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
1999     } else if (chroma_type == GEN7_YUV411) {
2000         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2001         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2002     } else {
2003         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2004         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2005     }
2006
2007     BEGIN_BCS_BATCH(batch, 3);
2008     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2009     OUT_BCS_BATCH(batch,
2010                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2011                   (chroma_type << 0));
2012     OUT_BCS_BATCH(batch,
2013                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2014                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2015     ADVANCE_BCS_BATCH(batch);
2016 }
2017
2018 static const int va_to_gen7_jpeg_hufftable[2] = {
2019     MFX_HUFFTABLE_ID_Y,
2020     MFX_HUFFTABLE_ID_UV
2021 };
2022
2023 static void
2024 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2025                                struct decode_state *decode_state,
2026                                struct gen7_mfd_context *gen7_mfd_context,
2027                                int num_tables)
2028 {
2029     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2030     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2031     int index;
2032
2033     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2034         return;
2035
2036     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2037
2038     for (index = 0; index < num_tables; index++) {
2039         int id = va_to_gen7_jpeg_hufftable[index];
2040         if (!huffman_table->load_huffman_table[index])
2041             continue;
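        /*
         * MFX_JPEG_HUFF_TABLE_STATE is 53 DWs: command header, table id,
         * then the raw table data - 12 DC code-length counts, 12 DC values,
         * 16 AC code-length counts and 164 bytes of AC values (the
         * 162-entry ac_values array plus its trailing padding).
         */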
2042         BEGIN_BCS_BATCH(batch, 53);
2043         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2044         OUT_BCS_BATCH(batch, id);
2045         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2046         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2047         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2048         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2049         ADVANCE_BCS_BATCH(batch);
2050     }
2051 }
2052
2053 static const int va_to_gen7_jpeg_qm[5] = {
2054     -1,
2055     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2056     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2057     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2058     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2059 };
2060
2061 static void
2062 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2063                        struct decode_state *decode_state,
2064                        struct gen7_mfd_context *gen7_mfd_context)
2065 {
2066     VAPictureParameterBufferJPEGBaseline *pic_param;
2067     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2068     int index;
2069
2070     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2071         return;
2072
2073     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2074     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2075
2076     assert(pic_param->num_components <= 3);
2077
2078     for (index = 0; index < pic_param->num_components; index++) {
2079         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2080         int qm_type;
2081         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2082         unsigned char raster_qm[64];
2083         int j;
2084
2085         if (id > 4 || id < 1)
2086             continue;
2087
2088         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2089             continue;
2090
2091         qm_type = va_to_gen7_jpeg_qm[id];
2092
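        /* De-zigzag the quantiser table: the VA buffer is in zig-zag scan
         * order, while the QM state below is loaded in raster order. */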
2093         for (j = 0; j < 64; j++)
2094             raster_qm[zigzag_direct[j]] = qm[j];
2095
2096         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2097     }
2098 }
2099
2100 static void
2101 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2102                          VAPictureParameterBufferJPEGBaseline *pic_param,
2103                          VASliceParameterBufferJPEGBaseline *slice_param,
2104                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2105                          dri_bo *slice_data_bo,
2106                          struct gen7_mfd_context *gen7_mfd_context)
2107 {
2108     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2109     int scan_component_mask = 0;
2110     int i;
2111
2112     assert(slice_param->num_components > 0);
2113     assert(slice_param->num_components < 4);
2114     assert(slice_param->num_components <= pic_param->num_components);
2115
2116     for (i = 0; i < slice_param->num_components; i++) {
2117         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2118         case 1:
2119             scan_component_mask |= (1 << 0);
2120             break;
2121         case 2:
2122             scan_component_mask |= (1 << 1);
2123             break;
2124         case 3:
2125             scan_component_mask |= (1 << 2);
2126             break;
2127         default:
2128             assert(0);
2129             break;
2130         }
2131     }
2132
2133     BEGIN_BCS_BATCH(batch, 6);
2134     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2135     OUT_BCS_BATCH(batch, 
2136                   slice_param->slice_data_size);
2137     OUT_BCS_BATCH(batch, 
2138                   slice_param->slice_data_offset);
2139     OUT_BCS_BATCH(batch,
2140                   slice_param->slice_horizontal_position << 16 |
2141                   slice_param->slice_vertical_position << 0);
2142     OUT_BCS_BATCH(batch,
2143                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2144                   (scan_component_mask << 27) |                 /* scan components */
2145                   (0 << 26) |   /* disable interrupt allowed */
2146                   (slice_param->num_mcus << 0));                /* MCU count */
2147     OUT_BCS_BATCH(batch,
2148                   (slice_param->restart_interval << 0));    /* RestartInterval */
2149     ADVANCE_BCS_BATCH(batch);
2150 }
2151
2152 /* Workaround for JPEG decoding on Ivybridge */
2153 #ifdef JPEG_WA
2154
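/*
 * Hard-coded bitstream for a 16x16 AVC intra clip. The workaround below
 * appears to push this tiny clip through the MFX pipeline in AVC VLD mode
 * before the real JPEG decode to bring the decoder into a known state.
 */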
2155 static struct {
2156     int width;
2157     int height;
2158     unsigned char data[32];
2159     int data_size;
2160     int data_bit_offset;
2161     int qp;
2162 } gen7_jpeg_wa_clip = {
2163     16,
2164     16,
2165     {
2166         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2167         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2168     },
2169     14,
2170     40,
2171     28,
2172 };
2173
2174 static void
2175 gen8_jpeg_wa_init(VADriverContextP ctx,
2176                   struct gen7_mfd_context *gen7_mfd_context)
2177 {
2178     struct i965_driver_data *i965 = i965_driver_data(ctx);
2179     VAStatus status;
2180     struct object_surface *obj_surface;
2181
2182     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2183         i965_DestroySurfaces(ctx,
2184                              &gen7_mfd_context->jpeg_wa_surface_id,
2185                              1);
2186
2187     status = i965_CreateSurfaces(ctx,
2188                                  gen7_jpeg_wa_clip.width,
2189                                  gen7_jpeg_wa_clip.height,
2190                                  VA_RT_FORMAT_YUV420,
2191                                  1,
2192                                  &gen7_mfd_context->jpeg_wa_surface_id);
2193     assert(status == VA_STATUS_SUCCESS);
2194
2195     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2196     assert(obj_surface);
2197     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2198     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2199
2200     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2201         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2202                                                                "JPEG WA data",
2203                                                                0x1000,
2204                                                                0x1000);
2205         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2206                        0,
2207                        gen7_jpeg_wa_clip.data_size,
2208                        gen7_jpeg_wa_clip.data);
2209     }
2210 }
2211
2212 static void
2213 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2214                               struct gen7_mfd_context *gen7_mfd_context)
2215 {
2216     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2217
2218     BEGIN_BCS_BATCH(batch, 5);
2219     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2220     OUT_BCS_BATCH(batch,
2221                   (MFX_LONG_MODE << 17) | /* Currently only the long format is supported */
2222                   (MFD_MODE_VLD << 15) | /* VLD mode */
2223                   (0 << 10) | /* disable Stream-Out */
2224                   (0 << 9)  | /* Post Deblocking Output */
2225                   (1 << 8)  | /* Pre Deblocking Output */
2226                   (0 << 5)  | /* not in stitch mode */
2227                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2228                   (MFX_FORMAT_AVC << 0));
2229     OUT_BCS_BATCH(batch,
2230                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2231                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2232                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2233                   (0 << 1)  |
2234                   (0 << 0));
2235     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2236     OUT_BCS_BATCH(batch, 0); /* reserved */
2237     ADVANCE_BCS_BATCH(batch);
2238 }
2239
2240 static void
2241 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2242                            struct gen7_mfd_context *gen7_mfd_context)
2243 {
2244     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2245     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2246
2247     BEGIN_BCS_BATCH(batch, 6);
2248     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2249     OUT_BCS_BATCH(batch, 0);
2250     OUT_BCS_BATCH(batch,
2251                   ((obj_surface->orig_width - 1) << 18) |
2252                   ((obj_surface->orig_height - 1) << 4));
2253     OUT_BCS_BATCH(batch,
2254                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2255                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2256                   (0 << 22) | /* surface object control state, ignored */
2257                   ((obj_surface->width - 1) << 3) | /* pitch */
2258                   (0 << 2)  | /* must be 0 */
2259                   (1 << 1)  | /* must be tiled */
2260                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2261     OUT_BCS_BATCH(batch,
2262                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2263                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2264     OUT_BCS_BATCH(batch,
2265                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2266                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2267     ADVANCE_BCS_BATCH(batch);
2268 }
2269
2270 static void
2271 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2272                                  struct gen7_mfd_context *gen7_mfd_context)
2273 {
2274     struct i965_driver_data *i965 = i965_driver_data(ctx);
2275     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2276     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2277     dri_bo *intra_bo;
2278     int i;
2279
2280     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2281                             "intra row store",
2282                             128 * 64,
2283                             0x1000);
2284
2285     BEGIN_BCS_BATCH(batch, 61);
2286     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2287     OUT_BCS_RELOC(batch,
2288                   obj_surface->bo,
2289                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2290                   0);
2291     OUT_BCS_BATCH(batch, 0);
2292     OUT_BCS_BATCH(batch, 0);
2293
2294
2295     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2296     OUT_BCS_BATCH(batch, 0);
2297     OUT_BCS_BATCH(batch, 0);
2298
2299     /* uncompressed-video & stream out 7-12 */
2300     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2301     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2302     OUT_BCS_BATCH(batch, 0);
2303     OUT_BCS_BATCH(batch, 0);
2304     OUT_BCS_BATCH(batch, 0);
2305     OUT_BCS_BATCH(batch, 0);
2306
2307     /* the DW 13-15 is for intra row store scratch */
2308     OUT_BCS_RELOC(batch,
2309                   intra_bo,
2310                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2311                   0);
2312     OUT_BCS_BATCH(batch, 0);
2313     OUT_BCS_BATCH(batch, 0);
2314
2315     /* the DW 16-18 is for deblocking filter */
2316     OUT_BCS_BATCH(batch, 0);
2317     OUT_BCS_BATCH(batch, 0);
2318     OUT_BCS_BATCH(batch, 0);
2319
2320     /* DW 19..50 */
2321     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2322         OUT_BCS_BATCH(batch, 0);
2323         OUT_BCS_BATCH(batch, 0);
2324     }
2325     OUT_BCS_BATCH(batch, 0);
2326
2327     /* the DW 52-54 is for mb status address */
2328     OUT_BCS_BATCH(batch, 0);
2329     OUT_BCS_BATCH(batch, 0);
2330     OUT_BCS_BATCH(batch, 0);
2331     /* the DW 56-60 is for ILDB & second ILDB address */
2332     OUT_BCS_BATCH(batch, 0);
2333     OUT_BCS_BATCH(batch, 0);
2334     OUT_BCS_BATCH(batch, 0);
2335     OUT_BCS_BATCH(batch, 0);
2336     OUT_BCS_BATCH(batch, 0);
2337     OUT_BCS_BATCH(batch, 0);
2338
2339     ADVANCE_BCS_BATCH(batch);
2340
2341     dri_bo_unreference(intra_bo);
2342 }
2343
2344 static void
2345 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2346                                      struct gen7_mfd_context *gen7_mfd_context)
2347 {
2348     struct i965_driver_data *i965 = i965_driver_data(ctx);
2349     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2350     dri_bo *bsd_mpc_bo, *mpr_bo;
2351
2352     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2353                               "bsd mpc row store",
2354                               11520, /* 1.5 * 120 * 64 */
2355                               0x1000);
2356
2357     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2358                           "mpr row store",
2359                           7680, /* 1.0 * 120 * 64 */
2360                           0x1000);
2361
2362     BEGIN_BCS_BATCH(batch, 10);
2363     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2364
2365     OUT_BCS_RELOC(batch,
2366                   bsd_mpc_bo,
2367                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2368                   0);
2369
2370     OUT_BCS_BATCH(batch, 0);
2371     OUT_BCS_BATCH(batch, 0);
2372
2373     OUT_BCS_RELOC(batch,
2374                   mpr_bo,
2375                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2376                   0);
2377     OUT_BCS_BATCH(batch, 0);
2378     OUT_BCS_BATCH(batch, 0);
2379
2380     OUT_BCS_BATCH(batch, 0);
2381     OUT_BCS_BATCH(batch, 0);
2382     OUT_BCS_BATCH(batch, 0);
2383
2384     ADVANCE_BCS_BATCH(batch);
2385
2386     dri_bo_unreference(bsd_mpc_bo);
2387     dri_bo_unreference(mpr_bo);
2388 }
2389
2390 static void
2391 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2392                           struct gen7_mfd_context *gen7_mfd_context)
2393 {
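    /*
     * Intentionally left empty: the workaround appears to get by with the
     * default AVC quantization matrix state on this generation.
     */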
2394
2395 }
2396
2397 static void
2398 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2399                            struct gen7_mfd_context *gen7_mfd_context)
2400 {
2401     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2402     int img_struct = 0;
2403     int mbaff_frame_flag = 0;
2404     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2405
2406     BEGIN_BCS_BATCH(batch, 16);
2407     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2408     OUT_BCS_BATCH(batch, 
2409                   width_in_mbs * height_in_mbs);
2410     OUT_BCS_BATCH(batch, 
2411                   ((height_in_mbs - 1) << 16) | 
2412                   ((width_in_mbs - 1) << 0));
2413     OUT_BCS_BATCH(batch, 
2414                   (0 << 24) |
2415                   (0 << 16) |
2416                   (0 << 14) |
2417                   (0 << 13) |
2418                   (0 << 12) | /* differ from GEN6 */
2419                   (0 << 10) |
2420                   (img_struct << 8));
2421     OUT_BCS_BATCH(batch,
2422                   (1 << 10) | /* 4:2:0 */
2423                   (1 << 7) |  /* CABAC */
2424                   (0 << 6) |
2425                   (0 << 5) |
2426                   (0 << 4) |
2427                   (0 << 3) |
2428                   (1 << 2) |
2429                   (mbaff_frame_flag << 1) |
2430                   (0 << 0));
2431     OUT_BCS_BATCH(batch, 0);
2432     OUT_BCS_BATCH(batch, 0);
2433     OUT_BCS_BATCH(batch, 0);
2434     OUT_BCS_BATCH(batch, 0);
2435     OUT_BCS_BATCH(batch, 0);
2436     OUT_BCS_BATCH(batch, 0);
2437     OUT_BCS_BATCH(batch, 0);
2438     OUT_BCS_BATCH(batch, 0);
2439     OUT_BCS_BATCH(batch, 0);
2440     OUT_BCS_BATCH(batch, 0);
2441     OUT_BCS_BATCH(batch, 0);
2442     ADVANCE_BCS_BATCH(batch);
2443 }
2444
2445 static void
2446 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2447                                   struct gen7_mfd_context *gen7_mfd_context)
2448 {
2449     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2450     int i;
2451
2452     BEGIN_BCS_BATCH(batch, 71);
2453     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2454
2455     /* reference surfaces 0..15 */
2456     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2457         OUT_BCS_BATCH(batch, 0); /* top */
2458         OUT_BCS_BATCH(batch, 0); /* bottom */
2459     }
2460
2461     OUT_BCS_BATCH(batch, 0);
2462
2463     /* the current decoding frame/field */
2464     OUT_BCS_BATCH(batch, 0); /* top */
2465     OUT_BCS_BATCH(batch, 0);
2466     OUT_BCS_BATCH(batch, 0);
2467
2468     /* POC List */
2469     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2470         OUT_BCS_BATCH(batch, 0);
2471         OUT_BCS_BATCH(batch, 0);
2472     }
2473
2474     OUT_BCS_BATCH(batch, 0);
2475     OUT_BCS_BATCH(batch, 0);
2476
2477     ADVANCE_BCS_BATCH(batch);
2478 }
2479
2480 static void
2481 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2482                                      struct gen7_mfd_context *gen7_mfd_context)
2483 {
2484     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2485
2486     BEGIN_BCS_BATCH(batch, 11);
2487     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2488     OUT_BCS_RELOC(batch,
2489                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2490                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2491                   0);
2492     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2493     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2494     OUT_BCS_BATCH(batch, 0);
2495     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2496     OUT_BCS_BATCH(batch, 0);
2497     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2498     OUT_BCS_BATCH(batch, 0);
2499     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2500     OUT_BCS_BATCH(batch, 0);
2501     ADVANCE_BCS_BATCH(batch);
2502 }
2503
2504 static void
2505 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2506                             struct gen7_mfd_context *gen7_mfd_context)
2507 {
2508     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2509
2510     /* the input bitstream format on GEN7 differs from GEN6 */
2511     BEGIN_BCS_BATCH(batch, 6);
2512     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2513     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2514     OUT_BCS_BATCH(batch, 0);
2515     OUT_BCS_BATCH(batch,
2516                   (0 << 31) |
2517                   (0 << 14) |
2518                   (0 << 12) |
2519                   (0 << 10) |
2520                   (0 << 8));
2521     OUT_BCS_BATCH(batch,
2522                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2523                   (0 << 5)  |
2524                   (0 << 4)  |
2525                   (1 << 3) | /* LastSlice Flag */
2526                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2527     OUT_BCS_BATCH(batch, 0);
2528     ADVANCE_BCS_BATCH(batch);
2529 }
2530
2531 static void
2532 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2533                              struct gen7_mfd_context *gen7_mfd_context)
2534 {
2535     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2536     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2537     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2538     int first_mb_in_slice = 0;
2539     int slice_type = SLICE_TYPE_I;
2540
2541     BEGIN_BCS_BATCH(batch, 11);
2542     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2543     OUT_BCS_BATCH(batch, slice_type);
2544     OUT_BCS_BATCH(batch, 
2545                   (num_ref_idx_l1 << 24) |
2546                   (num_ref_idx_l0 << 16) |
2547                   (0 << 8) |
2548                   (0 << 0));
2549     OUT_BCS_BATCH(batch, 
2550                   (0 << 29) |
2551                   (1 << 27) |   /* disable Deblocking */
2552                   (0 << 24) |
2553                   (gen7_jpeg_wa_clip.qp << 16) |
2554                   (0 << 8) |
2555                   (0 << 0));
2556     OUT_BCS_BATCH(batch, 
2557                   (slice_ver_pos << 24) |
2558                   (slice_hor_pos << 16) | 
2559                   (first_mb_in_slice << 0));
2560     OUT_BCS_BATCH(batch,
2561                   (next_slice_ver_pos << 16) |
2562                   (next_slice_hor_pos << 0));
2563     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2564     OUT_BCS_BATCH(batch, 0);
2565     OUT_BCS_BATCH(batch, 0);
2566     OUT_BCS_BATCH(batch, 0);
2567     OUT_BCS_BATCH(batch, 0);
2568     ADVANCE_BCS_BATCH(batch);
2569 }
2570
2571 static void
2572 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2573                  struct gen7_mfd_context *gen7_mfd_context)
2574 {
2575     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2576     gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2577     intel_batchbuffer_emit_mi_flush(batch);
2578     gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2579     gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2580     gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2581     gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2582     gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2583     gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2584     gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2585
2586     gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2587     gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2588     gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2589 }
2590
2591 #endif
2592
2593 void
2594 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2595                              struct decode_state *decode_state,
2596                              struct gen7_mfd_context *gen7_mfd_context)
2597 {
2598     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2599     VAPictureParameterBufferJPEGBaseline *pic_param;
2600     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2601     dri_bo *slice_data_bo;
2602     int i, j, max_selector = 0;
2603
2604     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2605     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2606
2607     /* Currently only Baseline DCT is supported */
2608     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2609     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2610 #ifdef JPEG_WA
2611     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2612 #endif
2613     intel_batchbuffer_emit_mi_flush(batch);
2614     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2615     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2616     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2617     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2618     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2619
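    /* First pass over the scans: find the largest Huffman table selector in
     * use so that only the required number of tables is loaded below. */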
2620     for (j = 0; j < decode_state->num_slice_params; j++) {
2621         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2622         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2623         slice_data_bo = decode_state->slice_datas[j]->bo;
2624         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2625
2626         if (j == decode_state->num_slice_params - 1)
2627             next_slice_group_param = NULL;
2628         else
2629             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2630
2631         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2632             int component;
2633
2634             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2635
2636             if (i < decode_state->slice_params[j]->num_elements - 1)
2637                 next_slice_param = slice_param + 1;
2638             else
2639                 next_slice_param = next_slice_group_param;
2640
2641             for (component = 0; component < slice_param->num_components; component++) {
2642                 if (max_selector < slice_param->components[component].dc_table_selector)
2643                     max_selector = slice_param->components[component].dc_table_selector;
2644
2645                 if (max_selector < slice_param->components[component].ac_table_selector)
2646                     max_selector = slice_param->components[component].ac_table_selector;
2647             }
2648
2649             slice_param++;
2650         }
2651     }
2652
2653     assert(max_selector < 2);
2654     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2655
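    /* Second pass: emit one BSD object per scan, linking each scan to the
     * following slice parameter (or NULL after the final scan). */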
2656     for (j = 0; j < decode_state->num_slice_params; j++) {
2657         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2658         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2659         slice_data_bo = decode_state->slice_datas[j]->bo;
2660         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2661
2662         if (j == decode_state->num_slice_params - 1)
2663             next_slice_group_param = NULL;
2664         else
2665             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2666
2667         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2668             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2669
2670             if (i < decode_state->slice_params[j]->num_elements - 1)
2671                 next_slice_param = slice_param + 1;
2672             else
2673                 next_slice_param = next_slice_group_param;
2674
2675             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2676             slice_param++;
2677         }
2678     }
2679
2680     intel_batchbuffer_end_atomic(batch);
2681     intel_batchbuffer_flush(batch);
2682 }
2683
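/* DC/AC dequantization lookup tables, indexed by the clipped quantization
 * index (0..127); the entries follow the fixed tables in the VP8 spec. */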
2684 static const int vp8_dc_qlookup[128] =
2685 {
2686       4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
2687      18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
2688      29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
2689      44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
2690      59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
2691      75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
2692      91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2693     122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
2694 };
2695
2696 static const int vp8_ac_qlookup[128] =
2697 {
2698       4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
2699      20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
2700      36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
2701      52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
2702      78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
2703     110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2704     155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2705     213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2706 };
2707
2708 static inline unsigned int vp8_clip_quantization_index(int index)
2709 {
2710     if (index > 127)
2711         return 127;
2712     else if (index < 0)
2713         return 0;
2714
2715     return index;
2716 }
2717
2718 static void
2719 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2720                           struct decode_state *decode_state,
2721                           struct gen7_mfd_context *gen7_mfd_context)
2722 {
2723     struct object_surface *obj_surface;
2724     struct i965_driver_data *i965 = i965_driver_data(ctx);
2725     dri_bo *bo;
2726     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2727     int width_in_mbs = (pic_param->frame_width + 15) / 16;
2728     int height_in_mbs = (pic_param->frame_height + 15) / 16;
2729
2730     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2731     assert(height_in_mbs > 0 && height_in_mbs <= 256);
2732
2733     intel_update_vp8_frame_store_index(ctx,
2734                                        decode_state,
2735                                        pic_param,
2736                                        gen7_mfd_context->reference_surface);
2737
2738     /* Current decoded picture */
2739     obj_surface = decode_state->render_object;
2740     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2741
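    /* Both output ports point at the render surface, but only one is marked
     * valid: post-deblocking when the loop filter is enabled, pre-deblocking
     * when it is disabled. */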
2742     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2743     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2744     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2745     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2746
2747     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2748     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2749     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2750     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2751
2752     intel_ensure_vp8_segmentation_buffer(ctx,
2753         &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2754
2755     /* The same as AVC */
2756     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2757     bo = dri_bo_alloc(i965->intel.bufmgr,
2758                       "intra row store",
2759                       width_in_mbs * 64,
2760                       0x1000);
2761     assert(bo);
2762     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2763     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2764
2765     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2766     bo = dri_bo_alloc(i965->intel.bufmgr,
2767                       "deblocking filter row store",
2768                       width_in_mbs * 64 * 4,
2769                       0x1000);
2770     assert(bo);
2771     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2772     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2773
2774     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2775     bo = dri_bo_alloc(i965->intel.bufmgr,
2776                       "bsd mpc row store",
2777                       width_in_mbs * 64 * 2,
2778                       0x1000);
2779     assert(bo);
2780     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2781     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2782
2783     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2784     bo = dri_bo_alloc(i965->intel.bufmgr,
2785                       "mpr row store",
2786                       width_in_mbs * 64 * 2,
2787                       0x1000);
2788     assert(bo);
2789     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2790     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2791
2792     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2793 }
2794
2795 static void
2796 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2797                        struct decode_state *decode_state,
2798                        struct gen7_mfd_context *gen7_mfd_context)
2799 {
2800     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2801     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2802     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2803     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2804     dri_bo *probs_bo = decode_state->probability_data->bo;
2805     int i, j, log2num;
2806     unsigned int quantization_value[4][6];
2807
2808     /* There is no safe way to error out if the segmentation buffer
2809        could not be allocated. So, instead of aborting, simply decode
2810        something even if the result may look totally inaccurate */
2811     const unsigned int enable_segmentation =
2812         pic_param->pic_fields.bits.segmentation_enabled &&
2813         gen7_mfd_context->segmentation_buffer.valid;
2814         
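    /* num_of_partitions counts the control partition as well as the DCT token
     * partitions, so the token partition count handed to the hardware as a
     * log2 value is num_of_partitions - 1. */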
2815     log2num = (int)log2(slice_param->num_of_partitions - 1);
2816
2817     BEGIN_BCS_BATCH(batch, 38);
2818     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2819     OUT_BCS_BATCH(batch,
2820                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2821                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2822     OUT_BCS_BATCH(batch,
2823                   log2num << 24 |
2824                   pic_param->pic_fields.bits.sharpness_level << 16 |
2825                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2826                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2827                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2828                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2829                   pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2830                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2831                   (enable_segmentation &&
2832                    !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2833                   (enable_segmentation &&
2834                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2835                   (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicates an intra frame in the VP8 stream/spec (§9.1) */
2836                   pic_param->pic_fields.bits.filter_type << 4 |
2837                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2838                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2839
2840     OUT_BCS_BATCH(batch,
2841                   pic_param->loop_filter_level[3] << 24 |
2842                   pic_param->loop_filter_level[2] << 16 |
2843                   pic_param->loop_filter_level[1] <<  8 |
2844                   pic_param->loop_filter_level[0] <<  0);
2845
2846     /* Quantizer values for the 4 segments, DW4-DW15 */
2847     for (i = 0; i < 4; i++) {
2848         quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];     /* yac */
2849         quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];     /* ydc */
2850         quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /* y2dc */
2851         /* multiplying by 101581 and shifting right by 16 scales by roughly 155/100 */
2852         quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /* y2ac */
2853         quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];     /* uvdc */
2854         quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];     /* uvac */
2855
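        /* Floor of 8 on Y2AC and ceiling of 132 on UVDC, matching the VP8
         * reference dequantizer. */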
2856         quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2857         quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2858
2859         OUT_BCS_BATCH(batch,
2860                       quantization_value[i][0] << 16 | /* Y1AC */
2861                       quantization_value[i][1] <<  0); /* Y1DC */
2862         OUT_BCS_BATCH(batch,
2863                       quantization_value[i][5] << 16 | /* UVAC */
2864                       quantization_value[i][4] <<  0); /* UVDC */
2865         OUT_BCS_BATCH(batch,
2866                       quantization_value[i][3] << 16 | /* Y2AC */
2867                       quantization_value[i][2] <<  0); /* Y2DC */
2868     }
2869
2870     /* CoeffProbability table for non-key frame, DW16-DW18 */
2871     if (probs_bo) {
2872         OUT_BCS_RELOC(batch, probs_bo,
2873                       0, I915_GEM_DOMAIN_INSTRUCTION,
2874                       0);
2875         OUT_BCS_BATCH(batch, 0);
2876         OUT_BCS_BATCH(batch, 0);
2877     } else {
2878         OUT_BCS_BATCH(batch, 0);
2879         OUT_BCS_BATCH(batch, 0);
2880         OUT_BCS_BATCH(batch, 0);
2881     }
2882
2883     OUT_BCS_BATCH(batch,
2884                   pic_param->mb_segment_tree_probs[2] << 16 |
2885                   pic_param->mb_segment_tree_probs[1] <<  8 |
2886                   pic_param->mb_segment_tree_probs[0] <<  0);
2887
2888     OUT_BCS_BATCH(batch,
2889                   pic_param->prob_skip_false << 24 |
2890                   pic_param->prob_intra      << 16 |
2891                   pic_param->prob_last       <<  8 |
2892                   pic_param->prob_gf         <<  0);
2893
2894     OUT_BCS_BATCH(batch,
2895                   pic_param->y_mode_probs[3] << 24 |
2896                   pic_param->y_mode_probs[2] << 16 |
2897                   pic_param->y_mode_probs[1] <<  8 |
2898                   pic_param->y_mode_probs[0] <<  0);
2899
2900     OUT_BCS_BATCH(batch,
2901                   pic_param->uv_mode_probs[2] << 16 |
2902                   pic_param->uv_mode_probs[1] <<  8 |
2903                   pic_param->uv_mode_probs[0] <<  0);
2904     
2905     /* MV update value, DW23-DW32 */
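    /* mv_probs[i] holds 19 probabilities, so the final byte of the last DWord
     * has no source value and is written as zero. */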
2906     for (i = 0; i < 2; i++) {
2907         for (j = 0; j < 20; j += 4) {
2908             OUT_BCS_BATCH(batch,
2909                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2910                           pic_param->mv_probs[i][j + 2] << 16 |
2911                           pic_param->mv_probs[i][j + 1] <<  8 |
2912                           pic_param->mv_probs[i][j + 0] <<  0);
2913         }
2914     }
2915
2916     OUT_BCS_BATCH(batch,
2917                   (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2918                   (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2919                   (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
2920                   (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);
2921
2922     OUT_BCS_BATCH(batch,
2923                   (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2924                   (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2925                   (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
2926                   (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);
2927
2928     /* segmentation id stream base address, DW35-DW37 */
2929     if (enable_segmentation) {
2930         OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2931                       0, I915_GEM_DOMAIN_INSTRUCTION,
2932                       0);
2933         OUT_BCS_BATCH(batch, 0);
2934         OUT_BCS_BATCH(batch, 0);
2935     } else {
2937         OUT_BCS_BATCH(batch, 0);
2938         OUT_BCS_BATCH(batch, 0);
2939         OUT_BCS_BATCH(batch, 0);
2940     }
2941     ADVANCE_BCS_BATCH(batch);
2942 }
2943
2944 static void
2945 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2946                         VAPictureParameterBufferVP8 *pic_param,
2947                         VASliceParameterBufferVP8 *slice_param,
2948                         dri_bo *slice_data_bo,
2949                         struct gen7_mfd_context *gen7_mfd_context)
2950 {
2951     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2952     int i, log2num;
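    /* macroblock_offset is a bit offset into partition 0: round it up to a
     * byte offset (the sub-byte position goes into the BSD object below) and
     * work out how many bits of the current byte the boolean decoder has
     * already consumed, assuming bool_coder_ctx.count holds the bits still
     * pending (see the assert below). */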
2953     unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
2954     unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
2955     unsigned int partition_size_0 = slice_param->partition_size[0];
2956
2957     assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
2958     if (used_bits == 8) {
2959         used_bits = 0;
2960         offset += 1;
2961         partition_size_0 -= 1;
2962     }
2963
2964     assert(slice_param->num_of_partitions >= 2);
2965     assert(slice_param->num_of_partitions <= 9);
2966
2967     log2num = (int)log2(slice_param->num_of_partitions - 1);
2968
2969     BEGIN_BCS_BATCH(batch, 22);
2970     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2971     OUT_BCS_BATCH(batch,
2972                   used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2973                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
2974                   log2num << 4 |
2975                   (slice_param->macroblock_offset & 0x7));
2976     OUT_BCS_BATCH(batch,
2977                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
2978                   0);
2979
2980     OUT_BCS_BATCH(batch, partition_size_0);
2981     OUT_BCS_BATCH(batch, offset);
2982     /* partition sizes (in bytes) follow the first partition in the bitstream when there is more than one token partition */
2983     offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
2984     for (i = 1; i < 9; i++) {
2985         if (i < slice_param->num_of_partitions) {
2986             OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
2987             OUT_BCS_BATCH(batch, offset);
2988         } else {
2989             OUT_BCS_BATCH(batch, 0);
2990             OUT_BCS_BATCH(batch, 0);
2991         }
2992
2993         offset += slice_param->partition_size[i];
2994     }
2995
2996     OUT_BCS_BATCH(batch,
2997                   1 << 31 | /* concealment method */
2998                   0);
2999
3000     ADVANCE_BCS_BATCH(batch);
3001 }
3002
3003 void
3004 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3005                             struct decode_state *decode_state,
3006                             struct gen7_mfd_context *gen7_mfd_context)
3007 {
3008     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3009     VAPictureParameterBufferVP8 *pic_param;
3010     VASliceParameterBufferVP8 *slice_param;
3011     dri_bo *slice_data_bo;
3012
3013     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3014     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3015
3016     /* one slice per frame */
3017     if (decode_state->num_slice_params != 1 ||
3018         (!decode_state->slice_params ||
3019          !decode_state->slice_params[0] ||
3020          (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3021         (!decode_state->slice_datas ||
3022          !decode_state->slice_datas[0] ||
3023          !decode_state->slice_datas[0]->bo) ||
3024         !decode_state->probability_data) {
3025         WARN_ONCE("Wrong parameters for VP8 decoding\n");
3026
3027         return;
3028     }
3029
3030     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3031     slice_data_bo = decode_state->slice_datas[0]->bo;
3032
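    /* Standard MFX programming sequence for a VP8 frame: per-frame buffers,
     * pipeline/surface/buffer-address state, picture state, then a single BSD
     * object for the frame's only slice. */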
3033     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3034     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3035     intel_batchbuffer_emit_mi_flush(batch);
3036     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3037     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3038     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3039     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3040     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3041     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3042     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3043     intel_batchbuffer_end_atomic(batch);
3044     intel_batchbuffer_flush(batch);
3045 }
3046
3047 static VAStatus
3048 gen8_mfd_decode_picture(VADriverContextP ctx, 
3049                         VAProfile profile, 
3050                         union codec_state *codec_state,
3051                         struct hw_context *hw_context)
3053 {
3054     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3055     struct decode_state *decode_state = &codec_state->decode;
3056     VAStatus vaStatus;
3057
3058     assert(gen7_mfd_context);
3059
3060     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3061
3062     if (vaStatus != VA_STATUS_SUCCESS)
3063         goto out;
3064
3065     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3066
3067     switch (profile) {
3068     case VAProfileMPEG2Simple:
3069     case VAProfileMPEG2Main:
3070         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3071         break;
3072         
3073     case VAProfileH264ConstrainedBaseline:
3074     case VAProfileH264Main:
3075     case VAProfileH264High:
3076         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3077         break;
3078
3079     case VAProfileVC1Simple:
3080     case VAProfileVC1Main:
3081     case VAProfileVC1Advanced:
3082         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3083         break;
3084
3085     case VAProfileJPEGBaseline:
3086         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3087         break;
3088
3089     case VAProfileVP8Version0_3:
3090         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3091         break;
3092
3093     default:
3094         assert(0);
3095         break;
3096     }
3097
3098     vaStatus = VA_STATUS_SUCCESS;
3099
3100 out:
3101     return vaStatus;
3102 }
3103
3104 static void
3105 gen8_mfd_context_destroy(void *hw_context)
3106 {
3107     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3108
3109     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3110     gen7_mfd_context->post_deblocking_output.bo = NULL;
3111
3112     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3113     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3114
3115     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3116     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3117
3118     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3119     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3120
3121     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3122     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3123
3124     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3125     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3126
3127     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3128     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3129
3130     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3131     gen7_mfd_context->segmentation_buffer.bo = NULL;
3132
3133     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3134
3135     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3136     free(gen7_mfd_context);
3137 }
3138
3139 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3140                                     struct gen7_mfd_context *gen7_mfd_context)
3141 {
3142     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3143     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3144     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3145     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3146 }
3147
3148 struct hw_context *
3149 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3150 {
3151     struct intel_driver_data *intel = intel_driver_data(ctx);
3152     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3153     int i;

    if (!gen7_mfd_context)
        return NULL;

3155     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3156     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3157     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3158
3159     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3160         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3161         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3162     }
3163
3164     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3165     gen7_mfd_context->segmentation_buffer.valid = 0;
3166
3167     switch (obj_config->profile) {
3168     case VAProfileMPEG2Simple:
3169     case VAProfileMPEG2Main:
3170         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3171         break;
3172
3173     case VAProfileH264ConstrainedBaseline:
3174     case VAProfileH264Main:
3175     case VAProfileH264High:
3176         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3177         break;
3178     default:
3179         break;
3180     }
3181     return (struct hw_context *)gen7_mfd_context;
3182 }