decoder: h264: fix frame store logic for MVC.
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV             2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
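/* Zig-zag scan order; gen8_mfd_mpeg2_qm_state() uses it to reorder the
 * MPEG-2 quantiser matrices before they are written with MFX_QM_STATE. */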
51 static const uint32_t zigzag_direct[64] = {
52     0,   1,  8, 16,  9,  2,  3, 10,
53     17, 24, 32, 25, 18, 11,  4,  5,
54     12, 19, 26, 33, 40, 48, 41, 34,
55     27, 20, 13,  6,  7, 14, 21, 28,
56     35, 42, 49, 56, 57, 50, 43, 36,
57     29, 22, 15, 23, 30, 37, 44, 51,
58     58, 59, 52, 45, 38, 31, 39, 46,
59     53, 60, 61, 54, 47, 55, 62, 63
60 };
61
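/* Lazily allocates the per-surface GenAvcSurface private data and the
 * direct-MV read/write buffer covering the whole frame (128 bytes per macroblock). */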
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx, 
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77         gen7_avc_surface->frame_store_id = -1;
78         assert((obj_surface->size & 0x3f) == 0);
79         obj_surface->private_data = gen7_avc_surface;
80     }
81
82     /* DMV buffers now relate to the whole frame, irrespective of
83        field coding modes */
84     if (gen7_avc_surface->dmv_top == NULL) {
85         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86                                                  "direct mv w/r buffer",
87                                                  width_in_mbs * height_in_mbs * 128,
88                                                  0x1000);
89         assert(gen7_avc_surface->dmv_top);
90     }
91 }
92
93 static void
94 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
95                           struct decode_state *decode_state,
96                           int standard_select,
97                           struct gen7_mfd_context *gen7_mfd_context)
98 {
99     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
100
101     assert(standard_select == MFX_FORMAT_MPEG2 ||
102            standard_select == MFX_FORMAT_AVC ||
103            standard_select == MFX_FORMAT_VC1 ||
104            standard_select == MFX_FORMAT_JPEG ||
105            standard_select == MFX_FORMAT_VP8);
106
107     BEGIN_BCS_BATCH(batch, 5);
108     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
109     OUT_BCS_BATCH(batch,
110                   (MFX_LONG_MODE << 17) | /* Currently only the long format is supported */
111                   (MFD_MODE_VLD << 15) | /* VLD mode */
112                   (0 << 10) | /* disable Stream-Out */
113                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
114                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
115                   (0 << 5)  | /* not in stitch mode */
116                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
117                   (standard_select << 0));
118     OUT_BCS_BATCH(batch,
119                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
120                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
121                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
122                   (0 << 1)  |
123                   (0 << 0));
124     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
125     OUT_BCS_BATCH(batch, 0); /* reserved */
126     ADVANCE_BCS_BATCH(batch);
127 }
128
129 static void
130 gen8_mfd_surface_state(VADriverContextP ctx,
131                        struct decode_state *decode_state,
132                        int standard_select,
133                        struct gen7_mfd_context *gen7_mfd_context)
134 {
135     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
136     struct object_surface *obj_surface = decode_state->render_object;
137     unsigned int y_cb_offset;
138     unsigned int y_cr_offset;
139     unsigned int surface_format;
140
141     assert(obj_surface);
142
143     y_cb_offset = obj_surface->y_cb_offset;
144     y_cr_offset = obj_surface->y_cr_offset;
145
146     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
147         MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
148
149     BEGIN_BCS_BATCH(batch, 6);
150     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
151     OUT_BCS_BATCH(batch, 0);
152     OUT_BCS_BATCH(batch,
153                   ((obj_surface->orig_height - 1) << 18) |
154                   ((obj_surface->orig_width - 1) << 4));
155     OUT_BCS_BATCH(batch,
156                   (surface_format << 28) | /* 420 planar YUV surface */
157                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
158                   (0 << 22) | /* surface object control state, ignored */
159                   ((obj_surface->width - 1) << 3) | /* pitch */
160                   (0 << 2)  | /* must be 0 */
161                   (1 << 1)  | /* must be tiled */
162                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
163     OUT_BCS_BATCH(batch,
164                   (0 << 16) | /* X offset for U(Cb), must be 0 */
165                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
166     OUT_BCS_BATCH(batch,
167                   (0 << 16) | /* X offset for V(Cr), must be 0 */
168                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
169     ADVANCE_BCS_BATCH(batch);
170 }
171
172 static void
173 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
174                              struct decode_state *decode_state,
175                              int standard_select,
176                              struct gen7_mfd_context *gen7_mfd_context)
177 {
178     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
179     int i;
180
181     BEGIN_BCS_BATCH(batch, 61);
182     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
183         /* Pre-deblock 1-3 */
184     if (gen7_mfd_context->pre_deblocking_output.valid)
185         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
186                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
187                       0);
188     else
189         OUT_BCS_BATCH(batch, 0);
190
191         OUT_BCS_BATCH(batch, 0);
192         OUT_BCS_BATCH(batch, 0);
193         /* Post-deblocking 4-6 */
194     if (gen7_mfd_context->post_deblocking_output.valid)
195         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
196                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
197                       0);
198     else
199         OUT_BCS_BATCH(batch, 0);
200
201         OUT_BCS_BATCH(batch, 0);
202         OUT_BCS_BATCH(batch, 0);
203
204         /* uncompressed-video & stream out 7-12 */
205     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
206     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
207         OUT_BCS_BATCH(batch, 0);
208         OUT_BCS_BATCH(batch, 0);
209         OUT_BCS_BATCH(batch, 0);
210         OUT_BCS_BATCH(batch, 0);
211
212         /* intra row-store scratch 13-15 */
213     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
214         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
215                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
216                       0);
217     else
218         OUT_BCS_BATCH(batch, 0);
219
220         OUT_BCS_BATCH(batch, 0);
221         OUT_BCS_BATCH(batch, 0);
222         /* deblocking-filter-row-store 16-18 */
223     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
224         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
225                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
226                       0);
227     else
228         OUT_BCS_BATCH(batch, 0);
229         OUT_BCS_BATCH(batch, 0);
230         OUT_BCS_BATCH(batch, 0);
231
232     /* DW 19..50 */
233     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
234         struct object_surface *obj_surface;
235
236         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
237             gen7_mfd_context->reference_surface[i].obj_surface &&
238             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
239             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
240
241             OUT_BCS_RELOC(batch, obj_surface->bo,
242                           I915_GEM_DOMAIN_INSTRUCTION, 0,
243                           0);
244         } else {
245             OUT_BCS_BATCH(batch, 0);
246         }
247         
248         OUT_BCS_BATCH(batch, 0);
249     }
250     
251     /* reference property 51 */
252     OUT_BCS_BATCH(batch, 0);  
253         
254     /* Macroblock status & ILDB 52-57 */
255     OUT_BCS_BATCH(batch, 0);
256     OUT_BCS_BATCH(batch, 0);
257     OUT_BCS_BATCH(batch, 0);
258     OUT_BCS_BATCH(batch, 0);
259     OUT_BCS_BATCH(batch, 0);
260     OUT_BCS_BATCH(batch, 0);
261
262     /* the second Macroblock status 58-60 */    
263     OUT_BCS_BATCH(batch, 0);
264     OUT_BCS_BATCH(batch, 0);
265     OUT_BCS_BATCH(batch, 0);
266
267     ADVANCE_BCS_BATCH(batch);
268 }
269
270 static void
271 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
272                                  dri_bo *slice_data_bo,
273                                  int standard_select,
274                                  struct gen7_mfd_context *gen7_mfd_context)
275 {
276     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
277
278     BEGIN_BCS_BATCH(batch, 26);
279     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
280         /* MFX In BS 1-5 */
281     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
282     OUT_BCS_BATCH(batch, 0);
283     OUT_BCS_BATCH(batch, 0);
284         /* Upper bound 4-5 */   
285     OUT_BCS_BATCH(batch, 0x80000000); /* must be set, up to 2G */
286     OUT_BCS_BATCH(batch, 0);
287
288         /* MFX indirect MV 6-10 */
289     OUT_BCS_BATCH(batch, 0);
290     OUT_BCS_BATCH(batch, 0);
291     OUT_BCS_BATCH(batch, 0);
292     OUT_BCS_BATCH(batch, 0);
293     OUT_BCS_BATCH(batch, 0);
294         
295         /* MFX IT_COFF 11-15 */
296     OUT_BCS_BATCH(batch, 0);
297     OUT_BCS_BATCH(batch, 0);
298     OUT_BCS_BATCH(batch, 0);
299     OUT_BCS_BATCH(batch, 0);
300     OUT_BCS_BATCH(batch, 0);
301
302         /* MFX IT_DBLK 16-20 */
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305     OUT_BCS_BATCH(batch, 0);
306     OUT_BCS_BATCH(batch, 0);
307     OUT_BCS_BATCH(batch, 0);
308
309         /* MFX PAK_BSE object for encoder 21-25 */
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312     OUT_BCS_BATCH(batch, 0);
313     OUT_BCS_BATCH(batch, 0);
314     OUT_BCS_BATCH(batch, 0);
315
316     ADVANCE_BCS_BATCH(batch);
317 }
318
319 static void
320 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
321                                  struct decode_state *decode_state,
322                                  int standard_select,
323                                  struct gen7_mfd_context *gen7_mfd_context)
324 {
325     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
326
327     BEGIN_BCS_BATCH(batch, 10);
328     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
329
330     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
331         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
332                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
333                       0);
334     else
335         OUT_BCS_BATCH(batch, 0);
336                 
337     OUT_BCS_BATCH(batch, 0);
338     OUT_BCS_BATCH(batch, 0);
339         /* MPR Row Store Scratch buffer 4-6 */
340     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
341         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
342                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
343                       0);
344     else
345         OUT_BCS_BATCH(batch, 0);
346
347     OUT_BCS_BATCH(batch, 0);
348     OUT_BCS_BATCH(batch, 0);
349
350         /* Bitplane 7-9 */ 
351     if (gen7_mfd_context->bitplane_read_buffer.valid)
352         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
353                       I915_GEM_DOMAIN_INSTRUCTION, 0,
354                       0);
355     else
356         OUT_BCS_BATCH(batch, 0);
357     OUT_BCS_BATCH(batch, 0);
358     OUT_BCS_BATCH(batch, 0);
359     ADVANCE_BCS_BATCH(batch);
360 }
361
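/* Copies the quantiser matrix into a 16-dword scratch buffer and emits it
 * as a single MFX_QM_STATE command for the given matrix type. */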
362 static void
363 gen8_mfd_qm_state(VADriverContextP ctx,
364                   int qm_type,
365                   unsigned char *qm,
366                   int qm_length,
367                   struct gen7_mfd_context *gen7_mfd_context)
368 {
369     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
370     unsigned int qm_buffer[16];
371
372     assert(qm_length <= 16 * 4);
373     memcpy(qm_buffer, qm, qm_length);
374
375     BEGIN_BCS_BATCH(batch, 18);
376     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
377     OUT_BCS_BATCH(batch, qm_type << 0);
378     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
379     ADVANCE_BCS_BATCH(batch);
380 }
381
382 static void
383 gen8_mfd_avc_img_state(VADriverContextP ctx,
384                        struct decode_state *decode_state,
385                        struct gen7_mfd_context *gen7_mfd_context)
386 {
387     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
388     int img_struct;
389     int mbaff_frame_flag;
390     unsigned int width_in_mbs, height_in_mbs;
391     VAPictureParameterBufferH264 *pic_param;
392
393     assert(decode_state->pic_param && decode_state->pic_param->buffer);
394     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
395     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
396
397     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
398         img_struct = 1;
399     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
400         img_struct = 3;
401     else
402         img_struct = 0;
403
404     if ((img_struct & 0x1) == 0x1) {
405         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
406     } else {
407         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
408     }
409
410     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
411         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
412         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
413     } else {
414         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
415     }
416
417     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
418                         !pic_param->pic_fields.bits.field_pic_flag);
419
420     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
421     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
422
423     /* MFX unit doesn't support 4:2:2 and 4:4:4 pictures */
424     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
425            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
426     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
427
428     BEGIN_BCS_BATCH(batch, 17);
429     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
430     OUT_BCS_BATCH(batch, 
431                   (width_in_mbs * height_in_mbs - 1));
432     OUT_BCS_BATCH(batch, 
433                   ((height_in_mbs - 1) << 16) | 
434                   ((width_in_mbs - 1) << 0));
435     OUT_BCS_BATCH(batch, 
436                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
437                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
438                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
439                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
440                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differs from GEN6 */
441                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
442                   (img_struct << 8));
443     OUT_BCS_BATCH(batch,
444                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
445                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
446                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
447                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
448                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
449                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
450                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
451                   (mbaff_frame_flag << 1) |
452                   (pic_param->pic_fields.bits.field_pic_flag << 0));
453     OUT_BCS_BATCH(batch, 0);
454     OUT_BCS_BATCH(batch, 0);
455     OUT_BCS_BATCH(batch, 0);
456     OUT_BCS_BATCH(batch, 0);
457     OUT_BCS_BATCH(batch, 0);
458     OUT_BCS_BATCH(batch, 0);
459     OUT_BCS_BATCH(batch, 0);
460     OUT_BCS_BATCH(batch, 0);
461     OUT_BCS_BATCH(batch, 0);
462     OUT_BCS_BATCH(batch, 0);
463     OUT_BCS_BATCH(batch, 0);
464     OUT_BCS_BATCH(batch, 0);
465     ADVANCE_BCS_BATCH(batch);
466 }
467
468 static void
469 gen8_mfd_avc_qm_state(VADriverContextP ctx,
470                       struct decode_state *decode_state,
471                       struct gen7_mfd_context *gen7_mfd_context)
472 {
473     VAIQMatrixBufferH264 *iq_matrix;
474     VAPictureParameterBufferH264 *pic_param;
475
476     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
477         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
478     else
479         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
480
481     assert(decode_state->pic_param && decode_state->pic_param->buffer);
482     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
483
484     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
485     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
486
487     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
488         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
489         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
490     }
491 }
492
493 static inline void
494 gen8_mfd_avc_picid_state(VADriverContextP ctx,
495     struct decode_state *decode_state,
496     struct gen7_mfd_context *gen7_mfd_context)
497 {
498     gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
499         gen7_mfd_context->reference_surface);
500 }
501
502 static void
503 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
504                               struct decode_state *decode_state,
505                               VAPictureParameterBufferH264 *pic_param,
506                               VASliceParameterBufferH264 *slice_param,
507                               struct gen7_mfd_context *gen7_mfd_context)
508 {
509     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
510     struct object_surface *obj_surface;
511     GenAvcSurface *gen7_avc_surface;
512     VAPictureH264 *va_pic;
513     int i;
514
515     BEGIN_BCS_BATCH(batch, 71);
516     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
517
518     /* reference surfaces 0..15 */
519     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
520         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
521             gen7_mfd_context->reference_surface[i].obj_surface &&
522             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
523
524             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
525             gen7_avc_surface = obj_surface->private_data;
526
527             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
528                           I915_GEM_DOMAIN_INSTRUCTION, 0,
529                           0);
530             OUT_BCS_BATCH(batch, 0);
531         } else {
532             OUT_BCS_BATCH(batch, 0);
533             OUT_BCS_BATCH(batch, 0);
534         }
535     }
536     
537     OUT_BCS_BATCH(batch, 0);
538
539     /* the current decoding frame/field */
540     va_pic = &pic_param->CurrPic;
541     obj_surface = decode_state->render_object;
542     assert(obj_surface->bo && obj_surface->private_data);
543     gen7_avc_surface = obj_surface->private_data;
544
545     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
546                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
547                   0);
548
549     OUT_BCS_BATCH(batch, 0);
550     OUT_BCS_BATCH(batch, 0);
551
552     /* POC List */
553     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
554         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
555
556         if (obj_surface) {
557             const VAPictureH264 * const va_pic = avc_find_picture(
558                 obj_surface->base.id, pic_param->ReferenceFrames,
559                 ARRAY_ELEMS(pic_param->ReferenceFrames));
560
561             assert(va_pic != NULL);
562             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
563             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
564         } else {
565             OUT_BCS_BATCH(batch, 0);
566             OUT_BCS_BATCH(batch, 0);
567         }
568     }
569
570     va_pic = &pic_param->CurrPic;
571     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
572     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
573
574     ADVANCE_BCS_BATCH(batch);
575 }
576
577 static void
578 gen8_mfd_avc_slice_state(VADriverContextP ctx,
579                          VAPictureParameterBufferH264 *pic_param,
580                          VASliceParameterBufferH264 *slice_param,
581                          VASliceParameterBufferH264 *next_slice_param,
582                          struct gen7_mfd_context *gen7_mfd_context)
583 {
584     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
585     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
586     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
587     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
588     int num_ref_idx_l0, num_ref_idx_l1;
589     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
590                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
591     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
592     int slice_type;
593
594     if (slice_param->slice_type == SLICE_TYPE_I ||
595         slice_param->slice_type == SLICE_TYPE_SI) {
596         slice_type = SLICE_TYPE_I;
597     } else if (slice_param->slice_type == SLICE_TYPE_P ||
598                slice_param->slice_type == SLICE_TYPE_SP) {
599         slice_type = SLICE_TYPE_P;
600     } else { 
601         assert(slice_param->slice_type == SLICE_TYPE_B);
602         slice_type = SLICE_TYPE_B;
603     }
604
605     if (slice_type == SLICE_TYPE_I) {
606         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
607         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
608         num_ref_idx_l0 = 0;
609         num_ref_idx_l1 = 0;
610     } else if (slice_type == SLICE_TYPE_P) {
611         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
612         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
613         num_ref_idx_l1 = 0;
614     } else {
615         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
616         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
617     }
618
619     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
620     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
621     slice_ver_pos = first_mb_in_slice / width_in_mbs;
622
623     if (next_slice_param) {
624         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
625         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
626         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
627     } else {
628         next_slice_hor_pos = 0;
629         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
630     }
631
632     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
633     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
634     OUT_BCS_BATCH(batch, slice_type);
635     OUT_BCS_BATCH(batch, 
636                   (num_ref_idx_l1 << 24) |
637                   (num_ref_idx_l0 << 16) |
638                   (slice_param->chroma_log2_weight_denom << 8) |
639                   (slice_param->luma_log2_weight_denom << 0));
640     OUT_BCS_BATCH(batch, 
641                   (slice_param->direct_spatial_mv_pred_flag << 29) |
642                   (slice_param->disable_deblocking_filter_idc << 27) |
643                   (slice_param->cabac_init_idc << 24) |
644                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
645                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
646                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
647     OUT_BCS_BATCH(batch, 
648                   (slice_ver_pos << 24) |
649                   (slice_hor_pos << 16) | 
650                   (first_mb_in_slice << 0));
651     OUT_BCS_BATCH(batch,
652                   (next_slice_ver_pos << 16) |
653                   (next_slice_hor_pos << 0));
654     OUT_BCS_BATCH(batch, 
655                   (next_slice_param == NULL) << 19); /* last slice flag */
656     OUT_BCS_BATCH(batch, 0);
657     OUT_BCS_BATCH(batch, 0);
658     OUT_BCS_BATCH(batch, 0);
659     OUT_BCS_BATCH(batch, 0);
660     ADVANCE_BCS_BATCH(batch);
661 }
662
663 static inline void
664 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
665                            VAPictureParameterBufferH264 *pic_param,
666                            VASliceParameterBufferH264 *slice_param,
667                            struct gen7_mfd_context *gen7_mfd_context)
668 {
669     gen6_send_avc_ref_idx_state(
670         gen7_mfd_context->base.batch,
671         slice_param,
672         gen7_mfd_context->reference_surface
673     );
674 }
675
676 static void
677 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
678                                 VAPictureParameterBufferH264 *pic_param,
679                                 VASliceParameterBufferH264 *slice_param,
680                                 struct gen7_mfd_context *gen7_mfd_context)
681 {
682     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
683     int i, j, num_weight_offset_table = 0;
684     short weightoffsets[32 * 6];
685
686     if ((slice_param->slice_type == SLICE_TYPE_P ||
687          slice_param->slice_type == SLICE_TYPE_SP) &&
688         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
689         num_weight_offset_table = 1;
690     }
691     
692     if ((slice_param->slice_type == SLICE_TYPE_B) &&
693         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
694         num_weight_offset_table = 2;
695     }
696
697     for (i = 0; i < num_weight_offset_table; i++) {
698         BEGIN_BCS_BATCH(batch, 98);
699         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
700         OUT_BCS_BATCH(batch, i);
701
702         if (i == 0) {
703             for (j = 0; j < 32; j++) {
704                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
705                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
706                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
707                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
708                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
709                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
710             }
711         } else {
712             for (j = 0; j < 32; j++) {
713                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
714                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
715                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
716                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
717                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
718                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
719             }
720         }
721
722         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
723         ADVANCE_BCS_BATCH(batch);
724     }
725 }
726
727 static void
728 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
729                         VAPictureParameterBufferH264 *pic_param,
730                         VASliceParameterBufferH264 *slice_param,
731                         dri_bo *slice_data_bo,
732                         VASliceParameterBufferH264 *next_slice_param,
733                         struct gen7_mfd_context *gen7_mfd_context)
734 {
735     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
736     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
737                                                             slice_param,
738                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
739
740     /* the input bitstream format on GEN7 differs from GEN6 */
741     BEGIN_BCS_BATCH(batch, 6);
742     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
743     OUT_BCS_BATCH(batch, 
744                   (slice_param->slice_data_size));
745     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
746     OUT_BCS_BATCH(batch,
747                   (0 << 31) |
748                   (0 << 14) |
749                   (0 << 12) |
750                   (0 << 10) |
751                   (0 << 8));
752     OUT_BCS_BATCH(batch,
753                   ((slice_data_bit_offset >> 3) << 16) |
754                   (1 << 7)  |
755                   (0 << 5)  |
756                   (0 << 4)  |
757                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
758                   (slice_data_bit_offset & 0x7));
759     OUT_BCS_BATCH(batch, 0);
760     ADVANCE_BCS_BATCH(batch);
761 }
762
763 static inline void
764 gen8_mfd_avc_context_init(
765     VADriverContextP         ctx,
766     struct gen7_mfd_context *gen7_mfd_context
767 )
768 {
769     /* Initialize flat scaling lists */
770     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
771 }
772
773 static void
774 gen8_mfd_avc_decode_init(VADriverContextP ctx,
775                          struct decode_state *decode_state,
776                          struct gen7_mfd_context *gen7_mfd_context)
777 {
778     VAPictureParameterBufferH264 *pic_param;
779     VASliceParameterBufferH264 *slice_param;
780     struct i965_driver_data *i965 = i965_driver_data(ctx);
781     struct object_surface *obj_surface;
782     dri_bo *bo;
783     int i, j, enable_avc_ildb = 0;
784     unsigned int width_in_mbs, height_in_mbs;
785
786     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
787         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
788         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
789
790         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
791             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
792             assert((slice_param->slice_type == SLICE_TYPE_I) ||
793                    (slice_param->slice_type == SLICE_TYPE_SI) ||
794                    (slice_param->slice_type == SLICE_TYPE_P) ||
795                    (slice_param->slice_type == SLICE_TYPE_SP) ||
796                    (slice_param->slice_type == SLICE_TYPE_B));
797
798             if (slice_param->disable_deblocking_filter_idc != 1) {
799                 enable_avc_ildb = 1;
800                 break;
801             }
802
803             slice_param++;
804         }
805     }
806
807     assert(decode_state->pic_param && decode_state->pic_param->buffer);
808     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
809     gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
810         gen7_mfd_context->reference_surface);
811     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
812     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
813     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
814     assert(height_in_mbs > 0 && height_in_mbs <= 256);
815
816     /* Current decoded picture */
817     obj_surface = decode_state->render_object;
818     if (pic_param->pic_fields.bits.reference_pic_flag)
819         obj_surface->flags |= SURFACE_REFERENCED;
820     else
821         obj_surface->flags &= ~SURFACE_REFERENCED;
822
823     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
824     gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
825
826     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
827     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
828     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
829     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
830
831     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
832     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
833     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
834     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
835
836     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
837     bo = dri_bo_alloc(i965->intel.bufmgr,
838                       "intra row store",
839                       width_in_mbs * 64,
840                       0x1000);
841     assert(bo);
842     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
843     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
844
845     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
846     bo = dri_bo_alloc(i965->intel.bufmgr,
847                       "deblocking filter row store",
848                       width_in_mbs * 64 * 4,
849                       0x1000);
850     assert(bo);
851     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
852     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
853
854     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
855     bo = dri_bo_alloc(i965->intel.bufmgr,
856                       "bsd mpc row store",
857                       width_in_mbs * 64 * 2,
858                       0x1000);
859     assert(bo);
860     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
861     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
862
863     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
864     bo = dri_bo_alloc(i965->intel.bufmgr,
865                       "mpr row store",
866                       width_in_mbs * 64 * 2,
867                       0x1000);
868     assert(bo);
869     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
870     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
871
872     gen7_mfd_context->bitplane_read_buffer.valid = 0;
873 }
874
875 static void
876 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
877                             struct decode_state *decode_state,
878                             struct gen7_mfd_context *gen7_mfd_context)
879 {
880     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
881     VAPictureParameterBufferH264 *pic_param;
882     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
883     dri_bo *slice_data_bo;
884     int i, j;
885
886     assert(decode_state->pic_param && decode_state->pic_param->buffer);
887     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
888     gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
889
890     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
891     intel_batchbuffer_emit_mi_flush(batch);
892     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
893     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
894     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
895     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
896     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
897     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
898     gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
899
900     for (j = 0; j < decode_state->num_slice_params; j++) {
901         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
902         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
903         slice_data_bo = decode_state->slice_datas[j]->bo;
904         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
905
906         if (j == decode_state->num_slice_params - 1)
907             next_slice_group_param = NULL;
908         else
909             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
910
911         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
912             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
913             assert((slice_param->slice_type == SLICE_TYPE_I) ||
914                    (slice_param->slice_type == SLICE_TYPE_SI) ||
915                    (slice_param->slice_type == SLICE_TYPE_P) ||
916                    (slice_param->slice_type == SLICE_TYPE_SP) ||
917                    (slice_param->slice_type == SLICE_TYPE_B));
918
919             if (i < decode_state->slice_params[j]->num_elements - 1)
920                 next_slice_param = slice_param + 1;
921             else
922                 next_slice_param = next_slice_group_param;
923
924             gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
925             gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
926             gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
927             gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
928             gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
929             slice_param++;
930         }
931     }
932
933     intel_batchbuffer_end_atomic(batch);
934     intel_batchbuffer_flush(batch);
935 }
936
937 static void
938 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
939                            struct decode_state *decode_state,
940                            struct gen7_mfd_context *gen7_mfd_context)
941 {
942     VAPictureParameterBufferMPEG2 *pic_param;
943     struct i965_driver_data *i965 = i965_driver_data(ctx);
944     struct object_surface *obj_surface;
945     dri_bo *bo;
946     unsigned int width_in_mbs;
947
948     assert(decode_state->pic_param && decode_state->pic_param->buffer);
949     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
950     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
951
952     mpeg2_set_reference_surfaces(
953         ctx,
954         gen7_mfd_context->reference_surface,
955         decode_state,
956         pic_param
957     );
958
959     /* Current decoded picture */
960     obj_surface = decode_state->render_object;
961     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
962
963     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
964     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
965     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
966     gen7_mfd_context->pre_deblocking_output.valid = 1;
967
968     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
969     bo = dri_bo_alloc(i965->intel.bufmgr,
970                       "bsd mpc row store",
971                       width_in_mbs * 96,
972                       0x1000);
973     assert(bo);
974     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
975     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
976
977     gen7_mfd_context->post_deblocking_output.valid = 0;
978     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
979     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
980     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
981     gen7_mfd_context->bitplane_read_buffer.valid = 0;
982 }
983
984 static void
985 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
986                          struct decode_state *decode_state,
987                          struct gen7_mfd_context *gen7_mfd_context)
988 {
989     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
990     VAPictureParameterBufferMPEG2 *pic_param;
991     unsigned int slice_concealment_disable_bit = 0;
992
993     assert(decode_state->pic_param && decode_state->pic_param->buffer);
994     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
995
996     slice_concealment_disable_bit = 1;
997
998     BEGIN_BCS_BATCH(batch, 13);
999     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1000     OUT_BCS_BATCH(batch,
1001                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1002                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1003                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1004                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1005                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1006                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1007                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1008                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1009                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1010                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1011                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1012                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1013     OUT_BCS_BATCH(batch,
1014                   pic_param->picture_coding_type << 9);
1015     OUT_BCS_BATCH(batch,
1016                   (slice_concealment_disable_bit << 31) |
1017                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1018                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1019     OUT_BCS_BATCH(batch, 0);
1020     OUT_BCS_BATCH(batch, 0);
1021     OUT_BCS_BATCH(batch, 0);
1022     OUT_BCS_BATCH(batch, 0);
1023     OUT_BCS_BATCH(batch, 0);
1024     OUT_BCS_BATCH(batch, 0);
1025     OUT_BCS_BATCH(batch, 0);
1026     OUT_BCS_BATCH(batch, 0);
1027     OUT_BCS_BATCH(batch, 0);
1028     ADVANCE_BCS_BATCH(batch);
1029 }
1030
1031 static void
1032 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1033                         struct decode_state *decode_state,
1034                         struct gen7_mfd_context *gen7_mfd_context)
1035 {
1036     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1037     int i, j;
1038
1039     /* Update internal QM state */
1040     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1041         VAIQMatrixBufferMPEG2 * const iq_matrix =
1042             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1043
1044         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1045             iq_matrix->load_intra_quantiser_matrix) {
1046             gen_iq_matrix->load_intra_quantiser_matrix =
1047                 iq_matrix->load_intra_quantiser_matrix;
1048             if (iq_matrix->load_intra_quantiser_matrix) {
1049                 for (j = 0; j < 64; j++)
1050                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1051                         iq_matrix->intra_quantiser_matrix[j];
1052             }
1053         }
1054
1055         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1056             iq_matrix->load_non_intra_quantiser_matrix) {
1057             gen_iq_matrix->load_non_intra_quantiser_matrix =
1058                 iq_matrix->load_non_intra_quantiser_matrix;
1059             if (iq_matrix->load_non_intra_quantiser_matrix) {
1060                 for (j = 0; j < 64; j++)
1061                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1062                         iq_matrix->non_intra_quantiser_matrix[j];
1063             }
1064         }
1065     }
1066
1067     /* Commit QM state to HW */
1068     for (i = 0; i < 2; i++) {
1069         unsigned char *qm = NULL;
1070         int qm_type;
1071
1072         if (i == 0) {
1073             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1074                 qm = gen_iq_matrix->intra_quantiser_matrix;
1075                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1076             }
1077         } else {
1078             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1079                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1080                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1081             }
1082         }
1083
1084         if (!qm)
1085             continue;
1086
1087         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1088     }
1089 }
1090
1091 static void
1092 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1093                           VAPictureParameterBufferMPEG2 *pic_param,
1094                           VASliceParameterBufferMPEG2 *slice_param,
1095                           VASliceParameterBufferMPEG2 *next_slice_param,
1096                           struct gen7_mfd_context *gen7_mfd_context)
1097 {
1098     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1099     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1100     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1101
1102     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1103         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1104         is_field_pic = 1;
1105     is_field_pic_wa = is_field_pic &&
1106         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1107
1108     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1109     hpos0 = slice_param->slice_horizontal_position;
1110
1111     if (next_slice_param == NULL) {
1112         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1113         hpos1 = 0;
1114     } else {
1115         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1116         hpos1 = next_slice_param->slice_horizontal_position;
1117     }
1118
1119     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1120
1121     BEGIN_BCS_BATCH(batch, 5);
1122     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1123     OUT_BCS_BATCH(batch, 
1124                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1125     OUT_BCS_BATCH(batch, 
1126                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1127     OUT_BCS_BATCH(batch,
1128                   hpos0 << 24 |
1129                   vpos0 << 16 |
1130                   mb_count << 8 |
1131                   (next_slice_param == NULL) << 5 |
1132                   (next_slice_param == NULL) << 3 |
1133                   (slice_param->macroblock_offset & 0x7));
1134     OUT_BCS_BATCH(batch,
1135                   (slice_param->quantiser_scale_code << 24) |
1136                   (vpos1 << 8 | hpos1));
1137     ADVANCE_BCS_BATCH(batch);
1138 }
1139
1140 static void
1141 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1142                               struct decode_state *decode_state,
1143                               struct gen7_mfd_context *gen7_mfd_context)
1144 {
1145     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1146     VAPictureParameterBufferMPEG2 *pic_param;
1147     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1148     dri_bo *slice_data_bo;
1149     int i, j;
1150
1151     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1152     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1153
1154     gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1155     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1156     intel_batchbuffer_emit_mi_flush(batch);
1157     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1158     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1159     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1160     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1161     gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1162     gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1163
1164     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1165         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1166             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1167
1168     for (j = 0; j < decode_state->num_slice_params; j++) {
1169         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1170         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1171         slice_data_bo = decode_state->slice_datas[j]->bo;
1172         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1173
1174         if (j == decode_state->num_slice_params - 1)
1175             next_slice_group_param = NULL;
1176         else
1177             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1178
1179         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1180             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1181
1182             if (i < decode_state->slice_params[j]->num_elements - 1)
1183                 next_slice_param = slice_param + 1;
1184             else
1185                 next_slice_param = next_slice_group_param;
1186
1187             gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1188             slice_param++;
1189         }
1190     }
1191
1192     intel_batchbuffer_end_atomic(batch);
1193     intel_batchbuffer_flush(batch);
1194 }
1195
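/* Lookup tables translating VA-API VC-1 picture parameters (picture type,
 * MV mode, conditional overlap, profile) into the encodings used by the MFX engine. */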
1196 static const int va_to_gen7_vc1_pic_type[5] = {
1197     GEN7_VC1_I_PICTURE,
1198     GEN7_VC1_P_PICTURE,
1199     GEN7_VC1_B_PICTURE,
1200     GEN7_VC1_BI_PICTURE,
1201     GEN7_VC1_P_PICTURE,
1202 };
1203
1204 static const int va_to_gen7_vc1_mv[4] = {
1205     1, /* 1-MV */
1206     2, /* 1-MV half-pel */
1207     2, /* 1-MV half-pel */
1208     3, /* 1-MV half-pel bilinear */
1208     0, /* Mixed MV */
1209 };
1210
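/* Scale factors for VC-1 B pictures, apparently indexed by the picture's
 * BFRACTION code (one entry per legal fractional value). */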
1211 static const int b_picture_scale_factor[21] = {
1212     128, 85,  170, 64,  192,
1213     51,  102, 153, 204, 43,
1214     215, 37,  74,  111, 148,
1215     185, 222, 32,  96,  160, 
1216     224,
1217 };
1218
1219 static const int va_to_gen7_vc1_condover[3] = {
1220     0,
1221     2,
1222     3
1223 };
1224
1225 static const int va_to_gen7_vc1_profile[4] = {
1226     GEN7_VC1_SIMPLE_PROFILE,
1227     GEN7_VC1_MAIN_PROFILE,
1228     GEN7_VC1_RESERVED_PROFILE,
1229     GEN7_VC1_ADVANCED_PROFILE
1230 };
1231
1232 static void 
1233 gen8_mfd_free_vc1_surface(void **data)
1234 {
1235     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1236
1237     if (!gen7_vc1_surface)
1238         return;
1239
1240     dri_bo_unreference(gen7_vc1_surface->dmv);
1241     free(gen7_vc1_surface);
1242     *data = NULL;
1243 }
1244
1245 static void
1246 gen8_mfd_init_vc1_surface(VADriverContextP ctx, 
1247                           VAPictureParameterBufferVC1 *pic_param,
1248                           struct object_surface *obj_surface)
1249 {
1250     struct i965_driver_data *i965 = i965_driver_data(ctx);
1251     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1252     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1253     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1254
1255     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1256
1257     if (!gen7_vc1_surface) {
1258         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1259         assert((obj_surface->size & 0x3f) == 0);
1260         obj_surface->private_data = gen7_vc1_surface;
1261     }
1262
1263     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1264
1265     if (gen7_vc1_surface->dmv == NULL) {
1266         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1267                                              "direct mv w/r buffer",
1268                                              width_in_mbs * height_in_mbs * 64,
1269                                              0x1000);
1270     }
1271 }
1272
1273 static void
1274 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1275                          struct decode_state *decode_state,
1276                          struct gen7_mfd_context *gen7_mfd_context)
1277 {
1278     VAPictureParameterBufferVC1 *pic_param;
1279     struct i965_driver_data *i965 = i965_driver_data(ctx);
1280     struct object_surface *obj_surface;
1281     dri_bo *bo;
1282     int width_in_mbs;
1283     int picture_type;
1284
1285     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1286     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1287     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1288     picture_type = pic_param->picture_fields.bits.picture_type;
1289  
1290     intel_update_vc1_frame_store_index(ctx,
1291                                        decode_state,
1292                                        pic_param,
1293                                        gen7_mfd_context->reference_surface);
1294
1295     /* Current decoded picture */
1296     obj_surface = decode_state->render_object;
1297     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1298     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1299
1300     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1301     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1302     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1303     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1304
1305     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1306     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1307     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1308     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1309
1310     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1311     bo = dri_bo_alloc(i965->intel.bufmgr,
1312                       "intra row store",
1313                       width_in_mbs * 64,
1314                       0x1000);
1315     assert(bo);
1316     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1317     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1318
1319     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1320     bo = dri_bo_alloc(i965->intel.bufmgr,
1321                       "deblocking filter row store",
1322                       width_in_mbs * 7 * 64,
1323                       0x1000);
1324     assert(bo);
1325     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1326     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1327
1328     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1329     bo = dri_bo_alloc(i965->intel.bufmgr,
1330                       "bsd mpc row store",
1331                       width_in_mbs * 96,
1332                       0x1000);
1333     assert(bo);
1334     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1335     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1336
1337     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1338
1339     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1340     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1341     
1342     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1343         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1344         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1345         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1346         int src_w, src_h;
1347         uint8_t *src = NULL, *dst = NULL;
1348
1349         assert(decode_state->bit_plane->buffer);
1350         src = decode_state->bit_plane->buffer;
1351
1352         bo = dri_bo_alloc(i965->intel.bufmgr,
1353                           "VC-1 Bitplane",
1354                           bitplane_width * height_in_mbs,
1355                           0x1000);
1356         assert(bo);
1357         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1358
1359         dri_bo_map(bo, True);
1360         assert(bo->virtual);
1361         dst = bo->virtual;
1362
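        /*
         * The VA bit_plane buffer packs two macroblocks per byte, 4 bits
         * each, with the even-indexed macroblock of a pair in the high
         * nibble. The loop below repacks the data row by row into the
         * hardware layout (bitplane_width bytes per macroblock row) and,
         * for skipped pictures, ORs 0x2 (the skipped-macroblock flag) into
         * every value.
         */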
1363         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1364             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1365                 int src_index, dst_index;
1366                 int src_shift;
1367                 uint8_t src_value;
1368
1369                 src_index = (src_h * width_in_mbs + src_w) / 2;
1370                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1371                 src_value = ((src[src_index] >> src_shift) & 0xf);
1372
1373                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1374                     src_value |= 0x2;
1375                 }
1376
1377                 dst_index = src_w / 2;
1378                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1379             }
1380
1381             if (src_w & 1)
1382                 dst[src_w / 2] >>= 4;
1383
1384             dst += bitplane_width;
1385         }
1386
1387         dri_bo_unmap(bo);
1388     } else
1389         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1390 }
1391
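/*
 * Translate the VA picture parameters into a MFD_VC1_LONG_PIC_STATE command.
 * The alternate PQUANT configuration and edge mask are derived from the
 * DQUANT/DQUANTFRM/DQPROFILE syntax, the unified MV and interpolation modes
 * from the (possibly intensity-compensated) MV mode, the B-fraction scale
 * factor and BRFD for B pictures, plus the frame coding mode (FCM) and the
 * overlap smoothing flag, before everything is packed into the six dwords
 * of the command.
 */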
1392 static void
1393 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1394                        struct decode_state *decode_state,
1395                        struct gen7_mfd_context *gen7_mfd_context)
1396 {
1397     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1398     VAPictureParameterBufferVC1 *pic_param;
1399     struct object_surface *obj_surface;
1400     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1401     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1402     int unified_mv_mode;
1403     int ref_field_pic_polarity = 0;
1404     int scale_factor = 0;
1405     int trans_ac_y = 0;
1406     int dmv_surface_valid = 0;
1407     int brfd = 0;
1408     int fcm = 0;
1409     int picture_type;
1410     int profile;
1411     int overlap;
1412     int interpolation_mode = 0;
1413
1414     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1415     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1416
1417     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1418     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1419     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1420     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1421     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1422     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1423     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1424     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1425
1426     if (dquant == 0) {
1427         alt_pquant_config = 0;
1428         alt_pquant_edge_mask = 0;
1429     } else if (dquant == 2) {
1430         alt_pquant_config = 1;
1431         alt_pquant_edge_mask = 0xf;
1432     } else {
1433         assert(dquant == 1);
1434         if (dquantfrm == 0) {
1435             alt_pquant_config = 0;
1436             alt_pquant_edge_mask = 0;
1437             alt_pq = 0;
1438         } else {
1439             assert(dquantfrm == 1);
1440             alt_pquant_config = 1;
1441
1442             switch (dqprofile) {
1443             case 3:
1444                 if (dqbilevel == 0) {
1445                     alt_pquant_config = 2;
1446                     alt_pquant_edge_mask = 0;
1447                 } else {
1448                     assert(dqbilevel == 1);
1449                     alt_pquant_config = 3;
1450                     alt_pquant_edge_mask = 0;
1451                 }
1452                 break;
1453                 
1454             case 0:
1455                 alt_pquant_edge_mask = 0xf;
1456                 break;
1457
1458             case 1:
1459                 if (dqdbedge == 3)
1460                     alt_pquant_edge_mask = 0x9;
1461                 else
1462                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1463
1464                 break;
1465
1466             case 2:
1467                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1468                 break;
1469
1470             default:
1471                 assert(0);
1472             }
1473         }
1474     }
1475
1476     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1477         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1478         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1479     } else {
1480         assert(pic_param->mv_fields.bits.mv_mode < 4);
1481         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1482     }
1483
1484     if (pic_param->sequence_fields.bits.interlace == 1 &&
1485         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1486         /* FIXME: calculate reference field picture polarity */
1487         assert(0);
1488         ref_field_pic_polarity = 0;
1489     }
1490
1491     if (pic_param->b_picture_fraction < 21)
1492         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1493
1494     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1495     
1496     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1497         picture_type == GEN7_VC1_I_PICTURE)
1498         picture_type = GEN7_VC1_BI_PICTURE;
1499
1500     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1501         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1502     else {
1503         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1504
1505         /*
1506          * 8.3.6.2.1 Transform Type Selection
1507          * If variable-sized transform coding is not enabled,
1508          * then the 8x8 transform shall be used for all blocks.
1509          * it is also MFX_VC1_PIC_STATE requirement.
1510          */
1511         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1512             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1513             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1514         }
1515     }
1516
1517     if (picture_type == GEN7_VC1_B_PICTURE) {
1518         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1519
1520         obj_surface = decode_state->reference_objects[1];
1521
1522         if (obj_surface)
1523             gen7_vc1_surface = obj_surface->private_data;
1524
1525         if (!gen7_vc1_surface || 
1526             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1527              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1528             dmv_surface_valid = 0;
1529         else
1530             dmv_surface_valid = 1;
1531     }
1532
1533     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1534
1535     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1536         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1537     else {
1538         if (pic_param->picture_fields.bits.top_field_first)
1539             fcm = 2;
1540         else
1541             fcm = 3;
1542     }
1543
1544     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1545         brfd = pic_param->reference_fields.bits.reference_distance;
1546         brfd = (scale_factor * brfd) >> 8;
1547         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1548
1549         if (brfd < 0)
1550             brfd = 0;
1551     }
1552
1553     overlap = 0;
1554     if (profile != GEN7_VC1_ADVANCED_PROFILE) {
1555         if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1556             pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1557             overlap = 1;
1558         }
1559     } else {
1560         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1561             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1562             overlap = 1;
1563         }
1564         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1565             pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
1566             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1567                 overlap = 1;
1568             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1569                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1570                 overlap = 1;
1571             }
1572         }
1573     }
1574
1575     assert(pic_param->conditional_overlap_flag < 3);
1576     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1577
1578     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1579         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1580          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1581         interpolation_mode = 9; /* Half-pel bilinear */
1582     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1583              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1584               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1585         interpolation_mode = 1; /* Half-pel bicubic */
1586     else
1587         interpolation_mode = 0; /* Quarter-pel bicubic */
1588
1589     BEGIN_BCS_BATCH(batch, 6);
1590     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1591     OUT_BCS_BATCH(batch,
1592                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1593                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1594     OUT_BCS_BATCH(batch,
1595                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1596                   dmv_surface_valid << 15 |
1597                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1598                   pic_param->rounding_control << 13 |
1599                   pic_param->sequence_fields.bits.syncmarker << 12 |
1600                   interpolation_mode << 8 |
1601                   0 << 7 | /* FIXME: scale up or down ??? */
1602                   pic_param->range_reduction_frame << 6 |
1603                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1604                   overlap << 4 |
1605                   !pic_param->picture_fields.bits.is_first_field << 3 |
1606                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1607     OUT_BCS_BATCH(batch,
1608                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1609                   picture_type << 26 |
1610                   fcm << 24 |
1611                   alt_pq << 16 |
1612                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1613                   scale_factor << 0);
1614     OUT_BCS_BATCH(batch,
1615                   unified_mv_mode << 28 |
1616                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1617                   pic_param->fast_uvmc_flag << 26 |
1618                   ref_field_pic_polarity << 25 |
1619                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1620                   pic_param->reference_fields.bits.reference_distance << 20 |
1621                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1622                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1623                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1624                   alt_pquant_edge_mask << 4 |
1625                   alt_pquant_config << 2 |
1626                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1627                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1628     OUT_BCS_BATCH(batch,
1629                   !!pic_param->bitplane_present.value << 31 |
1630                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1631                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1632                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1633                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1634                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1635                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1636                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1637                   pic_param->mv_fields.bits.mv_table << 20 |
1638                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1639                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1640                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1641                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1642                   pic_param->mb_mode_table << 8 |
1643                   trans_ac_y << 6 |
1644                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1645                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1646                   pic_param->cbp_table << 0);
1647     ADVANCE_BCS_BATCH(batch);
1648 }
1649
1650 static void
1651 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1652                              struct decode_state *decode_state,
1653                              struct gen7_mfd_context *gen7_mfd_context)
1654 {
1655     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1656     VAPictureParameterBufferVC1 *pic_param;
1657     int intensitycomp_single;
1658
1659     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1660     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1661
1664     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1665
1666     BEGIN_BCS_BATCH(batch, 6);
1667     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1668     OUT_BCS_BATCH(batch,
1669                   0 << 14 | /* FIXME: double ??? */
1670                   0 << 12 |
1671                   intensitycomp_single << 10 |
1672                   intensitycomp_single << 8 |
1673                   0 << 4 | /* FIXME: interlace mode */
1674                   0);
1675     OUT_BCS_BATCH(batch,
1676                   pic_param->luma_shift << 16 |
1677                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1678     OUT_BCS_BATCH(batch, 0);
1679     OUT_BCS_BATCH(batch, 0);
1680     OUT_BCS_BATCH(batch, 0);
1681     ADVANCE_BCS_BATCH(batch);
1682 }
1683
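/*
 * MFX_VC1_DIRECTMODE_STATE programs the direct-mode motion vector buffers:
 * the DMV buffer of the current picture is the write target and the DMV
 * buffer of the reference picture in decode_state->reference_objects[1] is
 * the read source used for B-picture direct-mode prediction. Either address
 * may legitimately be zero when no such surface exists.
 */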
1684 static void
1685 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1686                               struct decode_state *decode_state,
1687                               struct gen7_mfd_context *gen7_mfd_context)
1688 {
1689     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1690     struct object_surface *obj_surface;
1691     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1692
1693     obj_surface = decode_state->render_object;
1694
1695     if (obj_surface && obj_surface->private_data) {
1696         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1697     }
1698
1699     obj_surface = decode_state->reference_objects[1];
1700
1701     if (obj_surface && obj_surface->private_data) {
1702         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1703     }
1704
1705     BEGIN_BCS_BATCH(batch, 7);
1706     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1707
1708     if (dmv_write_buffer)
1709         OUT_BCS_RELOC(batch, dmv_write_buffer,
1710                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1711                       0);
1712     else
1713         OUT_BCS_BATCH(batch, 0);
1714
1715     OUT_BCS_BATCH(batch, 0);
1716     OUT_BCS_BATCH(batch, 0);
1717
1718     if (dmv_read_buffer)
1719         OUT_BCS_RELOC(batch, dmv_read_buffer,
1720                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1721                       0);
1722     else
1723         OUT_BCS_BATCH(batch, 0);
1724     
1725     OUT_BCS_BATCH(batch, 0);
1726     OUT_BCS_BATCH(batch, 0);
1727                   
1728     ADVANCE_BCS_BATCH(batch);
1729 }
1730
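/*
 * Remap the macroblock data bit offset for Advanced profile streams
 * (profile == 3): the slice data still contains the 0x03 emulation-prevention
 * bytes, so the helper walks the slice header and, for every 00 00 03 0x
 * pattern it finds, advances an extra byte; each escape byte encountered
 * therefore adds 8 bits to the returned offset. Presumably this converts an
 * offset counted over the unescaped payload into one over the raw escaped
 * bytestream that the hardware parses. Simple/Main profile offsets are
 * returned unchanged.
 */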
1731 static int
1732 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1733 {
1734     int out_slice_data_bit_offset;
1735     int slice_header_size = in_slice_data_bit_offset / 8;
1736     int i, j;
1737
1738     if (profile != 3)
1739         out_slice_data_bit_offset = in_slice_data_bit_offset;
1740     else {
1741         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1742             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1743                 i++, j += 2;
1744             }
1745         }
1746
1747         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1748     }
1749
1750     return out_slice_data_bit_offset;
1751 }
1752
1753 static void
1754 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1755                         VAPictureParameterBufferVC1 *pic_param,
1756                         VASliceParameterBufferVC1 *slice_param,
1757                         VASliceParameterBufferVC1 *next_slice_param,
1758                         dri_bo *slice_data_bo,
1759                         struct gen7_mfd_context *gen7_mfd_context)
1760 {
1761     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1762     int next_slice_start_vert_pos;
1763     int macroblock_offset;
1764     uint8_t *slice_data = NULL;
1765
1766     dri_bo_map(slice_data_bo, 0);
1767     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1768     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1769                                                                slice_param->macroblock_offset,
1770                                                                pic_param->sequence_fields.bits.profile);
1771     dri_bo_unmap(slice_data_bo);
1772
1773     if (next_slice_param)
1774         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1775     else
1776         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1777
1778     BEGIN_BCS_BATCH(batch, 5);
1779     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1780     OUT_BCS_BATCH(batch, 
1781                   slice_param->slice_data_size - (macroblock_offset >> 3));
1782     OUT_BCS_BATCH(batch, 
1783                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1784     OUT_BCS_BATCH(batch,
1785                   slice_param->slice_vertical_position << 16 |
1786                   next_slice_start_vert_pos << 0);
1787     OUT_BCS_BATCH(batch,
1788                   (macroblock_offset & 0x7));
1789     ADVANCE_BCS_BATCH(batch);
1790 }
1791
1792 static void
1793 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1794                             struct decode_state *decode_state,
1795                             struct gen7_mfd_context *gen7_mfd_context)
1796 {
1797     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1798     VAPictureParameterBufferVC1 *pic_param;
1799     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1800     dri_bo *slice_data_bo;
1801     int i, j;
1802
1803     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1804     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1805
1806     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1807     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1808     intel_batchbuffer_emit_mi_flush(batch);
1809     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1810     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1811     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1812     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1813     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1814     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1815     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1816
1817     for (j = 0; j < decode_state->num_slice_params; j++) {
1818         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1819         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1820         slice_data_bo = decode_state->slice_datas[j]->bo;
1821         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1822
1823         if (j == decode_state->num_slice_params - 1)
1824             next_slice_group_param = NULL;
1825         else
1826             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1827
1828         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1829             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1830
1831             if (i < decode_state->slice_params[j]->num_elements - 1)
1832                 next_slice_param = slice_param + 1;
1833             else
1834                 next_slice_param = next_slice_group_param;
1835
1836             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1837             slice_param++;
1838         }
1839     }
1840
1841     intel_batchbuffer_end_atomic(batch);
1842     intel_batchbuffer_flush(batch);
1843 }
1844
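/*
 * Pick a render target layout for baseline JPEG from the per-component
 * sampling factors: a single component decodes to a YUV400 surface, while
 * three-component images are matched against the common h/v sampling
 * combinations (e.g. 2x2 luma with 1x1 chroma selects IMC3 / 4:2:0, 1x1
 * everywhere selects 444P). Unsupported combinations hit the assert.
 */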
1845 static void
1846 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1847                           struct decode_state *decode_state,
1848                           struct gen7_mfd_context *gen7_mfd_context)
1849 {
1850     struct object_surface *obj_surface;
1851     VAPictureParameterBufferJPEGBaseline *pic_param;
1852     int subsampling = SUBSAMPLE_YUV420;
1853     int fourcc = VA_FOURCC_IMC3;
1854
1855     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1856
1857     if (pic_param->num_components == 1)
1858         subsampling = SUBSAMPLE_YUV400;
1859     else if (pic_param->num_components == 3) {
1860         int h1 = pic_param->components[0].h_sampling_factor;
1861         int h2 = pic_param->components[1].h_sampling_factor;
1862         int h3 = pic_param->components[2].h_sampling_factor;
1863         int v1 = pic_param->components[0].v_sampling_factor;
1864         int v2 = pic_param->components[1].v_sampling_factor;
1865         int v3 = pic_param->components[2].v_sampling_factor;
1866
1867         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1868             v1 == 2 && v2 == 1 && v3 == 1) {
1869             subsampling = SUBSAMPLE_YUV420;
1870             fourcc = VA_FOURCC_IMC3;
1871         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1872                    v1 == 1 && v2 == 1 && v3 == 1) {
1873             subsampling = SUBSAMPLE_YUV422H;
1874             fourcc = VA_FOURCC_422H;
1875         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1876                    v1 == 1 && v2 == 1 && v3 == 1) {
1877             subsampling = SUBSAMPLE_YUV444;
1878             fourcc = VA_FOURCC_444P;
1879         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1880                    v1 == 1 && v2 == 1 && v3 == 1) {
1881             subsampling = SUBSAMPLE_YUV411;
1882             fourcc = VA_FOURCC_411P;
1883         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1884                    v1 == 2 && v2 == 1 && v3 == 1) {
1885             subsampling = SUBSAMPLE_YUV422V;
1886             fourcc = VA_FOURCC_422V;
1887         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1888                    v1 == 2 && v2 == 2 && v3 == 2) {
1889             subsampling = SUBSAMPLE_YUV422H;
1890             fourcc = VA_FOURCC_422H;
1891         } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1892                    v1 == 2 && v2 == 1 && v3 == 1) {
1893             subsampling = SUBSAMPLE_YUV422V;
1894             fourcc = VA_FOURCC_422V;
1895         } else
1896             assert(0);
1897     }
1898     else {
1899         assert(0);
1900     }
1901
1902     /* Current decoded picture */
1903     obj_surface = decode_state->render_object;
1904     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1905
1906     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1907     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1908     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1909     gen7_mfd_context->pre_deblocking_output.valid = 1;
1910
1911     gen7_mfd_context->post_deblocking_output.bo = NULL;
1912     gen7_mfd_context->post_deblocking_output.valid = 0;
1913
1914     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1915     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1916
1917     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1918     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1919
1920     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1921     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1922
1923     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1924     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1925
1926     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1927     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1928 }
1929
1930 static const int va_to_gen7_jpeg_rotation[4] = {
1931     GEN7_JPEG_ROTATION_0,
1932     GEN7_JPEG_ROTATION_90,
1933     GEN7_JPEG_ROTATION_180,
1934     GEN7_JPEG_ROTATION_270
1935 };
1936
1937 static void
1938 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1939                         struct decode_state *decode_state,
1940                         struct gen7_mfd_context *gen7_mfd_context)
1941 {
1942     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1943     VAPictureParameterBufferJPEGBaseline *pic_param;
1944     int chroma_type = GEN7_YUV420;
1945     int frame_width_in_blks;
1946     int frame_height_in_blks;
1947
1948     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1949     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1950
1951     if (pic_param->num_components == 1)
1952         chroma_type = GEN7_YUV400;
1953     else if (pic_param->num_components == 3) {
1954         int h1 = pic_param->components[0].h_sampling_factor;
1955         int h2 = pic_param->components[1].h_sampling_factor;
1956         int h3 = pic_param->components[2].h_sampling_factor;
1957         int v1 = pic_param->components[0].v_sampling_factor;
1958         int v2 = pic_param->components[1].v_sampling_factor;
1959         int v3 = pic_param->components[2].v_sampling_factor;
1960
1961         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1962             v1 == 2 && v2 == 1 && v3 == 1)
1963             chroma_type = GEN7_YUV420;
1964         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1965                  v1 == 1 && v2 == 1 && v3 == 1)
1966             chroma_type = GEN7_YUV422H_2Y;
1967         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1968                  v1 == 1 && v2 == 1 && v3 == 1)
1969             chroma_type = GEN7_YUV444;
1970         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1971                  v1 == 1 && v2 == 1 && v3 == 1)
1972             chroma_type = GEN7_YUV411;
1973         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1974                  v1 == 2 && v2 == 1 && v3 == 1)
1975             chroma_type = GEN7_YUV422V_2Y;
1976         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1977                  v1 == 2 && v2 == 2 && v3 == 2)
1978             chroma_type = GEN7_YUV422H_4Y;
1979         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1980                  v1 == 2 && v2 == 1 && v3 == 1)
1981             chroma_type = GEN7_YUV422V_4Y;
1982         else
1983             assert(0);
1984     }
1985
1986     if (chroma_type == GEN7_YUV400 ||
1987         chroma_type == GEN7_YUV444 ||
1988         chroma_type == GEN7_YUV422V_2Y) {
1989         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
1990         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
1991     } else if (chroma_type == GEN7_YUV411) {
1992         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
1993         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
1994     } else {
1995         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
1996         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
1997     }
1998
1999     BEGIN_BCS_BATCH(batch, 3);
2000     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2001     OUT_BCS_BATCH(batch,
2002                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2003                   (chroma_type << 0));
2004     OUT_BCS_BATCH(batch,
2005                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2006                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2007     ADVANCE_BCS_BATCH(batch);
2008 }
2009
2010 static const int va_to_gen7_jpeg_hufftable[2] = {
2011     MFX_HUFFTABLE_ID_Y,
2012     MFX_HUFFTABLE_ID_UV
2013 };
2014
2015 static void
2016 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2017                                struct decode_state *decode_state,
2018                                struct gen7_mfd_context *gen7_mfd_context,
2019                                int num_tables)
2020 {
2021     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2022     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2023     int index;
2024
2025     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2026         return;
2027
2028     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2029
2030     for (index = 0; index < num_tables; index++) {
2031         int id = va_to_gen7_jpeg_hufftable[index];
2032         if (!huffman_table->load_huffman_table[index])
2033             continue;
2034         BEGIN_BCS_BATCH(batch, 53);
2035         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2036         OUT_BCS_BATCH(batch, id);
2037         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2038         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2039         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2040         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2041         ADVANCE_BCS_BATCH(batch);
2042     }
2043 }
2044
2045 static const int va_to_gen7_jpeg_qm[5] = {
2046     -1,
2047     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2048     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2049     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2050     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2051 };
2052
2053 static void
2054 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2055                        struct decode_state *decode_state,
2056                        struct gen7_mfd_context *gen7_mfd_context)
2057 {
2058     VAPictureParameterBufferJPEGBaseline *pic_param;
2059     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2060     int index;
2061
2062     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2063         return;
2064
2065     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2066     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2067
2068     assert(pic_param->num_components <= 3);
2069
2070     for (index = 0; index < pic_param->num_components; index++) {
2071         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2072         int qm_type;
2073         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2074         unsigned char raster_qm[64];
2075         int j;
2076
2077         if (id > 4 || id < 1)
2078             continue;
2079
2080         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2081             continue;
2082
2083         qm_type = va_to_gen7_jpeg_qm[id];
2084
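        /*
         * The quantiser tables arrive in zig-zag scan order (as stored in
         * the JPEG DQT segment), whereas MFX_QM_STATE expects raster order,
         * so remap through zigzag_direct[] before emitting the matrix.
         */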
2085         for (j = 0; j < 64; j++)
2086             raster_qm[zigzag_direct[j]] = qm[j];
2087
2088         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2089     }
2090 }
2091
2092 static void
2093 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2094                          VAPictureParameterBufferJPEGBaseline *pic_param,
2095                          VASliceParameterBufferJPEGBaseline *slice_param,
2096                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2097                          dri_bo *slice_data_bo,
2098                          struct gen7_mfd_context *gen7_mfd_context)
2099 {
2100     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2101     int scan_component_mask = 0;
2102     int i;
2103
2104     assert(slice_param->num_components > 0);
2105     assert(slice_param->num_components < 4);
2106     assert(slice_param->num_components <= pic_param->num_components);
2107
2108     for (i = 0; i < slice_param->num_components; i++) {
2109         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2110         case 1:
2111             scan_component_mask |= (1 << 0);
2112             break;
2113         case 2:
2114             scan_component_mask |= (1 << 1);
2115             break;
2116         case 3:
2117             scan_component_mask |= (1 << 2);
2118             break;
2119         default:
2120             assert(0);
2121             break;
2122         }
2123     }
2124
2125     BEGIN_BCS_BATCH(batch, 6);
2126     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2127     OUT_BCS_BATCH(batch, 
2128                   slice_param->slice_data_size);
2129     OUT_BCS_BATCH(batch, 
2130                   slice_param->slice_data_offset);
2131     OUT_BCS_BATCH(batch,
2132                   slice_param->slice_horizontal_position << 16 |
2133                   slice_param->slice_vertical_position << 0);
2134     OUT_BCS_BATCH(batch,
2135                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2136                   (scan_component_mask << 27) |                 /* scan components */
2137                   (0 << 26) |   /* disable interrupt allowed */
2138                   (slice_param->num_mcus << 0));                /* MCU count */
2139     OUT_BCS_BATCH(batch,
2140                   (slice_param->restart_interval << 0));    /* RestartInterval */
2141     ADVANCE_BCS_BATCH(batch);
2142 }
2143
2144 /* Workaround for JPEG decoding on Ivybridge */
2145 #ifdef JPEG_WA
2146
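/*
 * The workaround decodes this tiny hard-coded 16x16 AVC intra clip through
 * the MFX pipeline (see gen8_mfd_jpeg_wa() below) before every real JPEG
 * picture, presumably to bring the fixed-function state machine into a known
 * state. The fields are the clip dimensions, the compressed payload, its
 * size in bytes, the bit offset of the first macroblock and the slice QP.
 */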
2147 static struct {
2148     int width;
2149     int height;
2150     unsigned char data[32];
2151     int data_size;
2152     int data_bit_offset;
2153     int qp;
2154 } gen7_jpeg_wa_clip = {
2155     16,
2156     16,
2157     {
2158         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2159         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2160     },
2161     14,
2162     40,
2163     28,
2164 };
2165
2166 static void
2167 gen8_jpeg_wa_init(VADriverContextP ctx,
2168                   struct gen7_mfd_context *gen7_mfd_context)
2169 {
2170     struct i965_driver_data *i965 = i965_driver_data(ctx);
2171     VAStatus status;
2172     struct object_surface *obj_surface;
2173
2174     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2175         i965_DestroySurfaces(ctx,
2176                              &gen7_mfd_context->jpeg_wa_surface_id,
2177                              1);
2178
2179     status = i965_CreateSurfaces(ctx,
2180                                  gen7_jpeg_wa_clip.width,
2181                                  gen7_jpeg_wa_clip.height,
2182                                  VA_RT_FORMAT_YUV420,
2183                                  1,
2184                                  &gen7_mfd_context->jpeg_wa_surface_id);
2185     assert(status == VA_STATUS_SUCCESS);
2186
2187     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2188     assert(obj_surface);
2189     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2190     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2191
2192     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2193         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2194                                                                "JPEG WA data",
2195                                                                0x1000,
2196                                                                0x1000);
2197         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2198                        0,
2199                        gen7_jpeg_wa_clip.data_size,
2200                        gen7_jpeg_wa_clip.data);
2201     }
2202 }
2203
2204 static void
2205 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2206                               struct gen7_mfd_context *gen7_mfd_context)
2207 {
2208     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2209
2210     BEGIN_BCS_BATCH(batch, 5);
2211     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2212     OUT_BCS_BATCH(batch,
2213                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2214                   (MFD_MODE_VLD << 15) | /* VLD mode */
2215                   (0 << 10) | /* disable Stream-Out */
2216                   (0 << 9)  | /* Post Deblocking Output */
2217                   (1 << 8)  | /* Pre Deblocking Output */
2218                   (0 << 5)  | /* not in stitch mode */
2219                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2220                   (MFX_FORMAT_AVC << 0));
2221     OUT_BCS_BATCH(batch,
2222                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2223                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2224                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2225                   (0 << 1)  |
2226                   (0 << 0));
2227     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2228     OUT_BCS_BATCH(batch, 0); /* reserved */
2229     ADVANCE_BCS_BATCH(batch);
2230 }
2231
2232 static void
2233 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2234                            struct gen7_mfd_context *gen7_mfd_context)
2235 {
2236     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2237     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2238
2239     BEGIN_BCS_BATCH(batch, 6);
2240     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2241     OUT_BCS_BATCH(batch, 0);
2242     OUT_BCS_BATCH(batch,
2243                   ((obj_surface->orig_width - 1) << 18) |
2244                   ((obj_surface->orig_height - 1) << 4));
2245     OUT_BCS_BATCH(batch,
2246                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2247                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2248                   (0 << 22) | /* surface object control state, ignored */
2249                   ((obj_surface->width - 1) << 3) | /* pitch */
2250                   (0 << 2)  | /* must be 0 */
2251                   (1 << 1)  | /* must be tiled */
2252                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2253     OUT_BCS_BATCH(batch,
2254                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2255                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2256     OUT_BCS_BATCH(batch,
2257                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2258                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2259     ADVANCE_BCS_BATCH(batch);
2260 }
2261
2262 static void
2263 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2264                                  struct gen7_mfd_context *gen7_mfd_context)
2265 {
2266     struct i965_driver_data *i965 = i965_driver_data(ctx);
2267     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2268     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2269     dri_bo *intra_bo;
2270     int i;
2271
2272     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2273                             "intra row store",
2274                             128 * 64,
2275                             0x1000);
2276
2277     BEGIN_BCS_BATCH(batch, 61);
2278     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2279     OUT_BCS_RELOC(batch,
2280                   obj_surface->bo,
2281                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2282                   0);
2283     OUT_BCS_BATCH(batch, 0);
2284     OUT_BCS_BATCH(batch, 0);
2285
2286
2287     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2288     OUT_BCS_BATCH(batch, 0);
2289     OUT_BCS_BATCH(batch, 0);
2290
2291     /* uncompressed-video & stream out 7-12 */
2292     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2293     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2294     OUT_BCS_BATCH(batch, 0);
2295     OUT_BCS_BATCH(batch, 0);
2296     OUT_BCS_BATCH(batch, 0);
2297     OUT_BCS_BATCH(batch, 0);
2298
2299     /* the DW 13-15 is for intra row store scratch */
2300     OUT_BCS_RELOC(batch,
2301                   intra_bo,
2302                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2303                   0);
2304     OUT_BCS_BATCH(batch, 0);
2305     OUT_BCS_BATCH(batch, 0);
2306
2307     /* the DW 16-18 is for deblocking filter */
2308     OUT_BCS_BATCH(batch, 0);
2309     OUT_BCS_BATCH(batch, 0);
2310     OUT_BCS_BATCH(batch, 0);
2311
2312     /* DW 19..50 */
2313     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2314         OUT_BCS_BATCH(batch, 0);
2315         OUT_BCS_BATCH(batch, 0);
2316     }
2317     OUT_BCS_BATCH(batch, 0);
2318
2319     /* the DW52-54 is for mb status address */
2320     OUT_BCS_BATCH(batch, 0);
2321     OUT_BCS_BATCH(batch, 0);
2322     OUT_BCS_BATCH(batch, 0);
2323     /* the DW56-60 is for ILDB & second ILDB address */
2324     OUT_BCS_BATCH(batch, 0);
2325     OUT_BCS_BATCH(batch, 0);
2326     OUT_BCS_BATCH(batch, 0);
2327     OUT_BCS_BATCH(batch, 0);
2328     OUT_BCS_BATCH(batch, 0);
2329     OUT_BCS_BATCH(batch, 0);
2330
2331     ADVANCE_BCS_BATCH(batch);
2332
2333     dri_bo_unreference(intra_bo);
2334 }
2335
2336 static void
2337 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2338                                      struct gen7_mfd_context *gen7_mfd_context)
2339 {
2340     struct i965_driver_data *i965 = i965_driver_data(ctx);
2341     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2342     dri_bo *bsd_mpc_bo, *mpr_bo;
2343
2344     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2345                               "bsd mpc row store",
2346                               11520, /* 1.5 * 120 * 64 */
2347                               0x1000);
2348
2349     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2350                           "mpr row store",
2351                           7680, /* 1.0 * 120 * 64 */
2352                           0x1000);
2353
2354     BEGIN_BCS_BATCH(batch, 10);
2355     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2356
2357     OUT_BCS_RELOC(batch,
2358                   bsd_mpc_bo,
2359                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2360                   0);
2361
2362     OUT_BCS_BATCH(batch, 0);
2363     OUT_BCS_BATCH(batch, 0);
2364
2365     OUT_BCS_RELOC(batch,
2366                   mpr_bo,
2367                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2368                   0);
2369     OUT_BCS_BATCH(batch, 0);
2370     OUT_BCS_BATCH(batch, 0);
2371
2372     OUT_BCS_BATCH(batch, 0);
2373     OUT_BCS_BATCH(batch, 0);
2374     OUT_BCS_BATCH(batch, 0);
2375
2376     ADVANCE_BCS_BATCH(batch);
2377
2378     dri_bo_unreference(bsd_mpc_bo);
2379     dri_bo_unreference(mpr_bo);
2380 }
2381
2382 static void
2383 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2384                           struct gen7_mfd_context *gen7_mfd_context)
2385 {
2386
2387 }
2388
2389 static void
2390 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2391                            struct gen7_mfd_context *gen7_mfd_context)
2392 {
2393     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2394     int img_struct = 0;
2395     int mbaff_frame_flag = 0;
2396     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2397
2398     BEGIN_BCS_BATCH(batch, 16);
2399     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2400     OUT_BCS_BATCH(batch, 
2401                   width_in_mbs * height_in_mbs);
2402     OUT_BCS_BATCH(batch, 
2403                   ((height_in_mbs - 1) << 16) | 
2404                   ((width_in_mbs - 1) << 0));
2405     OUT_BCS_BATCH(batch, 
2406                   (0 << 24) |
2407                   (0 << 16) |
2408                   (0 << 14) |
2409                   (0 << 13) |
2410                   (0 << 12) | /* differ from GEN6 */
2411                   (0 << 10) |
2412                   (img_struct << 8));
2413     OUT_BCS_BATCH(batch,
2414                   (1 << 10) | /* 4:2:0 */
2415                   (1 << 7) |  /* CABAC */
2416                   (0 << 6) |
2417                   (0 << 5) |
2418                   (0 << 4) |
2419                   (0 << 3) |
2420                   (1 << 2) |
2421                   (mbaff_frame_flag << 1) |
2422                   (0 << 0));
2423     OUT_BCS_BATCH(batch, 0);
2424     OUT_BCS_BATCH(batch, 0);
2425     OUT_BCS_BATCH(batch, 0);
2426     OUT_BCS_BATCH(batch, 0);
2427     OUT_BCS_BATCH(batch, 0);
2428     OUT_BCS_BATCH(batch, 0);
2429     OUT_BCS_BATCH(batch, 0);
2430     OUT_BCS_BATCH(batch, 0);
2431     OUT_BCS_BATCH(batch, 0);
2432     OUT_BCS_BATCH(batch, 0);
2433     OUT_BCS_BATCH(batch, 0);
2434     ADVANCE_BCS_BATCH(batch);
2435 }
2436
2437 static void
2438 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2439                                   struct gen7_mfd_context *gen7_mfd_context)
2440 {
2441     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2442     int i;
2443
2444     BEGIN_BCS_BATCH(batch, 71);
2445     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2446
2447     /* reference surfaces 0..15 */
2448     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2449         OUT_BCS_BATCH(batch, 0); /* top */
2450         OUT_BCS_BATCH(batch, 0); /* bottom */
2451     }
2452
2453     OUT_BCS_BATCH(batch, 0);
2454
2455     /* the current decoding frame/field */
2456     OUT_BCS_BATCH(batch, 0); /* top */
2457     OUT_BCS_BATCH(batch, 0);
2458     OUT_BCS_BATCH(batch, 0);
2459
2460     /* POC List */
2461     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2462         OUT_BCS_BATCH(batch, 0);
2463         OUT_BCS_BATCH(batch, 0);
2464     }
2465
2466     OUT_BCS_BATCH(batch, 0);
2467     OUT_BCS_BATCH(batch, 0);
2468
2469     ADVANCE_BCS_BATCH(batch);
2470 }
2471
2472 static void
2473 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2474                                      struct gen7_mfd_context *gen7_mfd_context)
2475 {
2476     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2477
2478     BEGIN_BCS_BATCH(batch, 11);
2479     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2480     OUT_BCS_RELOC(batch,
2481                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2482                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2483                   0);
2484     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2485     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2486     OUT_BCS_BATCH(batch, 0);
2487     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2488     OUT_BCS_BATCH(batch, 0);
2489     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2490     OUT_BCS_BATCH(batch, 0);
2491     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2492     OUT_BCS_BATCH(batch, 0);
2493     ADVANCE_BCS_BATCH(batch);
2494 }
2495
2496 static void
2497 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2498                             struct gen7_mfd_context *gen7_mfd_context)
2499 {
2500     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2501
2502     /* the input bitstream format on GEN7 differs from GEN6 */
2503     BEGIN_BCS_BATCH(batch, 6);
2504     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2505     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2506     OUT_BCS_BATCH(batch, 0);
2507     OUT_BCS_BATCH(batch,
2508                   (0 << 31) |
2509                   (0 << 14) |
2510                   (0 << 12) |
2511                   (0 << 10) |
2512                   (0 << 8));
2513     OUT_BCS_BATCH(batch,
2514                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2515                   (0 << 5)  |
2516                   (0 << 4)  |
2517                   (1 << 3) | /* LastSlice Flag */
2518                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2519     OUT_BCS_BATCH(batch, 0);
2520     ADVANCE_BCS_BATCH(batch);
2521 }
2522
2523 static void
2524 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2525                              struct gen7_mfd_context *gen7_mfd_context)
2526 {
2527     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2528     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2529     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2530     int first_mb_in_slice = 0;
2531     int slice_type = SLICE_TYPE_I;
2532
2533     BEGIN_BCS_BATCH(batch, 11);
2534     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2535     OUT_BCS_BATCH(batch, slice_type);
2536     OUT_BCS_BATCH(batch, 
2537                   (num_ref_idx_l1 << 24) |
2538                   (num_ref_idx_l0 << 16) |
2539                   (0 << 8) |
2540                   (0 << 0));
2541     OUT_BCS_BATCH(batch, 
2542                   (0 << 29) |
2543                   (1 << 27) |   /* disable Deblocking */
2544                   (0 << 24) |
2545                   (gen7_jpeg_wa_clip.qp << 16) |
2546                   (0 << 8) |
2547                   (0 << 0));
2548     OUT_BCS_BATCH(batch, 
2549                   (slice_ver_pos << 24) |
2550                   (slice_hor_pos << 16) | 
2551                   (first_mb_in_slice << 0));
2552     OUT_BCS_BATCH(batch,
2553                   (next_slice_ver_pos << 16) |
2554                   (next_slice_hor_pos << 0));
2555     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2556     OUT_BCS_BATCH(batch, 0);
2557     OUT_BCS_BATCH(batch, 0);
2558     OUT_BCS_BATCH(batch, 0);
2559     OUT_BCS_BATCH(batch, 0);
2560     ADVANCE_BCS_BATCH(batch);
2561 }
2562
2563 static void
2564 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2565                  struct gen7_mfd_context *gen7_mfd_context)
2566 {
2567     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2568     gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2569     intel_batchbuffer_emit_mi_flush(batch);
2570     gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2571     gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2572     gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2573     gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2574     gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2575     gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2576     gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2577
2578     gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2579     gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2580     gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2581 }
2582
2583 #endif
2584
2585 void
2586 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2587                              struct decode_state *decode_state,
2588                              struct gen7_mfd_context *gen7_mfd_context)
2589 {
2590     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2591     VAPictureParameterBufferJPEGBaseline *pic_param;
2592     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2593     dri_bo *slice_data_bo;
2594     int i, j, max_selector = 0;
2595
2596     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2597     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2598
2599     /* Currently only Baseline DCT is supported */
2600     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2601     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2602 #ifdef JPEG_WA
2603     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2604 #endif
2605     intel_batchbuffer_emit_mi_flush(batch);
2606     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2607     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2608     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2609     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2610     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2611
2612     for (j = 0; j < decode_state->num_slice_params; j++) {
2613         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2614         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2615         slice_data_bo = decode_state->slice_datas[j]->bo;
2616         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2617
2618         if (j == decode_state->num_slice_params - 1)
2619             next_slice_group_param = NULL;
2620         else
2621             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2622
2623         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2624             int component;
2625
2626             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2627
2628             if (i < decode_state->slice_params[j]->num_elements - 1)
2629                 next_slice_param = slice_param + 1;
2630             else
2631                 next_slice_param = next_slice_group_param;
2632
2633             for (component = 0; component < slice_param->num_components; component++) {
2634                 if (max_selector < slice_param->components[component].dc_table_selector)
2635                     max_selector = slice_param->components[component].dc_table_selector;
2636
2637                 if (max_selector < slice_param->components[component].ac_table_selector)
2638                     max_selector = slice_param->components[component].ac_table_selector;
2639             }
2640
2641             slice_param++;
2642         }
2643     }
2644
2645     assert(max_selector < 2);
2646     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2647
2648     for (j = 0; j < decode_state->num_slice_params; j++) {
2649         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2650         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2651         slice_data_bo = decode_state->slice_datas[j]->bo;
2652         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2653
2654         if (j == decode_state->num_slice_params - 1)
2655             next_slice_group_param = NULL;
2656         else
2657             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2658
2659         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2660             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2661
2662             if (i < decode_state->slice_params[j]->num_elements - 1)
2663                 next_slice_param = slice_param + 1;
2664             else
2665                 next_slice_param = next_slice_group_param;
2666
2667             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2668             slice_param++;
2669         }
2670     }
2671
2672     intel_batchbuffer_end_atomic(batch);
2673     intel_batchbuffer_flush(batch);
2674 }
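/*
 * The JPEG decode above walks the scans twice: a first pass over all slice
 * parameters finds the largest DC/AC Huffman table selector, so that
 * gen8_mfd_jpeg_huff_table_state() loads the right number of tables
 * (max_selector + 1), and a second pass emits a BSD object for each scan.
 */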
2675
2676 static const int vp8_dc_qlookup[128] =
2677 {
2678       4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
2679      18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
2680      29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
2681      44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
2682      59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
2683      75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
2684      91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2685     122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
2686 };
2687
2688 static const int vp8_ac_qlookup[128] =
2689 {
2690       4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
2691      20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
2692      36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
2693      52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
2694      78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
2695     110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2696     155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2697     213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2698 };
2699
2700 static inline unsigned int vp8_clip_quantization_index(int index)
2701 {
2702     if (index > 127)
2703         return 127;
2704     else if (index < 0)
2705         return 0;
2706
2707     return index;
2708 }
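/*
 * Illustrative sketch (not part of the driver): how a raw segment
 * quantization index from the application is turned into the quantizer
 * values programmed into MFX_VP8_PIC_STATE below.  An out-of-range index
 * such as 130 clips to 127, giving vp8_ac_qlookup[127] == 284 and
 * vp8_dc_qlookup[127] == 157.  The index value here is hypothetical.
 */
#if 0
static void vp8_quantizer_example(void)
{
    int raw_index = 130;                                        /* hypothetical value */
    unsigned int q   = vp8_clip_quantization_index(raw_index);  /* 127 */
    unsigned int yac = vp8_ac_qlookup[q];                       /* 284 */
    unsigned int ydc = vp8_dc_qlookup[q];                       /* 157 */

    (void)yac;
    (void)ydc;
}
#endif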
2709
2710 static void
2711 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2712                           struct decode_state *decode_state,
2713                           struct gen7_mfd_context *gen7_mfd_context)
2714 {
2715     struct object_surface *obj_surface;
2716     struct i965_driver_data *i965 = i965_driver_data(ctx);
2717     dri_bo *bo;
2718     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2719     int width_in_mbs = (pic_param->frame_width + 15) / 16;
2720     int height_in_mbs = (pic_param->frame_height + 15) / 16;
2721
2722     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2723     assert(height_in_mbs > 0 && height_in_mbs <= 256);
2724
2725     intel_update_vp8_frame_store_index(ctx,
2726                                        decode_state,
2727                                        pic_param,
2728                                        gen7_mfd_context->reference_surface);
2729
2730     /* Current decoded picture */
2731     obj_surface = decode_state->render_object;
2732     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2733
2734     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2735     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2736     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2737     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2738
2739     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2740     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2741     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2742     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
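    /*
     * Note: pre- and post-deblocking outputs both point at the render
     * target; loop_filter_disable decides which of the two is marked valid,
     * so only one of them is used for this frame.
     */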
2743
2744     intel_ensure_vp8_segmentation_buffer(ctx,
2745         &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2746
2747     /* The same as AVC */
2748     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2749     bo = dri_bo_alloc(i965->intel.bufmgr,
2750                       "intra row store",
2751                       width_in_mbs * 64,
2752                       0x1000);
2753     assert(bo);
2754     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2755     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2756
2757     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2758     bo = dri_bo_alloc(i965->intel.bufmgr,
2759                       "deblocking filter row store",
2760                       width_in_mbs * 64 * 4,
2761                       0x1000);
2762     assert(bo);
2763     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2764     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2765
2766     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2767     bo = dri_bo_alloc(i965->intel.bufmgr,
2768                       "bsd mpc row store",
2769                       width_in_mbs * 64 * 2,
2770                       0x1000);
2771     assert(bo);
2772     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2773     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2774
2775     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2776     bo = dri_bo_alloc(i965->intel.bufmgr,
2777                       "mpr row store",
2778                       width_in_mbs * 64 * 2,
2779                       0x1000);
2780     assert(bo);
2781     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2782     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2783
2784     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2785 }
2786
2787 static void
2788 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2789                        struct decode_state *decode_state,
2790                        struct gen7_mfd_context *gen7_mfd_context)
2791 {
2792     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2793     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2794     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2795     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2796     dri_bo *probs_bo = decode_state->probability_data->bo;
2797     int i, j, log2num;
2798     unsigned int quantization_value[4][6];
2799
2800     /* There is no safe way to error out if the segmentation buffer
2801        could not be allocated. So, instead of aborting, simply decode
2802        something even if the result may look totally inaccurate */
2803     const unsigned int enable_segmentation =
2804         pic_param->pic_fields.bits.segmentation_enabled &&
2805         gen7_mfd_context->segmentation_buffer.valid;
2806         
2807     log2num = (int)log2(slice_param->num_of_partitions - 1);
2808
2809     BEGIN_BCS_BATCH(batch, 38);
2810     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2811     OUT_BCS_BATCH(batch,
2812                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2813                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2814     OUT_BCS_BATCH(batch,
2815                   log2num << 24 |
2816                   pic_param->pic_fields.bits.sharpness_level << 16 |
2817                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2818                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2819                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2820                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2821                   pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2822                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2823                   (enable_segmentation &&
2824                    !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2825                   (enable_segmentation &&
2826                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2827                   (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicates an intra (key) frame in the VP8 bitstream/spec (§9.1) */
2828                   pic_param->pic_fields.bits.filter_type << 4 |
2829                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2830                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2831
2832     OUT_BCS_BATCH(batch,
2833                   pic_param->loop_filter_level[3] << 24 |
2834                   pic_param->loop_filter_level[2] << 16 |
2835                   pic_param->loop_filter_level[1] <<  8 |
2836                   pic_param->loop_filter_level[0] <<  0);
2837
2838     /* Quantizer values for the 4 segments, DW4-DW15 */
2839     for (i = 0; i < 4; i++) {
2840         quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])]; /* yac */
2841         quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])]; /* ydc */
2842         quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /* y2dc */
2843         /* (x * 101581) >> 16 is a fixed-point approximation of x * 155 / 100 */
2844         quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /* y2ac */
2845         quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])]; /* uvdc */
2846         quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])]; /* uvac */
2847
2848         quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2849         quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2850
2851         OUT_BCS_BATCH(batch,
2852                       quantization_value[i][0] << 16 | /* Y1AC */
2853                       quantization_value[i][1] <<  0); /* Y1DC */
2854         OUT_BCS_BATCH(batch,
2855                       quantization_value[i][5] << 16 | /* UVAC */
2856                       quantization_value[i][4] <<  0); /* UVDC */
2857         OUT_BCS_BATCH(batch,
2858                       quantization_value[i][3] << 16 | /* Y2AC */
2859                       quantization_value[i][2] <<  0); /* Y2DC */
2860     }
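    /*
     * Worked example (illustrative values): with a clipped index of 127,
     * vp8_ac_qlookup[127] == 284, so y2ac = (284 * 101581) >> 16 = 440,
     * matching 284 * 155 / 100 = 440 in integer arithmetic; the clamps
     * above then keep y2ac >= 8 and uvdc <= 132.
     */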
2861
2862     /* CoeffProbability table for non-key frame, DW16-DW18 */
2863     if (probs_bo) {
2864         OUT_BCS_RELOC(batch, probs_bo,
2865                       0, I915_GEM_DOMAIN_INSTRUCTION,
2866                       0);
2867         OUT_BCS_BATCH(batch, 0);
2868         OUT_BCS_BATCH(batch, 0);
2869     } else {
2870         OUT_BCS_BATCH(batch, 0);
2871         OUT_BCS_BATCH(batch, 0);
2872         OUT_BCS_BATCH(batch, 0);
2873     }
2874
2875     OUT_BCS_BATCH(batch,
2876                   pic_param->mb_segment_tree_probs[2] << 16 |
2877                   pic_param->mb_segment_tree_probs[1] <<  8 |
2878                   pic_param->mb_segment_tree_probs[0] <<  0);
2879
2880     OUT_BCS_BATCH(batch,
2881                   pic_param->prob_skip_false << 24 |
2882                   pic_param->prob_intra      << 16 |
2883                   pic_param->prob_last       <<  8 |
2884                   pic_param->prob_gf         <<  0);
2885
2886     OUT_BCS_BATCH(batch,
2887                   pic_param->y_mode_probs[3] << 24 |
2888                   pic_param->y_mode_probs[2] << 16 |
2889                   pic_param->y_mode_probs[1] <<  8 |
2890                   pic_param->y_mode_probs[0] <<  0);
2891
2892     OUT_BCS_BATCH(batch,
2893                   pic_param->uv_mode_probs[2] << 16 |
2894                   pic_param->uv_mode_probs[1] <<  8 |
2895                   pic_param->uv_mode_probs[0] <<  0);
2896     
2897     /* MV update value, DW23-DW32 */
2898     for (i = 0; i < 2; i++) {
2899         for (j = 0; j < 20; j += 4) {
2900             OUT_BCS_BATCH(batch,
2901                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2902                           pic_param->mv_probs[i][j + 2] << 16 |
2903                           pic_param->mv_probs[i][j + 1] <<  8 |
2904                           pic_param->mv_probs[i][j + 0] <<  0);
2905         }
2906     }
2907
2908     OUT_BCS_BATCH(batch,
2909                   (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2910                   (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2911                   (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
2912                   (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);
2913
2914     OUT_BCS_BATCH(batch,
2915                   (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2916                   (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2917                   (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
2918                   (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);
2919
2920     /* segmentation id stream base address, DW35-DW37 */
2921     if (enable_segmentation) {
2922         OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2923                       0, I915_GEM_DOMAIN_INSTRUCTION,
2924                       0);
2925         OUT_BCS_BATCH(batch, 0);
2926         OUT_BCS_BATCH(batch, 0);
2927     }
2928     else {
2929         OUT_BCS_BATCH(batch, 0);
2930         OUT_BCS_BATCH(batch, 0);
2931         OUT_BCS_BATCH(batch, 0);
2932     }
2933     ADVANCE_BCS_BATCH(batch);
2934 }
2935
2936 static void
2937 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2938                         VAPictureParameterBufferVP8 *pic_param,
2939                         VASliceParameterBufferVP8 *slice_param,
2940                         dri_bo *slice_data_bo,
2941                         struct gen7_mfd_context *gen7_mfd_context)
2942 {
2943     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2944     int i, log2num;
2945     unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
2946     unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
2947     unsigned int partition_size_0 = slice_param->partition_size[0];
2948
2949     assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
2950     if (used_bits == 8) {
2951         used_bits = 0;
2952         offset += 1;
2953         partition_size_0 -= 1;
2954     }
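    /*
     * bool_coder_ctx carries the boolean-decoder state for partition 0 as
     * left by the application's frame header parsing: used_bits
     * (8 - bool_coder_ctx.count) is treated as the number of bits already
     * consumed from the byte at "offset".  If that whole byte has been
     * consumed, decoding starts at the next byte and partition 0 shrinks by
     * one byte.
     */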
2955
2956     assert(slice_param->num_of_partitions >= 2);
2957     assert(slice_param->num_of_partitions <= 9);
2958
2959     log2num = (int)log2(slice_param->num_of_partitions - 1);
2960
2961     BEGIN_BCS_BATCH(batch, 22);
2962     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2963     OUT_BCS_BATCH(batch,
2964                   used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2965                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
2966                   log2num << 4 |
2967                   (slice_param->macroblock_offset & 0x7));
2968     OUT_BCS_BATCH(batch,
2969                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
2970                   0);
2971
2972     OUT_BCS_BATCH(batch, partition_size_0);
2973     OUT_BCS_BATCH(batch, offset);
2974     /* Partition sizes (3 bytes each) follow the first partition when there is more than one token partition */
2975     offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
2976     for (i = 1; i < 9; i++) {
2977         if (i < slice_param->num_of_partitions) {
2978             OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
2979             OUT_BCS_BATCH(batch, offset);
2980         } else {
2981             OUT_BCS_BATCH(batch, 0);
2982             OUT_BCS_BATCH(batch, 0);
2983         }
2984
2985         offset += slice_param->partition_size[i];
2986     }
2987
2988     OUT_BCS_BATCH(batch,
2989                   1 << 31 | /* concealment method */
2990                   0);
2991
2992     ADVANCE_BCS_BATCH(batch);
2993 }
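/*
 * Partition layout example for the BSD object above (hypothetical sizes):
 * with num_of_partitions == 3 (log2num == 1) and partition_size == {100, 40,
 * 60}, partition 0 occupies bytes [offset, offset + 100), a single 3-byte
 * size field follows it (num_of_partitions - 2 == 1), so token partition 1
 * starts at offset + 103 and token partition 2 at offset + 143.
 */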
2994
2995 void
2996 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
2997                             struct decode_state *decode_state,
2998                             struct gen7_mfd_context *gen7_mfd_context)
2999 {
3000     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3001     VAPictureParameterBufferVP8 *pic_param;
3002     VASliceParameterBufferVP8 *slice_param;
3003     dri_bo *slice_data_bo;
3004
3005     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3006     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3007
3008     /* one slice per frame */
3009     if (decode_state->num_slice_params != 1 ||
3010         (!decode_state->slice_params ||
3011          !decode_state->slice_params[0] ||
3012          (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3013         (!decode_state->slice_datas ||
3014          !decode_state->slice_datas[0] ||
3015          !decode_state->slice_datas[0]->bo) ||
3016         !decode_state->probability_data) {
3017         WARN_ONCE("Wrong parameters for VP8 decoding\n");
3018
3019         return;
3020     }
3021
3022     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3023     slice_data_bo = decode_state->slice_datas[0]->bo;
3024
3025     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3026     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3027     intel_batchbuffer_emit_mi_flush(batch);
3028     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3029     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3030     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3031     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3032     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3033     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3034     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3035     intel_batchbuffer_end_atomic(batch);
3036     intel_batchbuffer_flush(batch);
3037 }
3038
3039 static VAStatus
3040 gen8_mfd_decode_picture(VADriverContextP ctx, 
3041                         VAProfile profile, 
3042                         union codec_state *codec_state,
3043                         struct hw_context *hw_context)
3044
3045 {
3046     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3047     struct decode_state *decode_state = &codec_state->decode;
3048     VAStatus vaStatus;
3049
3050     assert(gen7_mfd_context);
3051
3052     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3053
3054     if (vaStatus != VA_STATUS_SUCCESS)
3055         goto out;
3056
3057     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3058
3059     switch (profile) {
3060     case VAProfileMPEG2Simple:
3061     case VAProfileMPEG2Main:
3062         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3063         break;
3064         
3065     case VAProfileH264ConstrainedBaseline:
3066     case VAProfileH264Main:
3067     case VAProfileH264High:
3068     case VAProfileH264StereoHigh:
3069     case VAProfileH264MultiviewHigh:
3070         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3071         break;
3072
3073     case VAProfileVC1Simple:
3074     case VAProfileVC1Main:
3075     case VAProfileVC1Advanced:
3076         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3077         break;
3078
3079     case VAProfileJPEGBaseline:
3080         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3081         break;
3082
3083     case VAProfileVP8Version0_3:
3084         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3085         break;
3086
3087     default:
3088         assert(0);
3089         break;
3090     }
3091
3092     vaStatus = VA_STATUS_SUCCESS;
3093
3094 out:
3095     return vaStatus;
3096 }
3097
3098 static void
3099 gen8_mfd_context_destroy(void *hw_context)
3100 {
3101     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3102
3103     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3104     gen7_mfd_context->post_deblocking_output.bo = NULL;
3105
3106     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3107     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3108
3109     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3110     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3111
3112     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3113     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3114
3115     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3116     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3117
3118     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3119     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3120
3121     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3122     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3123
3124     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3125     gen7_mfd_context->segmentation_buffer.bo = NULL;
3126
3127     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3128
3129     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3130     free(gen7_mfd_context);
3131 }
3132
3133 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3134                                         struct gen7_mfd_context *gen7_mfd_context)
3135 {
3136     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3137     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3138     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3139     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3140 }
3141
3142 struct hw_context *
3143 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3144 {
3145     struct intel_driver_data *intel = intel_driver_data(ctx);
3146     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3147     int i;
3148
3149     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3150     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3151     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3152
3153     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3154         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3155         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3156     }
3157
3158     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3159     gen7_mfd_context->segmentation_buffer.valid = 0;
3160
3161     switch (obj_config->profile) {
3162     case VAProfileMPEG2Simple:
3163     case VAProfileMPEG2Main:
3164         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3165         break;
3166
3167     case VAProfileH264ConstrainedBaseline:
3168     case VAProfileH264Main:
3169     case VAProfileH264High:
3170     case VAProfileH264StereoHigh:
3171     case VAProfileH264MultiviewHigh:
3172         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3173         break;
3174     default:
3175         break;
3176     }
3177     return (struct hw_context *)gen7_mfd_context;
3178 }
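
/*
 * Minimal usage sketch (illustrative only, not part of the driver): the
 * codec layer creates one decode context per configuration and then drives
 * it through the hw_context vtable filled in above.  "obj_config" and
 * "codec_state" are assumed to have been prepared by the caller.
 */
#if 0
static void gen8_dec_usage_example(VADriverContextP ctx,
                                   struct object_config *obj_config,
                                   union codec_state *codec_state)
{
    struct hw_context *hw_ctx = gen8_dec_hw_context_init(ctx, obj_config);

    if (hw_ctx) {
        /* run() points at gen8_mfd_decode_picture(), destroy() at gen8_mfd_context_destroy() */
        VAStatus status = hw_ctx->run(ctx, obj_config->profile, codec_state, hw_ctx);
        (void)status;
        hw_ctx->destroy(hw_ctx);
    }
}
#endif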