OSDN Git Service

Fix the incorrect 48-bit address reallocation
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV             2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
/* Zig-zag scan order for an 8x8 coefficient block: entry i gives the
 * raster (row-major) index of the i-th coefficient in scan order, as
 * used when uploading quantization matrices to the MFX_QM_STATE command. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
61
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx, 
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77
78         if (!gen7_avc_surface)
79             return;
80
81         gen7_avc_surface->base.frame_store_id = -1;
82         assert((obj_surface->size & 0x3f) == 0);
83         obj_surface->private_data = gen7_avc_surface;
84     }
85
86     /* DMV buffers now relate to the whole frame, irrespective of
87        field coding modes */
88     if (gen7_avc_surface->dmv_top == NULL) {
89         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
90                                                  "direct mv w/r buffer",
91                                                  width_in_mbs * height_in_mbs * 128,
92                                                  0x1000);
93         assert(gen7_avc_surface->dmv_top);
94     }
95 }
96
/*
 * Emit MFX_PIPE_MODE_SELECT: configure the MFX engine for VLD decoding of
 * the selected codec, enabling either the post- or pre-deblocking output
 * path depending on which surface the caller marked valid.
 */
static void
gen8_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* only these codecs are supported by this gen8 decode path */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1 ||
           standard_select == MFX_FORMAT_JPEG ||
           standard_select == MFX_FORMAT_VP8);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
132
/*
 * Emit MFX_SURFACE_STATE for the destination (render) surface: dimensions,
 * pitch, tiling and the Y offsets of the Cb/Cr planes.  Monochrome is used
 * for Y800 surfaces, planar 4:2:0 otherwise.
 */
static void
gen8_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface = decode_state->render_object;
    unsigned int y_cb_offset;
    unsigned int y_cr_offset;
    unsigned int surface_format;

    assert(obj_surface);

    y_cb_offset = obj_surface->y_cb_offset;
    y_cr_offset = obj_surface->y_cr_offset;

    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 18) |
                  ((obj_surface->orig_width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (surface_format << 28) | /* 420 planar YUV surface */
                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
175
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 DWORDs): base addresses of the decode
 * pipeline buffers.  Each buffer slot is a 48-bit address (two DWORDs via
 * OUT_BCS_RELOC64, or two zero DWORDs when absent) followed by one DWORD
 * of memory attributes (MOCS).  DW 19..50 carry the 16 reference surfaces.
 */
static void
gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
        /* Pre-deblock 1-3 */
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

        /* Post-deblocking 4-6 */
    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

        /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* intra row-store scratch 13-15 */
    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else {
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

        /* deblocking-filter-row-store 16-18 */
    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 19..50: one 64-bit address slot per possible reference surface */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->bo) {
            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

            OUT_BCS_RELOC64(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }

    }

    /* reference property 51 */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Macroblock status & ILDB 52-57 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the second Macroblock status 58-60 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
281
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 DWORDs): base addresses of the
 * indirect objects.  Only the bitstream object (the slice data BO) is
 * programmed for decoding; the MV, IT_COFF, IT_DBLK and PAK_BSE slots are
 * zeroed.
 */
static void
gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    BEGIN_BCS_BATCH(batch, 26);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
        /* MFX In BS 1-5 */
    OUT_BCS_RELOC64(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
        /* Upper bound 4-5 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX indirect MV 6-10 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX IT_COFF 11-15 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX IT_DBLK 16-20 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX PAK_BSE object for encoder 21-25 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
330
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 DWORDs): BSD/MPC row store, MPR row
 * store and bitplane read buffers.  Each slot is two address DWORDs
 * (OUT_BCS_RELOC64 or zeros when the buffer is not valid) plus one MOCS
 * DWORD.
 */
static void
gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* BSD/MPC Row Store Scratch buffer 1-3 */
    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
        else {
                OUT_BCS_BATCH(batch, 0);
                OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
        /* MPR Row Store Scratch buffer 4-6 */
    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

        /* Bitplane 7-9 */
    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC64(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    ADVANCE_BCS_BATCH(batch);
}
377
378 static void
379 gen8_mfd_qm_state(VADriverContextP ctx,
380                   int qm_type,
381                   unsigned char *qm,
382                   int qm_length,
383                   struct gen7_mfd_context *gen7_mfd_context)
384 {
385     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
386     unsigned int qm_buffer[16];
387
388     assert(qm_length <= 16 * 4);
389     memcpy(qm_buffer, qm, qm_length);
390
391     BEGIN_BCS_BATCH(batch, 18);
392     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
393     OUT_BCS_BATCH(batch, qm_type << 0);
394     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
395     ADVANCE_BCS_BATCH(batch);
396 }
397
/*
 * Emit MFX_AVC_IMG_STATE (17 DWORDs) from the H.264 picture parameters:
 * frame dimensions in macroblocks, QP offsets, prediction/weighting flags
 * and the picture structure (frame / top field / bottom field).
 * Only 4:2:0 and monochrome chroma formats are supported by the MFX unit.
 */
static void
gen8_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    /* img_struct: 0 = frame, 1 = top field, 3 = bottom field */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* field pictures must carry field_pic_flag and vice versa */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF: adaptive frame/field coding within a frame picture */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 17);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
    OUT_BCS_BATCH(batch,
                  (width_in_mbs * height_in_mbs - 1));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
483
/*
 * Upload the H.264 scaling lists: the 4x4 intra/inter matrices always,
 * and the 8x8 intra/inter matrices only when 8x8 transform mode is on.
 * Falls back to the context's cached IQ matrix when the decode state
 * carries none.
 */
static void
gen8_mfd_avc_qm_state(VADriverContextP ctx,
                      struct decode_state *decode_state,
                      struct gen7_mfd_context *gen7_mfd_context)
{
    VAIQMatrixBufferH264 *iq_matrix;
    VAPictureParameterBufferH264 *pic_param;

    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
        iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
    else
        iq_matrix = &gen7_mfd_context->iq_matrix.h264;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    /* lists 0-2 are intra, lists 3-5 are inter (3 * 16 bytes each) */
    gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
    gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
        gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
        gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
    }
}
508
/* Thin wrapper: emit MFX_AVC_PICID_STATE for the current reference
 * surfaces via the shared gen75 helper. */
static inline void
gen8_mfd_avc_picid_state(VADriverContextP ctx,
    struct decode_state *decode_state,
    struct gen7_mfd_context *gen7_mfd_context)
{
    gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
        gen7_mfd_context->reference_surface);
}
517
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 DWORDs): the direct-MV (DMV) buffers
 * of the 16 reference surfaces and of the current frame (64-bit addresses
 * plus MOCS), followed by the top/bottom POC list for the references and
 * the current picture.
 */
static void
gen8_mfd_avc_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen7_avc_surface;
    VAPictureH264 *va_pic;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->private_data) {

            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
            gen7_avc_surface = obj_surface->private_data;

            OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* the current decoding frame/field */
    va_pic = &pic_param->CurrPic; /* NOTE(review): redundant here; va_pic is re-assigned below before use */
    obj_surface = decode_state->render_object;
    assert(obj_surface->bo && obj_surface->private_data);
    gen7_avc_surface = obj_surface->private_data;

    OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* POC List */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

        if (obj_surface) {
            const VAPictureH264 * const va_pic = avc_find_picture(
                obj_surface->base.id, pic_param->ReferenceFrames,
                ARRAY_ELEMS(pic_param->ReferenceFrames));

            assert(va_pic != NULL);
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* POC of the current picture */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
591
/* Thin wrapper: emit a phantom (error-concealment) slice before the first
 * real slice, via the shared gen6 helper. */
static void
gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
                                 VAPictureParameterBufferH264 *pic_param,
                                 VASliceParameterBufferH264 *next_slice_param,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
}
600
/*
 * Emit MFX_AVC_SLICE_STATE for one slice: slice type, reference counts,
 * weight denominators, QP/deblocking parameters and the MB start/end
 * positions of this slice and the next one.  For the last slice the "next"
 * position is the bottom of the picture and the last-slice flag is set.
 */
static void
gen8_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* fold SI/SP into their I/P equivalents for the hardware */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    /* active reference counts: I uses none, P uses list 0, B uses both */
    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    first_mb_in_slice = slice_param->first_mb_in_slice;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    /* MBAFF addresses MB pairs, so the vertical position doubles */
    if (mbaff_picture)
        slice_ver_pos = slice_ver_pos << 1;
    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;

        if (mbaff_picture)
            next_slice_ver_pos = next_slice_ver_pos << 1;
    } else {
        /* last slice: "next" position is the bottom of the (field) picture */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
691
/* Thin wrapper: emit MFX_AVC_REF_IDX_STATE for this slice's reference
 * lists via the shared gen6 helper. */
static inline void
gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           VASliceParameterBufferH264 *slice_param,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    gen6_send_avc_ref_idx_state(
        gen7_mfd_context->base.batch,
        slice_param,
        gen7_mfd_context->reference_surface
    );
}
704
/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE when explicit weighted prediction is in
 * use: one table (list 0) for weighted P slices, two tables (lists 0 and 1)
 * for explicitly weighted B slices.  Each table packs, per reference, the
 * luma weight/offset and the Cb and Cr weight/offset pairs (6 shorts).
 */
static void
gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
                                VAPictureParameterBufferH264 *pic_param,
                                VASliceParameterBufferH264 *slice_param,
                                struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i, j, num_weight_offset_table = 0;
    short weightoffsets[32 * 6];

    if ((slice_param->slice_type == SLICE_TYPE_P ||
         slice_param->slice_type == SLICE_TYPE_SP) &&
        (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
        num_weight_offset_table = 1;
    }

    if ((slice_param->slice_type == SLICE_TYPE_B) &&
        (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
        num_weight_offset_table = 2;
    }

    for (i = 0; i < num_weight_offset_table; i++) {
        BEGIN_BCS_BATCH(batch, 98);
        OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
        OUT_BCS_BATCH(batch, i); /* 0 = list 0 (L0), 1 = list 1 (L1) */

        if (i == 0) {
            for (j = 0; j < 32; j++) {
                weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
                weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
                weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
                weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
                weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
                weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
            }
        } else {
            for (j = 0; j < 32; j++) {
                weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
                weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
                weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
                weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
                weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
                weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
            }
        }

        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
        ADVANCE_BCS_BATCH(batch);
    }
}
755
/*
 * Emit an MFD_AVC_BSD_OBJECT command that tells the bitstream decoder
 * where one AVC slice's compressed data lives in the indirect object
 * buffer and at which bit decoding of macroblock data starts.
 */
static void
gen8_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* Bit offset of the first macroblock inside the slice data; the helper
     * accounts for CABAC vs. CAVLC (entropy_coding_mode_flag). */
    int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
                                                            slice_param,
                                                            pic_param->pic_fields.bits.entropy_coding_mode_flag);

    /* the input bitsteam format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    /* DW1: slice data size in bytes */
    OUT_BCS_BATCH(batch, 
                  (slice_param->slice_data_size));
    /* DW2: byte offset of the slice inside the indirect object buffer */
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    /* DW3: all control bits cleared (no concealment overrides) */
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    /* DW4: first-MB position as byte offset (bits 16+) plus the residual
     * bit offset (low 3 bits); bit 3 flags the last slice of the frame. */
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) |
                  (1 << 7)  |
                  (0 << 5)  |
                  (0 << 4)  |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
791
/*
 * Per-context AVC initialization: seed the cached H.264 IQ matrix with the
 * default (flat) scaling lists via avc_gen_default_iq_matrix().
 */
static inline void
gen8_mfd_avc_context_init(
    VADriverContextP         ctx,
    struct gen7_mfd_context *gen7_mfd_context
)
{
    /* Initialize flat scaling lists */
    avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
}
801
/*
 * Per-picture AVC decode setup: decide whether in-loop deblocking is
 * needed, bind the render target as the (pre- or post-deblocking) output,
 * and (re)allocate the row-store scratch buffers sized by picture width.
 */
static void
gen8_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    unsigned int width_in_mbs, height_in_mbs;

    /* In-loop deblocking (ILDB) is required if any slice in the picture
     * does not fully disable the deblocking filter (idc != 1); stop
     * scanning as soon as one such slice is found. */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    /* Refresh the DPB frame-store mapping for this picture's references. */
    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
        gen7_mfd_context->reference_surface);
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    if (pic_param->pic_fields.bits.reference_pic_flag)
        obj_surface->flags |= SURFACE_REFERENCED;
    else
        obj_surface->flags &= ~SURFACE_REFERENCED;

    avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
    gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* The decoded picture is written through exactly one of the two output
     * paths: post-deblocking when ILDB runs, pre-deblocking otherwise.
     * Both point at the render target's BO; only one is marked valid. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Row-store scratch buffers: sizes scale with picture width in MBs.
     * The per-MB byte counts follow the MFX programming requirements and
     * are not re-derived here. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* AVC carries no bitplane data (VC-1 only). */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
903
/*
 * Decode one complete AVC picture: emit the per-picture MFX pipeline state
 * once, then per-slice state and BSD objects for every slice in every
 * slice-parameter buffer, and flush the batch.
 */
static void
gen8_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);

    /* Per-picture pipeline state, emitted once before any slice. */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);

    /* Outer loop: slice-parameter buffers; inner loop: slices within one. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);

        /* First slice of the next buffer, used to chain "next slice"
         * state across buffer boundaries. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        /* If the first slice does not start at MB 0, emit a phantom slice
         * covering the leading gap (presumably error concealment for a
         * lost first slice — confirm against phantom_slice_first). */
        if (j == 0 && slice_param->first_mb_in_slice)
            gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context); 

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            /* Per-slice state then the BSD object that kicks decoding. */
            gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
968
/*
 * Per-picture MPEG-2 decode setup: bind reference surfaces, the render
 * target (always via the pre-deblocking path — MPEG-2 has no in-loop
 * deblocking), and the BSD/MPC row-store scratch buffer.
 */
static void
gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferMPEG2 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    unsigned int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;

    mpeg2_set_reference_surfaces(
        ctx,
        gen7_mfd_context->reference_surface,
        decode_state,
        pic_param
    );

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = 1;

    /* 96 bytes per MB column, per the MFX programming requirements. */
    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* Remaining output/scratch paths are unused for MPEG-2. */
    gen7_mfd_context->post_deblocking_output.valid = 0;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
1015
/*
 * Emit the MFX_MPEG2_PIC_STATE command for the current picture: the four
 * 4-bit f_codes, the picture-coding-extension flags, the coding type and
 * the frame dimensions in macroblocks.
 */
static void
gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int slice_concealment_disable_bit = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    /* Hardware slice concealment is always disabled on this path. */
    slice_concealment_disable_bit = 1;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    /* DW1: pic_param->f_code packs f_code[0][0]..f_code[1][1] as four
     * 4-bit fields from the MSB down; unpack into the command's nibbles,
     * then OR in the picture_coding_extension flags. */
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    /* DW2: picture coding type (I/P/B). */
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    /* DW3: frame size in macroblocks, minus one in each dimension. */
    OUT_BCS_BATCH(batch,
                  (slice_concealment_disable_bit << 31) |
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
    /* DW4..DW12: unused, zeroed. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
1062
1063 static void
1064 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1065                         struct decode_state *decode_state,
1066                         struct gen7_mfd_context *gen7_mfd_context)
1067 {
1068     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1069     int i, j;
1070
1071     /* Update internal QM state */
1072     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1073         VAIQMatrixBufferMPEG2 * const iq_matrix =
1074             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1075
1076         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1077             iq_matrix->load_intra_quantiser_matrix) {
1078             gen_iq_matrix->load_intra_quantiser_matrix =
1079                 iq_matrix->load_intra_quantiser_matrix;
1080             if (iq_matrix->load_intra_quantiser_matrix) {
1081                 for (j = 0; j < 64; j++)
1082                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1083                         iq_matrix->intra_quantiser_matrix[j];
1084             }
1085         }
1086
1087         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1088             iq_matrix->load_non_intra_quantiser_matrix) {
1089             gen_iq_matrix->load_non_intra_quantiser_matrix =
1090                 iq_matrix->load_non_intra_quantiser_matrix;
1091             if (iq_matrix->load_non_intra_quantiser_matrix) {
1092                 for (j = 0; j < 64; j++)
1093                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1094                         iq_matrix->non_intra_quantiser_matrix[j];
1095             }
1096         }
1097     }
1098
1099     /* Commit QM state to HW */
1100     for (i = 0; i < 2; i++) {
1101         unsigned char *qm = NULL;
1102         int qm_type;
1103
1104         if (i == 0) {
1105             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1106                 qm = gen_iq_matrix->intra_quantiser_matrix;
1107                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1108             }
1109         } else {
1110             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1111                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1112                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1113             }
1114         }
1115
1116         if (!qm)
1117             continue;
1118
1119         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1120     }
1121 }
1122
/*
 * Emit an MFD_MPEG2_BSD_OBJECT command for one slice: where its data
 * starts in the indirect object buffer and how many macroblocks it
 * covers (derived from this slice's start and the next slice's start).
 */
static void
gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    /* Workaround: some apps report slice_vertical_position in frame units
     * for field pictures (see mpeg2_wa_slice_vertical_position); when the
     * workaround is active the positions must be halved back. */
    is_field_pic_wa = is_field_pic &&
        gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    /* Start position of this slice (in MB coordinates). */
    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    /* End position: start of the next slice, or the bottom of the picture
     * (field height for field pictures) when this is the last slice. */
    if (next_slice_param == NULL) {
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    /* Macroblocks covered by this slice, in raster order. */
    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    /* DW1/DW2: data size and offset, skipping the whole bytes consumed by
     * macroblock_offset; the residual bits go into DW3's low 3 bits. */
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (vpos1 << 8 | hpos1));
    ADVANCE_BCS_BATCH(batch);
}
1171
/*
 * Decode one complete MPEG-2 picture: emit the per-picture MFX pipeline
 * state once, then a BSD object per slice, and flush the batch.
 */
static void
gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Detect (once per context) whether the app reports field-picture
     * slice positions in frame units; < 0 means not yet determined. */
    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    /* Outer loop: slice-parameter buffers; inner loop: slices within one. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);

        /* First slice of the next buffer, used to chain "next slice"
         * information across buffer boundaries. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1227
/* Map VA-API VC-1 picture_type (I, P, B, BI, skipped) to the GEN7 MFX
 * picture-type encoding; index 4 (skipped) decodes as a P picture here,
 * with skipped handling done elsewhere via GEN7_VC1_SKIPPED_PICTURE. */
static const int va_to_gen7_vc1_pic_type[5] = {
    GEN7_VC1_I_PICTURE,
    GEN7_VC1_P_PICTURE,
    GEN7_VC1_B_PICTURE,
    GEN7_VC1_BI_PICTURE,
    GEN7_VC1_P_PICTURE,
};

/* Map VA-API VC-1 mv_mode to the GEN7 unified MV mode encoding. */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pef bilinear */
    0, /* Mixed MV */
};

/* NOTE(review): presumably the VC-1 BFRACTION-derived scale factors
 * (fraction * 256) used for B-picture MV scaling — confirm against the
 * VC-1 specification's BFRACTION table. */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160, 
    224,
};

/* Map VA-API VC-1 conditional overlap (0..2) to the GEN7 CONDOVER field. */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};

/* Map VA-API VC-1 sequence profile (0..3) to the GEN7 profile encoding. */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
};
1263
1264 static void 
1265 gen8_mfd_free_vc1_surface(void **data)
1266 {
1267     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1268
1269     if (!gen7_vc1_surface)
1270         return;
1271
1272     dri_bo_unreference(gen7_vc1_surface->dmv);
1273     free(gen7_vc1_surface);
1274     *data = NULL;
1275 }
1276
1277 static void
1278 gen8_mfd_init_vc1_surface(VADriverContextP ctx, 
1279                           VAPictureParameterBufferVC1 *pic_param,
1280                           struct object_surface *obj_surface)
1281 {
1282     struct i965_driver_data *i965 = i965_driver_data(ctx);
1283     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1284     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1285     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1286
1287     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1288
1289     if (!gen7_vc1_surface) {
1290         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1291
1292         if (!gen7_vc1_surface)
1293             return;
1294
1295         assert((obj_surface->size & 0x3f) == 0);
1296         obj_surface->private_data = gen7_vc1_surface;
1297     }
1298
1299     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1300
1301     if (gen7_vc1_surface->dmv == NULL) {
1302         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1303                                              "direct mv w/r buffer",
1304                                              width_in_mbs * height_in_mbs * 64,
1305                                              0x1000);
1306     }
1307 }
1308
/*
 * Per-picture VC-1 decode setup: bind references and the render target
 * (post- or pre-deblocking depending on the loop-filter flag), allocate
 * the row-store scratch buffers, and repack the application-supplied
 * bitplane into the hardware's per-row layout when one is present.
 */
static void
gen8_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int width_in_mbs;
    int picture_type;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    picture_type = pic_param->picture_fields.bits.picture_type;
 
    intel_update_vc1_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* Output goes through the deblocking path iff the in-loop filter is
     * enabled; exactly one of post/pre deblocking outputs is valid. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    /* Row-store scratch buffers, sized by picture width in MBs. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 7 * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
    
    if (gen7_mfd_context->bitplane_read_buffer.valid) {
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        /* Destination layout: 2 macroblocks (one nibble each) per byte,
         * each MB row padded to a whole number of bytes. */
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
        int src_w, src_h;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "VC-1 Bitplane",
                          bitplane_width * height_in_mbs,
                          0x1000);
        assert(bo);
        gen7_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);
        dst = bo->virtual;

        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for(src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;
                int src_shift;
                uint8_t src_value;

                /* Source packs 2 MBs per byte with no row padding; the MB
                 * at an even raster index sits in the high nibble. */
                src_index = (src_h * width_in_mbs + src_w) / 2;
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                /* NOTE(review): for skipped pictures bit 1 of every nibble
                 * is forced on — presumably the skip-MB flag; confirm
                 * against the MFX bitplane layout. */
                if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
                    src_value |= 0x2;
                }

                /* Shift trick: the first nibble of a byte lands in the
                 * high half, then is pushed down when the second nibble of
                 * the same byte arrives. */
                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
            }

            /* Odd width: the last byte holds only one MB; normalize it
             * into the low nibble. */
            if (src_w & 1)
                dst[src_w / 2] >>= 4;

            dst += bitplane_width;
        }

        dri_bo_unmap(bo);
    } else
        gen7_mfd_context->bitplane_read_buffer.bo = NULL;
}
1427
/*
 * Emit the MFD_VC1_LONG_PIC_STATE command for the current picture.
 *
 * Translates the VA-API VC-1 picture parameters into the hardware's
 * per-picture state: alternative-pquant configuration (VC-1 DQUANT
 * syntax), unified MV mode, B-picture scale factor, frame coding mode,
 * overlap smoothing, interpolation mode and the bitplane/table indices.
 * The six command dwords must be emitted in exactly this order.
 */
static void
gen8_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;
    int interpolation_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /*
     * Derive the alternative-quantizer configuration and edge mask from
     * the VC-1 DQUANT/DQUANTFRM/DQPROFILE syntax elements.
     */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        /* DQUANT == 2: alternative quantizer on all four edges */
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3:
                /* DQPROFILE == all macroblocks: binary or per-MB level */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;
                
            case 0:
                /* all four edges */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1:
                /* double edge: dqdbedge selects which adjacent pair */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2:
                /* single edge */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /* With intensity compensation the real MV mode is carried in mv_mode2. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    /* BFRACTION-derived scale factor for B-picture direct-mode MVs */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
    
    /* Advanced-profile I pictures are programmed as BI for this command. */
    if (profile == GEN7_VC1_ADVANCED_PROFILE && 
        picture_type == GEN7_VC1_I_PICTURE)
        picture_type = GEN7_VC1_BI_PICTURE;

    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;

        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /*
     * The direct-mode MV read surface is only valid when the backward
     * reference carries motion vectors (i.e. it is not an I/BI picture).
     */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        struct gen7_vc1_surface *gen7_vc1_surface = NULL;

        obj_surface = decode_state->reference_objects[1];

        if (obj_surface)
            gen7_vc1_surface = obj_surface->private_data;

        if (!gen7_vc1_surface || 
            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* fcm: 0 = progressive, 1 = frame-interlace, 2/3 = field (TFF/BFF) */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /* NOTE(review): compares the raw VA picture_type against a GEN7_* constant;
     * appears to rely on the two encodings agreeing for B pictures — confirm. */
    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /*
     * Overlap smoothing: enabled per 8.4 of the VC-1 spec — implicit for
     * PQUANT >= 9 (simple/main), and for advanced profile additionally
     * controlled by CONDOVER on I/BI pictures.
     */
    overlap = pic_param->sequence_fields.bits.overlap;

    if (overlap) {
        overlap = 0;
        if (profile != GEN7_VC1_ADVANCED_PROFILE){
            if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
                pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
                overlap = 1;
            }
        }else {
            if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
                pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
                overlap = 1;
            }
            if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
                pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
                if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
                    overlap = 1;
                } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                           va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
                    overlap = 1;
                }
            }
        }
    } 

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 9; /* Half-pel bilinear */
    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
        interpolation_mode = 1; /* Half-pel bicubic */
    else
        interpolation_mode = 0; /* Quarter-pel bicubic */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    /* DW1: picture dimensions in macroblocks (minus one) */
    OUT_BCS_BATCH(batch,
                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    /* DW2: control flags */
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
                  overlap << 4 |
                  !pic_param->picture_fields.bits.is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    /* DW3: picture type, coding mode and quantizer scale */
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  picture_type << 26 |
                  fcm << 24 |
                  alt_pq << 16 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
                  scale_factor << 0);
    /* DW4: motion-vector mode and alternative-pquant configuration */
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  pic_param->reference_fields.bits.reference_distance << 20 |
                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    /* DW5: bitplane presence (inverted per-flag) and VLC table selectors */
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 31 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
}
1689
1690 static void
1691 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1692                              struct decode_state *decode_state,
1693                              struct gen7_mfd_context *gen7_mfd_context)
1694 {
1695     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1696     VAPictureParameterBufferVC1 *pic_param;
1697     int intensitycomp_single;
1698
1699     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1700     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1701     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1702
1703     BEGIN_BCS_BATCH(batch, 6);
1704     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1705     OUT_BCS_BATCH(batch,
1706                   0 << 14 | /* FIXME: double ??? */
1707                   0 << 12 |
1708                   intensitycomp_single << 10 |
1709                   intensitycomp_single << 8 |
1710                   0 << 4 | /* FIXME: interlace mode */
1711                   0);
1712     OUT_BCS_BATCH(batch,
1713                   pic_param->luma_shift << 16 |
1714                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1715     OUT_BCS_BATCH(batch, 0);
1716     OUT_BCS_BATCH(batch, 0);
1717     OUT_BCS_BATCH(batch, 0);
1718     ADVANCE_BCS_BATCH(batch);
1719 }
1720
1721 static void
1722 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1723                               struct decode_state *decode_state,
1724                               struct gen7_mfd_context *gen7_mfd_context)
1725 {
1726     struct i965_driver_data *i965 = i965_driver_data(ctx);
1727     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1728     struct object_surface *obj_surface;
1729     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1730
1731     obj_surface = decode_state->render_object;
1732
1733     if (obj_surface && obj_surface->private_data) {
1734         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1735     }
1736
1737     obj_surface = decode_state->reference_objects[1];
1738
1739     if (obj_surface && obj_surface->private_data) {
1740         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1741     }
1742
1743     BEGIN_BCS_BATCH(batch, 7);
1744     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1745
1746     if (dmv_write_buffer)
1747         OUT_BCS_RELOC64(batch, dmv_write_buffer,
1748                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1749                       0);
1750     else {
1751         OUT_BCS_BATCH(batch, 0);
1752         OUT_BCS_BATCH(batch, 0);
1753     }
1754
1755     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
1756
1757     if (dmv_read_buffer)
1758         OUT_BCS_RELOC64(batch, dmv_read_buffer,
1759                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1760                       0);
1761     else {
1762         OUT_BCS_BATCH(batch, 0);
1763         OUT_BCS_BATCH(batch, 0);
1764     }
1765     
1766     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
1767                   
1768     ADVANCE_BCS_BATCH(batch);
1769 }
1770
/*
 * Translate a slice-header bit offset into the offset the hardware
 * expects, accounting for emulation-prevention bytes.
 *
 * For non-advanced profiles (profile != 3) the offset is returned
 * unchanged.  For the advanced profile the slice data still contains
 * 0x00 0x00 0x03 escape sequences (followed by a byte < 4); each one
 * found inside the slice header widens the byte offset by one.
 */
static int
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_bytes = in_slice_data_bit_offset / 8;
    int src, dst;

    if (profile != 3)
        return in_slice_data_bit_offset;

    dst = 0;

    for (src = 0; src < header_bytes; src++, dst++) {
        /* Skip one extra raw byte for every emulation-prevention escape. */
        if (!buf[dst] && !buf[dst + 1] && buf[dst + 2] == 3 && buf[dst + 3] < 4) {
            src++;
            dst += 2;
        }
    }

    return 8 * dst + in_slice_data_bit_offset % 8;
}
1792
/*
 * Emit one MFD_VC1_BSD_OBJECT command for a single slice.
 *
 * The slice data buffer is mapped briefly to locate the first macroblock
 * bit offset (skipping emulation-prevention bytes for the advanced
 * profile).  The command then describes the slice's byte span within the
 * indirect object buffer and its vertical macroblock range; the last
 * slice is bounded by the picture height.
 */
static void
gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVC1 *pic_param,
                        VASliceParameterBufferVC1 *slice_param,
                        VASliceParameterBufferVC1 *next_slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int next_slice_start_vert_pos;
    int macroblock_offset;
    uint8_t *slice_data = NULL;

    /* Map the bo only long enough to scan the slice header bytes. */
    dri_bo_map(slice_data_bo, 0);
    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
    macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data, 
                                                               slice_param->macroblock_offset,
                                                               pic_param->sequence_fields.bits.profile);
    dri_bo_unmap(slice_data_bo);

    /* The final slice extends to the bottom macroblock row of the picture. */
    if (next_slice_param)
        next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
    else
        next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
    /* DW1: data length, excluding the whole header bytes already consumed */
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_size - (macroblock_offset >> 3));
    /* DW2: start offset advanced past the slice header */
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_offset + (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_vertical_position << 16 |
                  next_slice_start_vert_pos << 0);
    /* DW4: residual bit offset within the first data byte */
    OUT_BCS_BATCH(batch,
                  (macroblock_offset & 0x7));
    ADVANCE_BCS_BATCH(batch);
}
1831
1832 static void
1833 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1834                             struct decode_state *decode_state,
1835                             struct gen7_mfd_context *gen7_mfd_context)
1836 {
1837     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1838     VAPictureParameterBufferVC1 *pic_param;
1839     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1840     dri_bo *slice_data_bo;
1841     int i, j;
1842
1843     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1844     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1845
1846     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1847     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1848     intel_batchbuffer_emit_mi_flush(batch);
1849     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1850     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1851     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1852     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1853     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1854     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1855     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1856
1857     for (j = 0; j < decode_state->num_slice_params; j++) {
1858         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1859         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1860         slice_data_bo = decode_state->slice_datas[j]->bo;
1861         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1862
1863         if (j == decode_state->num_slice_params - 1)
1864             next_slice_group_param = NULL;
1865         else
1866             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1867
1868         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1869             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1870
1871             if (i < decode_state->slice_params[j]->num_elements - 1)
1872                 next_slice_param = slice_param + 1;
1873             else
1874                 next_slice_param = next_slice_group_param;
1875
1876             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1877             slice_param++;
1878         }
1879     }
1880
1881     intel_batchbuffer_end_atomic(batch);
1882     intel_batchbuffer_flush(batch);
1883 }
1884
/*
 * Prepare the decoder context for a JPEG picture.
 *
 * Derives the chroma subsampling and the matching surface fourcc from
 * the per-component sampling factors, makes sure the render target's bo
 * exists with that layout, and points the pre-deblocking output at it.
 * JPEG uses none of the row-store scratch buffers, so they are all
 * cleared.
 */
static void
gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface;
    VAPictureParameterBufferJPEGBaseline *pic_param;
    int subsampling = SUBSAMPLE_YUV420;
    int fourcc = VA_FOURCC_IMC3;

    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    if (pic_param->num_components == 1) {
        /* Single component: grayscale */
        subsampling = SUBSAMPLE_YUV400;
        fourcc = VA_FOURCC_Y800;
    } else if (pic_param->num_components == 3) {
        /* Classify by the (h, v) sampling-factor triples of Y/Cb/Cr. */
        int h1 = pic_param->components[0].h_sampling_factor;
        int h2 = pic_param->components[1].h_sampling_factor;
        int h3 = pic_param->components[2].h_sampling_factor;
        int v1 = pic_param->components[0].v_sampling_factor;
        int v2 = pic_param->components[1].v_sampling_factor;
        int v3 = pic_param->components[2].v_sampling_factor;

        if (h1 == 2 && h2 == 1 && h3 == 1 &&
            v1 == 2 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV420;
            fourcc = VA_FOURCC_IMC3;
        } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
                   v1 == 1 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV422H;
            fourcc = VA_FOURCC_422H;
        } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
                   v1 == 1 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV444;
            fourcc = VA_FOURCC_444P;
        } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
                   v1 == 1 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV411;
            fourcc = VA_FOURCC_411P;
        } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
                   v1 == 2 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV422V;
            fourcc = VA_FOURCC_422V;
        } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
                   v1 == 2 && v2 == 2 && v3 == 2) {
            subsampling = SUBSAMPLE_YUV422H;
            fourcc = VA_FOURCC_422H;
        } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
                   v1 == 2 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV422V;
            fourcc = VA_FOURCC_422V;
        } else
            assert(0);
    }
    else {
        /* 2- or 4-component streams are not supported. */
        assert(0);
    }

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);

    /* JPEG output is never deblocked: route it to the pre-deblocking slot. */
    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = 1;

    gen7_mfd_context->post_deblocking_output.bo = NULL;
    gen7_mfd_context->post_deblocking_output.valid = 0;

    gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.bo = NULL;
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
1970
/* Map a VA rotation index (0/90/180/270 degrees) to the MFX JPEG rotation field. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
1977
1978 static void
1979 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1980                         struct decode_state *decode_state,
1981                         struct gen7_mfd_context *gen7_mfd_context)
1982 {
1983     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1984     VAPictureParameterBufferJPEGBaseline *pic_param;
1985     int chroma_type = GEN7_YUV420;
1986     int frame_width_in_blks;
1987     int frame_height_in_blks;
1988
1989     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1990     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1991
1992     if (pic_param->num_components == 1)
1993         chroma_type = GEN7_YUV400;
1994     else if (pic_param->num_components == 3) {
1995         int h1 = pic_param->components[0].h_sampling_factor;
1996         int h2 = pic_param->components[1].h_sampling_factor;
1997         int h3 = pic_param->components[2].h_sampling_factor;
1998         int v1 = pic_param->components[0].v_sampling_factor;
1999         int v2 = pic_param->components[1].v_sampling_factor;
2000         int v3 = pic_param->components[2].v_sampling_factor;
2001
2002         if (h1 == 2 && h2 == 1 && h3 == 1 &&
2003             v1 == 2 && v2 == 1 && v3 == 1)
2004             chroma_type = GEN7_YUV420;
2005         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2006                  v1 == 1 && v2 == 1 && v3 == 1)
2007             chroma_type = GEN7_YUV422H_2Y;
2008         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2009                  v1 == 1 && v2 == 1 && v3 == 1)
2010             chroma_type = GEN7_YUV444;
2011         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2012                  v1 == 1 && v2 == 1 && v3 == 1)
2013             chroma_type = GEN7_YUV411;
2014         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2015                  v1 == 2 && v2 == 1 && v3 == 1)
2016             chroma_type = GEN7_YUV422V_2Y;
2017         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2018                  v1 == 2 && v2 == 2 && v3 == 2)
2019             chroma_type = GEN7_YUV422H_4Y;
2020         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2021                  v1 == 2 && v2 == 1 && v3 == 1)
2022             chroma_type = GEN7_YUV422V_4Y;
2023         else
2024             assert(0);
2025     }
2026
2027     if (chroma_type == GEN7_YUV400 ||
2028         chroma_type == GEN7_YUV444 ||
2029         chroma_type == GEN7_YUV422V_2Y) {
2030         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2031         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2032     } else if (chroma_type == GEN7_YUV411) {
2033         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2034         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2035     } else {
2036         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2037         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2038     }
2039
2040     BEGIN_BCS_BATCH(batch, 3);
2041     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2042     OUT_BCS_BATCH(batch,
2043                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2044                   (chroma_type << 0));
2045     OUT_BCS_BATCH(batch,
2046                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2047                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2048     ADVANCE_BCS_BATCH(batch);
2049 }
2050
/* Huffman table slot IDs: index 0 is the luma (Y) table, index 1 the chroma (UV) table. */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
2055
2056 static void
2057 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2058                                struct decode_state *decode_state,
2059                                struct gen7_mfd_context *gen7_mfd_context,
2060                                int num_tables)
2061 {
2062     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2063     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2064     int index;
2065
2066     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2067         return;
2068
2069     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2070
2071     for (index = 0; index < num_tables; index++) {
2072         int id = va_to_gen7_jpeg_hufftable[index];
2073         if (!huffman_table->load_huffman_table[index])
2074             continue;
2075         BEGIN_BCS_BATCH(batch, 53);
2076         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2077         OUT_BCS_BATCH(batch, id);
2078         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2079         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2080         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2081         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2082         ADVANCE_BCS_BATCH(batch);
2083     }
2084 }
2085
/* Map a 1-based JPEG component id (Y=1, Cb=2, Cr=3, alpha=4) to the MFX QM slot; index 0 is unused. */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
2093
2094 static void
2095 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2096                        struct decode_state *decode_state,
2097                        struct gen7_mfd_context *gen7_mfd_context)
2098 {
2099     VAPictureParameterBufferJPEGBaseline *pic_param;
2100     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2101     int index;
2102
2103     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2104         return;
2105
2106     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2107     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2108
2109     assert(pic_param->num_components <= 3);
2110
2111     for (index = 0; index < pic_param->num_components; index++) {
2112         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2113         int qm_type;
2114         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2115         unsigned char raster_qm[64];
2116         int j;
2117
2118         if (id > 4 || id < 1)
2119             continue;
2120
2121         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2122             continue;
2123
2124         qm_type = va_to_gen7_jpeg_qm[id];
2125
2126         for (j = 0; j < 64; j++)
2127             raster_qm[zigzag_direct[j]] = qm[j];
2128
2129         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2130     }
2131 }
2132
2133 static void
2134 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2135                          VAPictureParameterBufferJPEGBaseline *pic_param,
2136                          VASliceParameterBufferJPEGBaseline *slice_param,
2137                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2138                          dri_bo *slice_data_bo,
2139                          struct gen7_mfd_context *gen7_mfd_context)
2140 {
2141     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2142     int scan_component_mask = 0;
2143     int i;
2144
2145     assert(slice_param->num_components > 0);
2146     assert(slice_param->num_components < 4);
2147     assert(slice_param->num_components <= pic_param->num_components);
2148
2149     for (i = 0; i < slice_param->num_components; i++) {
2150         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2151         case 1:
2152             scan_component_mask |= (1 << 0);
2153             break;
2154         case 2:
2155             scan_component_mask |= (1 << 1);
2156             break;
2157         case 3:
2158             scan_component_mask |= (1 << 2);
2159             break;
2160         default:
2161             assert(0);
2162             break;
2163         }
2164     }
2165
2166     BEGIN_BCS_BATCH(batch, 6);
2167     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2168     OUT_BCS_BATCH(batch, 
2169                   slice_param->slice_data_size);
2170     OUT_BCS_BATCH(batch, 
2171                   slice_param->slice_data_offset);
2172     OUT_BCS_BATCH(batch,
2173                   slice_param->slice_horizontal_position << 16 |
2174                   slice_param->slice_vertical_position << 0);
2175     OUT_BCS_BATCH(batch,
2176                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2177                   (scan_component_mask << 27) |                 /* scan components */
2178                   (0 << 26) |   /* disable interrupt allowed */
2179                   (slice_param->num_mcus << 0));                /* MCU count */
2180     OUT_BCS_BATCH(batch,
2181                   (slice_param->restart_interval << 0));    /* RestartInterval */
2182     ADVANCE_BCS_BATCH(batch);
2183 }
2184
2185 /* Workaround for JPEG decoding on Ivybridge */
2186 #ifdef JPEG_WA
2187
/*
 * Pre-encoded 16x16 clip consumed by the JPEG decoding workaround:
 * gen8_mfd_jpeg_wa() decodes this tiny stream through the AVC path
 * before the real JPEG decode is issued.
 */
static struct {
    int width;                /* clip width in pixels */
    int height;               /* clip height in pixels */
    unsigned char data[32];   /* pre-encoded slice bytes */
    int data_size;            /* number of valid bytes in data[] */
    int data_bit_offset;      /* bit offset of the slice payload in data[] */
    int qp;                   /* quantization parameter used for the slice */
} gen7_jpeg_wa_clip = {
    .width = 16,
    .height = 16,
    .data = {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    .data_size = 14,
    .data_bit_offset = 40,
    .qp = 28,
};
2206
/*
 * (Re)create the scratch surface and upload the bitstream BO used by
 * the JPEG decoding workaround.  The surface receives the decoded
 * output of the tiny AVC clip (gen7_jpeg_wa_clip); the BO holds the
 * clip's pre-encoded slice data.
 */
static void
gen8_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAStatus status;
    struct object_surface *obj_surface;

    /* Drop any surface left over from a previous decode before creating a new one. */
    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
                             1);

    /* One YUV420 surface sized to match the workaround clip (16x16). */
    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 1,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);

    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    /* Ensure the surface is backed by an NV12 BO the decoder can write into. */
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen7_mfd_context->jpeg_wa_surface_object = obj_surface;

    /* Upload the pre-encoded clip once; the BO is reused across decodes. */
    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
                                                               "JPEG WA data",
                                                               0x1000,
                                                               0x1000);
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       0,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
    }
}
2244
/*
 * Emit MFX_PIPE_MODE_SELECT for the workaround pass: the pipeline is
 * put into AVC VLD decode mode (the workaround clip is an AVC stream,
 * not JPEG), with stream-out disabled and pre-deblocking output enabled.
 */
static void
gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9)  | /* Post Deblocking Output */
                  (1 << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
2272
/*
 * Emit MFX_SURFACE_STATE describing the workaround scratch surface
 * (NV12, tiled Y-major) allocated by gen8_jpeg_wa_init().
 */
static void
gen8_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
2302
/*
 * Emit the 61-DW MFX_PIPE_BUF_ADDR_STATE for the workaround pass.
 * Only two buffers are programmed: the scratch surface as the decode
 * destination (DW 1-3; pre-deblocking output is the one enabled in
 * PIPE_MODE_SELECT) and a temporary intra row store (DW 13-15).  All
 * other address slots are zeroed.  Each 48-bit address pair is
 * followed by its MOCS/attribute dword.
 */
static void
gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *intra_bo;
    int i;

    /* Throwaway intra row store; the reloc below keeps it alive until
     * the batch completes, so it is unreferenced before returning. */
    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
                            "intra row store",
                            128 * 64,
                            0x1000);

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    OUT_BCS_RELOC64(batch,
                  obj_surface->bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    OUT_BCS_BATCH(batch, 0); /* post deblocking */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* the DW 13-15 is for intra row store scratch */
    OUT_BCS_RELOC64(batch,
                  intra_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

        /* the DW 16-18 is for deblocking filter */
    OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    /* DW 19..50: reference picture addresses, all unused for an intra-only clip */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, 0);

        /* the DW52-54 is for mb status address */
    OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        /* the DW56-60 is for ILDB & second ILDB address */
    OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* The batch holds the only needed reference via the relocation. */
    dri_bo_unreference(intra_bo);
}
2375
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE for the workaround pass, pointing
 * the BSD/MPC and MPR row stores at freshly allocated throwaway BOs.
 * The relocations keep the BOs alive, so both are unreferenced before
 * returning.
 */
static void
gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1.0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    OUT_BCS_RELOC64(batch,
                  bsd_mpc_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    OUT_BCS_RELOC64(batch,
                  mpr_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* Bitplane read buffer address: unused, left zero. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
2419
/*
 * Intentionally a no-op: no AVC quantization matrices are programmed
 * for the workaround clip.  Kept as a stub so gen8_mfd_jpeg_wa() can
 * mirror the normal AVC state-programming sequence.
 */
static void
gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{

}
2426
2427 static void
2428 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2429                            struct gen7_mfd_context *gen7_mfd_context)
2430 {
2431     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2432     int img_struct = 0;
2433     int mbaff_frame_flag = 0;
2434     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2435
2436     BEGIN_BCS_BATCH(batch, 16);
2437     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2438     OUT_BCS_BATCH(batch, 
2439                   width_in_mbs * height_in_mbs);
2440     OUT_BCS_BATCH(batch, 
2441                   ((height_in_mbs - 1) << 16) | 
2442                   ((width_in_mbs - 1) << 0));
2443     OUT_BCS_BATCH(batch, 
2444                   (0 << 24) |
2445                   (0 << 16) |
2446                   (0 << 14) |
2447                   (0 << 13) |
2448                   (0 << 12) | /* differ from GEN6 */
2449                   (0 << 10) |
2450                   (img_struct << 8));
2451     OUT_BCS_BATCH(batch,
2452                   (1 << 10) | /* 4:2:0 */
2453                   (1 << 7) |  /* CABAC */
2454                   (0 << 6) |
2455                   (0 << 5) |
2456                   (0 << 4) |
2457                   (0 << 3) |
2458                   (1 << 2) |
2459                   (mbaff_frame_flag << 1) |
2460                   (0 << 0));
2461     OUT_BCS_BATCH(batch, 0);
2462     OUT_BCS_BATCH(batch, 0);
2463     OUT_BCS_BATCH(batch, 0);
2464     OUT_BCS_BATCH(batch, 0);
2465     OUT_BCS_BATCH(batch, 0);
2466     OUT_BCS_BATCH(batch, 0);
2467     OUT_BCS_BATCH(batch, 0);
2468     OUT_BCS_BATCH(batch, 0);
2469     OUT_BCS_BATCH(batch, 0);
2470     OUT_BCS_BATCH(batch, 0);
2471     OUT_BCS_BATCH(batch, 0);
2472     ADVANCE_BCS_BATCH(batch);
2473 }
2474
/*
 * Emit MFX_AVC_DIRECTMODE_STATE for the workaround pass with every
 * entry zeroed: the intra-only clip uses no reference surfaces and
 * no direct-mode motion vector buffers.
 */
static void
gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
                                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0); /* top */
        OUT_BCS_BATCH(batch, 0); /* bottom */
    }

        OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field */
    OUT_BCS_BATCH(batch, 0); /* top */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC List */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
2509
2510 static void
2511 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2512                                      struct gen7_mfd_context *gen7_mfd_context)
2513 {
2514     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2515
2516     BEGIN_BCS_BATCH(batch, 11);
2517     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2518     OUT_BCS_RELOC64(batch,
2519                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2520                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2521                   0);
2522     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2523     OUT_BCS_BATCH(batch, 0);
2524     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2525     OUT_BCS_BATCH(batch, 0);
2526     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2527     OUT_BCS_BATCH(batch, 0);
2528     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2529     OUT_BCS_BATCH(batch, 0);
2530     ADVANCE_BCS_BATCH(batch);
2531 }
2532
/*
 * Emit MFD_AVC_BSD_OBJECT to actually decode the workaround clip.
 * The byte/bit split of gen7_jpeg_wa_clip.data_bit_offset locates the
 * first macroblock within the pre-uploaded slice data.
 */
static void
gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) | /* first MB byte offset */
                  (0 << 5)  |
                  (0 << 4)  |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7));        /* first MB bit offset */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2559
/*
 * Emit MFX_AVC_SLICE_STATE for the workaround clip: one I slice
 * covering the whole 1x1-MB frame, with deblocking disabled and the
 * clip's fixed QP.
 */
static void
gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2599
/*
 * Run the full JPEG decoding workaround: set up the scratch surface
 * and clip BO, then program the MFX pipeline for AVC and decode the
 * tiny clip.  Called before every real JPEG decode (see
 * gen8_mfd_jpeg_decode_picture).  The call order mirrors the normal
 * AVC decode programming sequence and must be preserved.
 */
static void
gen8_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen8_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
2619
2620 #endif
2621
/*
 * Decode one baseline JPEG picture.  The slice parameters are walked
 * twice: a first pass scans every scan's DC/AC Huffman table selectors
 * so the Huffman table state can be programmed once up front, then a
 * second pass emits one MFD_JPEG_BSD_OBJECT per scan.
 */
void
gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferJPEGBaseline *pic_param;
    VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j, max_selector = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    /* Currently only support Baseline DCT */
    gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
#ifdef JPEG_WA
    /* Decode a tiny AVC clip first to work around JPEG decoding issues. */
    gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
#endif
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Pass 1: find the highest DC/AC Huffman table selector in use. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            int component;

            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            for (component = 0; component < slice_param->num_components; component++) {
                if (max_selector < slice_param->components[component].dc_table_selector)
                    max_selector = slice_param->components[component].dc_table_selector;

                if (max_selector < slice_param->components[component].ac_table_selector)
                    max_selector = slice_param->components[component].ac_table_selector;
            }

            slice_param++;
        }
    }

    assert(max_selector < 2);
    /* Program all Huffman tables up to the highest selector seen. */
    gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);

    /* Pass 2: emit one BSD object per scan. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
2712
/* VP8 DC dequantization lookup table, indexed by a quantization index
 * clamped to [0, 127] (see vp8_clip_quantization_index). */
static const int vp8_dc_qlookup[128] =
{
      4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
     18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
     29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
     44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
     59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
     75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
     91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
};
2724
/* VP8 AC dequantization lookup table, indexed by a quantization index
 * clamped to [0, 127] (see vp8_clip_quantization_index). */
static const int vp8_ac_qlookup[128] =
{
      4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
     20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
     36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
     52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
     78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
};
2736
/* Clamp a VP8 quantization index into the legal lookup range [0, 127]. */
static inline unsigned int vp8_clip_quantization_index(int index)
{
    if (index < 0)
        return 0;

    return (index > 127) ? 127 : (unsigned int)index;
}
2746
/*
 * Prepare per-frame state for VP8 decoding: update the reference frame
 * store, bind the render target as the decode output (pre- or
 * post-deblocking depending on whether the loop filter is enabled),
 * ensure the segmentation buffer exists, and (re)allocate the row
 * store scratch buffers sized to the frame width in macroblocks.
 */
static void
gen8_mfd_vp8_decode_init(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;
    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    int width_in_mbs = (pic_param->frame_width + 15) / 16;
    int height_in_mbs = (pic_param->frame_height + 15) / 16;

    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    intel_update_vp8_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* Exactly one of post/pre deblocking output is valid, selected by
     * whether the in-loop filter is enabled for this frame. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;

    intel_ensure_vp8_segmentation_buffer(ctx,
        &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);

    /* The same as AVC: row store scratch buffers scale with frame width. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* VP8 has no bitplane buffer. */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
2823
2824 static void
2825 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2826                        struct decode_state *decode_state,
2827                        struct gen7_mfd_context *gen7_mfd_context)
2828 {
2829     struct i965_driver_data *i965 = i965_driver_data(ctx);
2830     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2831     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2832     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2833     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2834     dri_bo *probs_bo = decode_state->probability_data->bo;
2835     int i, j,log2num;
2836     unsigned int quantization_value[4][6];
2837
2838     /* There is no safe way to error out if the segmentation buffer
2839        could not be allocated. So, instead of aborting, simply decode
2840        something even if the result may look totally inacurate */
2841     const unsigned int enable_segmentation =
2842         pic_param->pic_fields.bits.segmentation_enabled &&
2843         gen7_mfd_context->segmentation_buffer.valid;
2844         
2845     log2num = (int)log2(slice_param->num_of_partitions - 1);
2846
2847     BEGIN_BCS_BATCH(batch, 38);
2848     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2849     OUT_BCS_BATCH(batch,
2850                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2851                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2852     OUT_BCS_BATCH(batch,
2853                   log2num << 24 |
2854                   pic_param->pic_fields.bits.sharpness_level << 16 |
2855                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2856                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2857                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2858                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2859                   (enable_segmentation &&
2860                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
2861                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2862                   (enable_segmentation &&
2863                    !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2864                   (enable_segmentation &&
2865                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2866                   (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2867                   pic_param->pic_fields.bits.filter_type << 4 |
2868                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2869                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2870
2871     OUT_BCS_BATCH(batch,
2872                   pic_param->loop_filter_level[3] << 24 |
2873                   pic_param->loop_filter_level[2] << 16 |
2874                   pic_param->loop_filter_level[1] <<  8 |
2875                   pic_param->loop_filter_level[0] <<  0);
2876
2877     /* Quantizer Value for 4 segmetns, DW4-DW15 */
2878     for (i = 0; i < 4; i++) {
2879                 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2880                 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2881                 quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
2882                 /* 101581>>16 is equivalent to 155/100 */
2883                 quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
2884                 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2885                 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2886
2887                 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2888                 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2889
2890                 OUT_BCS_BATCH(batch,
2891                       quantization_value[i][0] << 16 | /* Y1AC */
2892                       quantization_value[i][1] <<  0); /* Y1DC */
2893         OUT_BCS_BATCH(batch,
2894                       quantization_value[i][5] << 16 | /* UVAC */
2895                       quantization_value[i][4] <<  0); /* UVDC */
2896         OUT_BCS_BATCH(batch,
2897                       quantization_value[i][3] << 16 | /* Y2AC */
2898                       quantization_value[i][2] <<  0); /* Y2DC */
2899     }
2900
2901     /* CoeffProbability table for non-key frame, DW16-DW18 */
2902     if (probs_bo) {
2903         OUT_BCS_RELOC64(batch, probs_bo,
2904                       0, I915_GEM_DOMAIN_INSTRUCTION,
2905                       0);
2906         OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2907     } else {
2908         OUT_BCS_BATCH(batch, 0);
2909         OUT_BCS_BATCH(batch, 0);
2910         OUT_BCS_BATCH(batch, 0);
2911     }
2912
2913     OUT_BCS_BATCH(batch,
2914                   pic_param->mb_segment_tree_probs[2] << 16 |
2915                   pic_param->mb_segment_tree_probs[1] <<  8 |
2916                   pic_param->mb_segment_tree_probs[0] <<  0);
2917
2918     OUT_BCS_BATCH(batch,
2919                   pic_param->prob_skip_false << 24 |
2920                   pic_param->prob_intra      << 16 |
2921                   pic_param->prob_last       <<  8 |
2922                   pic_param->prob_gf         <<  0);
2923
2924     OUT_BCS_BATCH(batch,
2925                   pic_param->y_mode_probs[3] << 24 |
2926                   pic_param->y_mode_probs[2] << 16 |
2927                   pic_param->y_mode_probs[1] <<  8 |
2928                   pic_param->y_mode_probs[0] <<  0);
2929
2930     OUT_BCS_BATCH(batch,
2931                   pic_param->uv_mode_probs[2] << 16 |
2932                   pic_param->uv_mode_probs[1] <<  8 |
2933                   pic_param->uv_mode_probs[0] <<  0);
2934     
2935     /* MV update value, DW23-DW32 */
2936     for (i = 0; i < 2; i++) {
2937         for (j = 0; j < 20; j += 4) {
2938             OUT_BCS_BATCH(batch,
2939                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2940                           pic_param->mv_probs[i][j + 2] << 16 |
2941                           pic_param->mv_probs[i][j + 1] <<  8 |
2942                           pic_param->mv_probs[i][j + 0] <<  0);
2943         }
2944     }
2945
2946     OUT_BCS_BATCH(batch,
2947                   (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2948                   (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2949                   (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
2950                   (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);
2951
2952     OUT_BCS_BATCH(batch,
2953                   (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2954                   (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2955                   (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
2956                   (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);
2957
2958     /* segmentation id stream base address, DW35-DW37 */
2959     if (enable_segmentation) {
2960         OUT_BCS_RELOC64(batch, gen7_mfd_context->segmentation_buffer.bo,
2961                       0, I915_GEM_DOMAIN_INSTRUCTION,
2962                       0);
2963         OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2964     }
2965     else {
2966         OUT_BCS_BATCH(batch, 0);
2967         OUT_BCS_BATCH(batch, 0);
2968         OUT_BCS_BATCH(batch, 0);
2969     }
2970     ADVANCE_BCS_BATCH(batch);
2971 }
2972
/*
 * Emit the MFD_VP8_BSD_OBJECT command that starts bitstream decoding of a
 * single VP8 frame (VP8 decode uses exactly one slice per frame).
 *
 * The command tells the hardware where partition 0 (modes/motion vectors)
 * and the up-to-8 token partitions start inside the indirect object buffer,
 * and restores the boolean-decoder state (range/value/bit count) saved by
 * the parser at the point where macroblock data begins.
 *
 * @param ctx               driver context (unused here)
 * @param pic_param         VP8 picture parameters (bool_coder_ctx is read)
 * @param slice_param       VP8 slice parameters (offsets and partition sizes)
 * @param slice_data_bo     slice data buffer; unused here — it was already
 *                          programmed via IND_OBJ_BASE_ADDR_STATE by the caller
 * @param gen7_mfd_context  decoder context providing the batch buffer
 */
static void
gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVP8 *pic_param,
                        VASliceParameterBufferVP8 *slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i, log2num;
    /* Byte offset of partition 0: skip the uncompressed data chunk plus the
     * whole bytes the bool decoder has already consumed (macroblock_offset
     * is in bits; round up to the next byte boundary). */
    unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
    /* Bits of the current byte already consumed by the bool decoder;
     * bool_coder_ctx.count holds the bits still available (0..7). */
    unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
    unsigned int partition_size_0 = slice_param->partition_size[0];

    assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
    /* A fully consumed byte is equivalent to starting fresh on the next
     * byte: advance the offset instead of programming used_bits == 8. */
    if (used_bits == 8) {
        used_bits = 0;
        offset += 1;
        partition_size_0 -= 1;
    }

    /* VP8 has 1 first partition + 1..8 token partitions. */
    assert(slice_param->num_of_partitions >= 2);
    assert(slice_param->num_of_partitions <= 9);

    /* log2 of the token-partition count (1, 2, 4 or 8 partitions). */
    log2num = (int)log2(slice_param->num_of_partitions - 1);

    BEGIN_BCS_BATCH(batch, 22);
    OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
    OUT_BCS_BATCH(batch,
                  used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
                  pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
                  log2num << 4 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
                  0);

    /* NOTE(review): the hardware fields below take partition size + 1 —
     * presumably the size is programmed inclusive of one extra byte per the
     * PRM; confirm against the MFD_VP8_BSD_OBJECT programming notes. */
    OUT_BCS_BATCH(batch, partition_size_0 + 1);
    OUT_BCS_BATCH(batch, offset);
    /* Partition sizes in bytes (3 bytes each) are stored in the bitstream
     * right after the first partition when there is more than one token
     * partition; skip over them to reach the first token partition. */
    offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
    /* Token partitions 1..8: program size + offset for present partitions,
     * zero out the remaining slots. */
    for (i = 1; i < 9; i++) {
        if (i < slice_param->num_of_partitions) {
            OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
            OUT_BCS_BATCH(batch, offset);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }

        /* Harmless for absent partitions: offset is no longer emitted. */
        offset += slice_param->partition_size[i];
    }

    OUT_BCS_BATCH(batch, 0); /* concealment method */

    ADVANCE_BCS_BATCH(batch);
}
3029
/*
 * Decode one complete VP8 frame.
 *
 * Validates the decode_state buffers (VP8 requires exactly one slice
 * parameter buffer with one element, one slice data buffer, and a
 * probability-table buffer), then emits the full MFX command sequence.
 * The command order below is the mandatory hardware pipeline order:
 * pipe mode select, surface state, pipe/BSP buffer addresses, indirect
 * object base, picture state, and finally the BSD object that triggers
 * the actual decode.
 */
void
gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVP8 *pic_param;
    VASliceParameterBufferVP8 *slice_param;
    dri_bo *slice_data_bo;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;

    /* one slice per frame */
    if (decode_state->num_slice_params != 1 ||
        (!decode_state->slice_params ||
         !decode_state->slice_params[0] ||
         (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
        (!decode_state->slice_datas ||
         !decode_state->slice_datas[0] ||
         !decode_state->slice_datas[0]->bo) ||
        !decode_state->probability_data) {
        WARN_ONCE("Wrong parameters for VP8 decoding\n");

        return;
    }

    slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
    slice_data_bo = decode_state->slice_datas[0]->bo;

    /* Allocate/refresh per-frame resources before emitting any commands. */
    gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
3073
3074 static VAStatus
3075 gen8_mfd_decode_picture(VADriverContextP ctx, 
3076                         VAProfile profile, 
3077                         union codec_state *codec_state,
3078                         struct hw_context *hw_context)
3079
3080 {
3081     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3082     struct decode_state *decode_state = &codec_state->decode;
3083     VAStatus vaStatus;
3084
3085     assert(gen7_mfd_context);
3086
3087     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3088
3089     if (vaStatus != VA_STATUS_SUCCESS)
3090         goto out;
3091
3092     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3093
3094     switch (profile) {
3095     case VAProfileMPEG2Simple:
3096     case VAProfileMPEG2Main:
3097         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3098         break;
3099         
3100     case VAProfileH264ConstrainedBaseline:
3101     case VAProfileH264Main:
3102     case VAProfileH264High:
3103     case VAProfileH264StereoHigh:
3104     case VAProfileH264MultiviewHigh:
3105         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3106         break;
3107
3108     case VAProfileVC1Simple:
3109     case VAProfileVC1Main:
3110     case VAProfileVC1Advanced:
3111         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3112         break;
3113
3114     case VAProfileJPEGBaseline:
3115         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3116         break;
3117
3118     case VAProfileVP8Version0_3:
3119         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3120         break;
3121
3122     default:
3123         assert(0);
3124         break;
3125     }
3126
3127     vaStatus = VA_STATUS_SUCCESS;
3128
3129 out:
3130     return vaStatus;
3131 }
3132
3133 static void
3134 gen8_mfd_context_destroy(void *hw_context)
3135 {
3136     VADriverContextP ctx;
3137     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3138
3139     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3140
3141     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3142     gen7_mfd_context->post_deblocking_output.bo = NULL;
3143
3144     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3145     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3146
3147     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3148     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3149
3150     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3151     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3152
3153     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3154     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3155
3156     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3157     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3158
3159     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3160     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3161
3162     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3163     gen7_mfd_context->segmentation_buffer.bo = NULL;
3164
3165     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3166
3167     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3168         i965_DestroySurfaces(ctx,
3169                              &gen7_mfd_context->jpeg_wa_surface_id,
3170                              1);
3171         gen7_mfd_context->jpeg_wa_surface_object = NULL;
3172     }
3173
3174     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3175     free(gen7_mfd_context);
3176 }
3177
3178 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3179                                     struct gen7_mfd_context *gen7_mfd_context)
3180 {
3181     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3182     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3183     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3184     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3185 }
3186
3187 struct hw_context *
3188 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3189 {
3190     struct intel_driver_data *intel = intel_driver_data(ctx);
3191     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3192     int i;
3193
3194     if (!gen7_mfd_context)
3195         return NULL;
3196
3197     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3198     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3199     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3200
3201     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3202         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3203         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3204     }
3205
3206     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3207     gen7_mfd_context->segmentation_buffer.valid = 0;
3208
3209     switch (obj_config->profile) {
3210     case VAProfileMPEG2Simple:
3211     case VAProfileMPEG2Main:
3212         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3213         break;
3214
3215     case VAProfileH264ConstrainedBaseline:
3216     case VAProfileH264Main:
3217     case VAProfileH264High:
3218     case VAProfileH264StereoHigh:
3219     case VAProfileH264MultiviewHigh:
3220         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3221         break;
3222     default:
3223         break;
3224     }
3225
3226     gen7_mfd_context->driver_context = ctx;
3227     return (struct hw_context *)gen7_mfd_context;
3228 }