
Fix a vp8 decoder picture parameter error
src/gen8_mfd.c (android-x86/hardware-intel-common-vaapi.git)
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV             2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
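/* Zigzag scan order table; gen8_mfd_mpeg2_qm_state() below uses it to
 * reorder the MPEG-2 quantiser matrices before loading them with
 * MFX_QM_STATE. */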
51 static const uint32_t zigzag_direct[64] = {
52     0,   1,  8, 16,  9,  2,  3, 10,
53     17, 24, 32, 25, 18, 11,  4,  5,
54     12, 19, 26, 33, 40, 48, 41, 34,
55     27, 20, 13,  6,  7, 14, 21, 28,
56     35, 42, 49, 56, 57, 50, 43, 36,
57     29, 22, 15, 23, 30, 37, 44, 51,
58     58, 59, 52, 45, 38, 31, 39, 46,
59     53, 60, 61, 54, 47, 55, 62, 63
60 };
61
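/* Allocate the per-surface direct-MV scratch buffers used for AVC direct
 * prediction; MFX_AVC_DIRECTMODE_STATE references dmv_top, and a separate
 * bottom-field buffer is allocated only for field pictures coded without
 * direct_8x8_inference. */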
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx, 
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77         assert((obj_surface->size & 0x3f) == 0);
78         obj_surface->private_data = gen7_avc_surface;
79     }
80
81     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
82                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
83
84     if (gen7_avc_surface->dmv_top == NULL) {
85         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86                                                  "direct mv w/r buffer",
87                                                  width_in_mbs * height_in_mbs * 128,
88                                                  0x1000);
89         assert(gen7_avc_surface->dmv_top);
90     }
91
92     if (gen7_avc_surface->dmv_bottom_flag &&
93         gen7_avc_surface->dmv_bottom == NULL) {
94         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
95                                                     "direct mv w/r buffer",
96                                                     width_in_mbs * height_in_mbs * 128,                                                    
97                                                     0x1000);
98         assert(gen7_avc_surface->dmv_bottom);
99     }
100 }
101
102 static void
103 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
104                           struct decode_state *decode_state,
105                           int standard_select,
106                           struct gen7_mfd_context *gen7_mfd_context)
107 {
108     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
109
110     assert(standard_select == MFX_FORMAT_MPEG2 ||
111            standard_select == MFX_FORMAT_AVC ||
112            standard_select == MFX_FORMAT_VC1 ||
113            standard_select == MFX_FORMAT_JPEG ||
114            standard_select == MFX_FORMAT_VP8);
115
116     BEGIN_BCS_BATCH(batch, 5);
117     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
118     OUT_BCS_BATCH(batch,
119                   (MFX_LONG_MODE << 17) | /* Currently only the long format is supported */
120                   (MFD_MODE_VLD << 15) | /* VLD mode */
121                   (0 << 10) | /* disable Stream-Out */
122                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
123                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
124                   (0 << 5)  | /* not in stitch mode */
125                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
126                   (standard_select << 0));
127     OUT_BCS_BATCH(batch,
128                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
129                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
130                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
131                   (0 << 1)  |
132                   (0 << 0));
133     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
134     OUT_BCS_BATCH(batch, 0); /* reserved */
135     ADVANCE_BCS_BATCH(batch);
136 }
137
138 static void
139 gen8_mfd_surface_state(VADriverContextP ctx,
140                        struct decode_state *decode_state,
141                        int standard_select,
142                        struct gen7_mfd_context *gen7_mfd_context)
143 {
144     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
145     struct object_surface *obj_surface = decode_state->render_object;
146     unsigned int y_cb_offset;
147     unsigned int y_cr_offset;
148
149     assert(obj_surface);
150
151     y_cb_offset = obj_surface->y_cb_offset;
152     y_cr_offset = obj_surface->y_cr_offset;
153
154     BEGIN_BCS_BATCH(batch, 6);
155     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
156     OUT_BCS_BATCH(batch, 0);
157     OUT_BCS_BATCH(batch,
158                   ((obj_surface->orig_height - 1) << 18) |
159                   ((obj_surface->orig_width - 1) << 4));
160     OUT_BCS_BATCH(batch,
161                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
162                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
163                   (0 << 22) | /* surface object control state, ignored */
164                   ((obj_surface->width - 1) << 3) | /* pitch */
165                   (0 << 2)  | /* must be 0 */
166                   (1 << 1)  | /* must be tiled */
167                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
168     OUT_BCS_BATCH(batch,
169                   (0 << 16) | /* X offset for U(Cb), must be 0 */
170                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
171     OUT_BCS_BATCH(batch,
172                   (0 << 16) | /* X offset for V(Cr), must be 0 */
173                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
174     ADVANCE_BCS_BATCH(batch);
175 }
176
177 static void
178 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
179                              struct decode_state *decode_state,
180                              int standard_select,
181                              struct gen7_mfd_context *gen7_mfd_context)
182 {
183     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
184     int i;
185
186     BEGIN_BCS_BATCH(batch, 61);
187     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
188         /* Pre-deblock 1-3 */
189     if (gen7_mfd_context->pre_deblocking_output.valid)
190         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
191                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
192                       0);
193     else
194         OUT_BCS_BATCH(batch, 0);
195
196     OUT_BCS_BATCH(batch, 0);
197     OUT_BCS_BATCH(batch, 0);
198         /* Post-deblocking 4-6 */
199     if (gen7_mfd_context->post_deblocking_output.valid)
200         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
201                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
202                       0);
203     else
204         OUT_BCS_BATCH(batch, 0);
205
206     OUT_BCS_BATCH(batch, 0);
207     OUT_BCS_BATCH(batch, 0);
208
209         /* uncompressed-video & stream out 7-12 */
210     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
211     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
212     OUT_BCS_BATCH(batch, 0);
213     OUT_BCS_BATCH(batch, 0);
214     OUT_BCS_BATCH(batch, 0);
215     OUT_BCS_BATCH(batch, 0);
216
217         /* intra row-store scratch 13-15 */
218     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
219         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
220                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
221                       0);
222     else
223         OUT_BCS_BATCH(batch, 0);
224
225     OUT_BCS_BATCH(batch, 0);
226     OUT_BCS_BATCH(batch, 0);
227         /* deblocking-filter-row-store 16-18 */
228     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
229         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
230                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
231                       0);
232     else
233         OUT_BCS_BATCH(batch, 0);
234     OUT_BCS_BATCH(batch, 0);
235     OUT_BCS_BATCH(batch, 0);
236
237     /* DW 19..50 */
238     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
239         struct object_surface *obj_surface;
240
241         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
242             gen7_mfd_context->reference_surface[i].obj_surface &&
243             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
244             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
245
246             OUT_BCS_RELOC(batch, obj_surface->bo,
247                           I915_GEM_DOMAIN_INSTRUCTION, 0,
248                           0);
249         } else {
250             OUT_BCS_BATCH(batch, 0);
251         }
252         
253         OUT_BCS_BATCH(batch, 0);
254     }
255     
256     /* reference property 51 */
257     OUT_BCS_BATCH(batch, 0);  
258         
259     /* Macroblock status & ILDB 52-57 */
260     OUT_BCS_BATCH(batch, 0);
261     OUT_BCS_BATCH(batch, 0);
262     OUT_BCS_BATCH(batch, 0);
263     OUT_BCS_BATCH(batch, 0);
264     OUT_BCS_BATCH(batch, 0);
265     OUT_BCS_BATCH(batch, 0);
266
267     /* the second Macroblock status 58-60 */    
268     OUT_BCS_BATCH(batch, 0);
269     OUT_BCS_BATCH(batch, 0);
270     OUT_BCS_BATCH(batch, 0);
271
272     ADVANCE_BCS_BATCH(batch);
273 }
274
275 static void
276 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
277                                  dri_bo *slice_data_bo,
278                                  int standard_select,
279                                  struct gen7_mfd_context *gen7_mfd_context)
280 {
281     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
282
283     BEGIN_BCS_BATCH(batch, 26);
284     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
285         /* MFX In BS 1-5 */
286     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
287     OUT_BCS_BATCH(batch, 0);
288     OUT_BCS_BATCH(batch, 0);
289         /* Upper bound 4-5 */   
290     OUT_BCS_BATCH(batch, 0x80000000); /* must be set, up to 2G */
291     OUT_BCS_BATCH(batch, 0);
292
293         /* MFX indirect MV 6-10 */
294     OUT_BCS_BATCH(batch, 0);
295     OUT_BCS_BATCH(batch, 0);
296     OUT_BCS_BATCH(batch, 0);
297     OUT_BCS_BATCH(batch, 0);
298     OUT_BCS_BATCH(batch, 0);
299         
300         /* MFX IT_COFF 11-15 */
301     OUT_BCS_BATCH(batch, 0);
302     OUT_BCS_BATCH(batch, 0);
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305     OUT_BCS_BATCH(batch, 0);
306
307         /* MFX IT_DBLK 16-20 */
308     OUT_BCS_BATCH(batch, 0);
309     OUT_BCS_BATCH(batch, 0);
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312     OUT_BCS_BATCH(batch, 0);
313
314         /* MFX PAK_BSE object for encoder 21-25 */
315     OUT_BCS_BATCH(batch, 0);
316     OUT_BCS_BATCH(batch, 0);
317     OUT_BCS_BATCH(batch, 0);
318     OUT_BCS_BATCH(batch, 0);
319     OUT_BCS_BATCH(batch, 0);
320
321     ADVANCE_BCS_BATCH(batch);
322 }
323
324 static void
325 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
326                                  struct decode_state *decode_state,
327                                  int standard_select,
328                                  struct gen7_mfd_context *gen7_mfd_context)
329 {
330     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
331
332     BEGIN_BCS_BATCH(batch, 10);
333     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
334
335     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
336         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
337                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
338                       0);
339     else
340         OUT_BCS_BATCH(batch, 0);
341
342     OUT_BCS_BATCH(batch, 0);
343     OUT_BCS_BATCH(batch, 0);
344         /* MPR Row Store Scratch buffer 4-6 */
345     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
346         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
347                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
348                       0);
349     else
350         OUT_BCS_BATCH(batch, 0);
351
352     OUT_BCS_BATCH(batch, 0);
353     OUT_BCS_BATCH(batch, 0);
354
355         /* Bitplane 7-9 */ 
356     if (gen7_mfd_context->bitplane_read_buffer.valid)
357         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
358                       I915_GEM_DOMAIN_INSTRUCTION, 0,
359                       0);
360     else
361         OUT_BCS_BATCH(batch, 0);
362     OUT_BCS_BATCH(batch, 0);
363     OUT_BCS_BATCH(batch, 0);
364     ADVANCE_BCS_BATCH(batch);
365 }
366
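/* Load one quantisation matrix with MFX_QM_STATE.  The command always
 * carries a 16-dword matrix payload, so qm_length must not exceed 64
 * bytes; the matrix is copied into a local buffer before being emitted. */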
367 static void
368 gen8_mfd_qm_state(VADriverContextP ctx,
369                   int qm_type,
370                   unsigned char *qm,
371                   int qm_length,
372                   struct gen7_mfd_context *gen7_mfd_context)
373 {
374     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
375     unsigned int qm_buffer[16];
376
377     assert(qm_length <= 16 * 4);
378     memcpy(qm_buffer, qm, qm_length);
379
380     BEGIN_BCS_BATCH(batch, 18);
381     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
382     OUT_BCS_BATCH(batch, qm_type << 0);
383     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
384     ADVANCE_BCS_BATCH(batch);
385 }
386
387 static void
388 gen8_mfd_avc_img_state(VADriverContextP ctx,
389                        struct decode_state *decode_state,
390                        struct gen7_mfd_context *gen7_mfd_context)
391 {
392     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
393     int img_struct;
394     int mbaff_frame_flag;
395     unsigned int width_in_mbs, height_in_mbs;
396     VAPictureParameterBufferH264 *pic_param;
397
398     assert(decode_state->pic_param && decode_state->pic_param->buffer);
399     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
400     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
401
402     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
403         img_struct = 1;
404     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
405         img_struct = 3;
406     else
407         img_struct = 0;
408
409     if ((img_struct & 0x1) == 0x1) {
410         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
411     } else {
412         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
413     }
414
415     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
416         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
417         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
418     } else {
419         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
420     }
421
422     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
423                         !pic_param->pic_fields.bits.field_pic_flag);
424
425     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
426     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
427
428     /* MFX unit doesn't support 4:2:2 and 4:4:4 pictures */
429     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
430            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
431     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
432
433     BEGIN_BCS_BATCH(batch, 17);
434     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
435     OUT_BCS_BATCH(batch, 
436                   width_in_mbs * height_in_mbs);
437     OUT_BCS_BATCH(batch, 
438                   ((height_in_mbs - 1) << 16) | 
439                   ((width_in_mbs - 1) << 0));
440     OUT_BCS_BATCH(batch, 
441                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
442                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
443                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
444                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
445                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
446                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
447                   (img_struct << 8));
448     OUT_BCS_BATCH(batch,
449                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
450                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
451                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
452                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
453                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
454                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
455                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
456                   (mbaff_frame_flag << 1) |
457                   (pic_param->pic_fields.bits.field_pic_flag << 0));
458     OUT_BCS_BATCH(batch, 0);
459     OUT_BCS_BATCH(batch, 0);
460     OUT_BCS_BATCH(batch, 0);
461     OUT_BCS_BATCH(batch, 0);
462     OUT_BCS_BATCH(batch, 0);
463     OUT_BCS_BATCH(batch, 0);
464     OUT_BCS_BATCH(batch, 0);
465     OUT_BCS_BATCH(batch, 0);
466     OUT_BCS_BATCH(batch, 0);
467     OUT_BCS_BATCH(batch, 0);
468     OUT_BCS_BATCH(batch, 0);
469     OUT_BCS_BATCH(batch, 0);
470     ADVANCE_BCS_BATCH(batch);
471 }
472
473 static void
474 gen8_mfd_avc_qm_state(VADriverContextP ctx,
475                       struct decode_state *decode_state,
476                       struct gen7_mfd_context *gen7_mfd_context)
477 {
478     VAIQMatrixBufferH264 *iq_matrix;
479     VAPictureParameterBufferH264 *pic_param;
480
481     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
482         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
483     else
484         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
485
486     assert(decode_state->pic_param && decode_state->pic_param->buffer);
487     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
488
489     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
490     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
491
492     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
493         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
494         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
495     }
496 }
497
498 static void
499 gen8_mfd_avc_picid_state(VADriverContextP ctx,
500                       struct decode_state *decode_state,
501                       struct gen7_mfd_context *gen7_mfd_context)
502 {
503     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
504
505     BEGIN_BCS_BATCH(batch, 10);
506     OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
507     OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
508     OUT_BCS_BATCH(batch, 0);
509     OUT_BCS_BATCH(batch, 0);
510     OUT_BCS_BATCH(batch, 0);
511     OUT_BCS_BATCH(batch, 0);
512     OUT_BCS_BATCH(batch, 0);
513     OUT_BCS_BATCH(batch, 0);
514     OUT_BCS_BATCH(batch, 0);
515     OUT_BCS_BATCH(batch, 0);
516     ADVANCE_BCS_BATCH(batch);
517 }
518
519 static void
520 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
521                               struct decode_state *decode_state,
522                               VAPictureParameterBufferH264 *pic_param,
523                               VASliceParameterBufferH264 *slice_param,
524                               struct gen7_mfd_context *gen7_mfd_context)
525 {
526     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
527     struct object_surface *obj_surface;
528     GenAvcSurface *gen7_avc_surface;
529     VAPictureH264 *va_pic;
530     int i, j;
531
532     BEGIN_BCS_BATCH(batch, 71);
533     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
534
535     /* reference surfaces 0..15 */
536     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
537         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
538             gen7_mfd_context->reference_surface[i].obj_surface &&
539             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
540
541             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
542             gen7_avc_surface = obj_surface->private_data;
543
544             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
545                           I915_GEM_DOMAIN_INSTRUCTION, 0,
546                           0);
547             OUT_BCS_BATCH(batch, 0);
548         } else {
549             OUT_BCS_BATCH(batch, 0);
550             OUT_BCS_BATCH(batch, 0);
551         }
552     }
553     
554     OUT_BCS_BATCH(batch, 0);
555
556     /* the frame/field currently being decoded */
557     va_pic = &pic_param->CurrPic;
558     obj_surface = decode_state->render_object;
559     assert(obj_surface->bo && obj_surface->private_data);
560     gen7_avc_surface = obj_surface->private_data;
561
562     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
563                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
564                   0);
565
566     OUT_BCS_BATCH(batch, 0);
567     OUT_BCS_BATCH(batch, 0);
568
569     /* POC List */
570     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
571         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
572             int found = 0;
573
574             assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
575
576             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
577                 va_pic = &pic_param->ReferenceFrames[j];
578                 
579                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
580                     continue;
581
582                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
583                     found = 1;
584                     break;
585                 }
586             }
587
588             assert(found == 1);
589             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
590             
591             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
592             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
593         } else {
594             OUT_BCS_BATCH(batch, 0);
595             OUT_BCS_BATCH(batch, 0);
596         }
597     }
598
599     va_pic = &pic_param->CurrPic;
600     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
601     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
602
603     ADVANCE_BCS_BATCH(batch);
604 }
605
606 static void
607 gen8_mfd_avc_slice_state(VADriverContextP ctx,
608                          VAPictureParameterBufferH264 *pic_param,
609                          VASliceParameterBufferH264 *slice_param,
610                          VASliceParameterBufferH264 *next_slice_param,
611                          struct gen7_mfd_context *gen7_mfd_context)
612 {
613     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
614     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
615     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
616     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
617     int num_ref_idx_l0, num_ref_idx_l1;
618     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
619                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
620     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
621     int slice_type;
622
623     if (slice_param->slice_type == SLICE_TYPE_I ||
624         slice_param->slice_type == SLICE_TYPE_SI) {
625         slice_type = SLICE_TYPE_I;
626     } else if (slice_param->slice_type == SLICE_TYPE_P ||
627                slice_param->slice_type == SLICE_TYPE_SP) {
628         slice_type = SLICE_TYPE_P;
629     } else { 
630         assert(slice_param->slice_type == SLICE_TYPE_B);
631         slice_type = SLICE_TYPE_B;
632     }
633
634     if (slice_type == SLICE_TYPE_I) {
635         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
636         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
637         num_ref_idx_l0 = 0;
638         num_ref_idx_l1 = 0;
639     } else if (slice_type == SLICE_TYPE_P) {
640         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
641         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
642         num_ref_idx_l1 = 0;
643     } else {
644         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
645         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
646     }
647
648     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
649     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
650     slice_ver_pos = first_mb_in_slice / width_in_mbs;
651
652     if (next_slice_param) {
653         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
654         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
655         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
656     } else {
657         next_slice_hor_pos = 0;
658         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
659     }
660
661     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
662     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
663     OUT_BCS_BATCH(batch, slice_type);
664     OUT_BCS_BATCH(batch, 
665                   (num_ref_idx_l1 << 24) |
666                   (num_ref_idx_l0 << 16) |
667                   (slice_param->chroma_log2_weight_denom << 8) |
668                   (slice_param->luma_log2_weight_denom << 0));
669     OUT_BCS_BATCH(batch, 
670                   (slice_param->direct_spatial_mv_pred_flag << 29) |
671                   (slice_param->disable_deblocking_filter_idc << 27) |
672                   (slice_param->cabac_init_idc << 24) |
673                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
674                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
675                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
676     OUT_BCS_BATCH(batch, 
677                   (slice_ver_pos << 24) |
678                   (slice_hor_pos << 16) | 
679                   (first_mb_in_slice << 0));
680     OUT_BCS_BATCH(batch,
681                   (next_slice_ver_pos << 16) |
682                   (next_slice_hor_pos << 0));
683     OUT_BCS_BATCH(batch, 
684                   (next_slice_param == NULL) << 19); /* last slice flag */
685     OUT_BCS_BATCH(batch, 0);
686     OUT_BCS_BATCH(batch, 0);
687     OUT_BCS_BATCH(batch, 0);
688     OUT_BCS_BATCH(batch, 0);
689     ADVANCE_BCS_BATCH(batch);
690 }
691
692 static inline void
693 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
694                            VAPictureParameterBufferH264 *pic_param,
695                            VASliceParameterBufferH264 *slice_param,
696                            struct gen7_mfd_context *gen7_mfd_context)
697 {
698     gen6_send_avc_ref_idx_state(
699         gen7_mfd_context->base.batch,
700         slice_param,
701         gen7_mfd_context->reference_surface
702     );
703 }
704
705 static void
706 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
707                                 VAPictureParameterBufferH264 *pic_param,
708                                 VASliceParameterBufferH264 *slice_param,
709                                 struct gen7_mfd_context *gen7_mfd_context)
710 {
711     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
712     int i, j, num_weight_offset_table = 0;
713     short weightoffsets[32 * 6];
714
715     if ((slice_param->slice_type == SLICE_TYPE_P ||
716          slice_param->slice_type == SLICE_TYPE_SP) &&
717         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
718         num_weight_offset_table = 1;
719     }
720     
721     if ((slice_param->slice_type == SLICE_TYPE_B) &&
722         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
723         num_weight_offset_table = 2;
724     }
725
726     for (i = 0; i < num_weight_offset_table; i++) {
727         BEGIN_BCS_BATCH(batch, 98);
728         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
729         OUT_BCS_BATCH(batch, i);
730
731         if (i == 0) {
732             for (j = 0; j < 32; j++) {
733                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
734                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
735                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
736                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
737                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
738                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
739             }
740         } else {
741             for (j = 0; j < 32; j++) {
742                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
743                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
744                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
745                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
746                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
747                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
748             }
749         }
750
751         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
752         ADVANCE_BCS_BATCH(batch);
753     }
754 }
755
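/* Emit MFD_AVC_BSD_OBJECT for one slice: it carries the slice data size
 * and offset within the indirect bitstream buffer, the byte/bit offset of
 * the first macroblock, and the last-slice flag. */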
756 static void
757 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
758                         VAPictureParameterBufferH264 *pic_param,
759                         VASliceParameterBufferH264 *slice_param,
760                         dri_bo *slice_data_bo,
761                         VASliceParameterBufferH264 *next_slice_param,
762                         struct gen7_mfd_context *gen7_mfd_context)
763 {
764     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
765     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
766                                                             slice_param,
767                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
768
769     /* the input bitstream format on GEN7 differs from GEN6 */
770     BEGIN_BCS_BATCH(batch, 6);
771     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
772     OUT_BCS_BATCH(batch, 
773                   (slice_param->slice_data_size));
774     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
775     OUT_BCS_BATCH(batch,
776                   (0 << 31) |
777                   (0 << 14) |
778                   (0 << 12) |
779                   (0 << 10) |
780                   (0 << 8));
781     OUT_BCS_BATCH(batch,
782                   ((slice_data_bit_offset >> 3) << 16) |
783                   (1 << 7)  |
784                   (0 << 5)  |
785                   (0 << 4)  |
786                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
787                   (slice_data_bit_offset & 0x7));
788     OUT_BCS_BATCH(batch, 0);
789     ADVANCE_BCS_BATCH(batch);
790 }
791
792 static inline void
793 gen8_mfd_avc_context_init(
794     VADriverContextP         ctx,
795     struct gen7_mfd_context *gen7_mfd_context
796 )
797 {
798     /* Initialize flat scaling lists */
799     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
800 }
801
802 static void
803 gen8_mfd_avc_decode_init(VADriverContextP ctx,
804                          struct decode_state *decode_state,
805                          struct gen7_mfd_context *gen7_mfd_context)
806 {
807     VAPictureParameterBufferH264 *pic_param;
808     VASliceParameterBufferH264 *slice_param;
809     struct i965_driver_data *i965 = i965_driver_data(ctx);
810     struct object_surface *obj_surface;
811     dri_bo *bo;
812     int i, j, enable_avc_ildb = 0;
813     unsigned int width_in_mbs, height_in_mbs;
814
815     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
816         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
817         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
818
819         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
820             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
821             assert((slice_param->slice_type == SLICE_TYPE_I) ||
822                    (slice_param->slice_type == SLICE_TYPE_SI) ||
823                    (slice_param->slice_type == SLICE_TYPE_P) ||
824                    (slice_param->slice_type == SLICE_TYPE_SP) ||
825                    (slice_param->slice_type == SLICE_TYPE_B));
826
827             if (slice_param->disable_deblocking_filter_idc != 1) {
828                 enable_avc_ildb = 1;
829                 break;
830             }
831
832             slice_param++;
833         }
834     }
835
836     assert(decode_state->pic_param && decode_state->pic_param->buffer);
837     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
838     intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
839     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
840     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
841     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
842     assert(height_in_mbs > 0 && height_in_mbs <= 256);
843
844     /* Current decoded picture */
845     obj_surface = decode_state->render_object;
846     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
847     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
848     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
849
850     /* initialize the UV component for the YUV400 case */
851     if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
852          unsigned int uv_offset = obj_surface->width * obj_surface->height; 
853          unsigned int uv_size   = obj_surface->width * obj_surface->height / 2; 
854
855          drm_intel_gem_bo_map_gtt(obj_surface->bo);
856          memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
857          drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
858     }
859
860     gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
861
862     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
863     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
864     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
865     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
866
867     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
868     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
869     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
870     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
871
872     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
873     bo = dri_bo_alloc(i965->intel.bufmgr,
874                       "intra row store",
875                       width_in_mbs * 64,
876                       0x1000);
877     assert(bo);
878     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
879     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
880
881     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
882     bo = dri_bo_alloc(i965->intel.bufmgr,
883                       "deblocking filter row store",
884                       width_in_mbs * 64 * 4,
885                       0x1000);
886     assert(bo);
887     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
888     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
889
890     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
891     bo = dri_bo_alloc(i965->intel.bufmgr,
892                       "bsd mpc row store",
893                       width_in_mbs * 64 * 2,
894                       0x1000);
895     assert(bo);
896     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
897     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
898
899     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
900     bo = dri_bo_alloc(i965->intel.bufmgr,
901                       "mpr row store",
902                       width_in_mbs * 64 * 2,
903                       0x1000);
904     assert(bo);
905     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
906     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
907
908     gen7_mfd_context->bitplane_read_buffer.valid = 0;
909 }
910
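/* Top-level AVC decode: emit the per-picture MFX state (pipe mode select,
 * surface, buffer addresses, BSP buffers, QM, image and PICID state),
 * then walk every slice emitting direct-mode, ref-idx, weight/offset,
 * slice-state and BSD-object commands. */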
911 static void
912 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
913                             struct decode_state *decode_state,
914                             struct gen7_mfd_context *gen7_mfd_context)
915 {
916     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
917     VAPictureParameterBufferH264 *pic_param;
918     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
919     dri_bo *slice_data_bo;
920     int i, j;
921
922     assert(decode_state->pic_param && decode_state->pic_param->buffer);
923     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
924     gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
925
926     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
927     intel_batchbuffer_emit_mi_flush(batch);
928     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
929     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
930     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
931     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
932     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
933     gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
934     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
935
936     for (j = 0; j < decode_state->num_slice_params; j++) {
937         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
938         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
939         slice_data_bo = decode_state->slice_datas[j]->bo;
940         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
941
942         if (j == decode_state->num_slice_params - 1)
943             next_slice_group_param = NULL;
944         else
945             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
946
947         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
948             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
949             assert((slice_param->slice_type == SLICE_TYPE_I) ||
950                    (slice_param->slice_type == SLICE_TYPE_SI) ||
951                    (slice_param->slice_type == SLICE_TYPE_P) ||
952                    (slice_param->slice_type == SLICE_TYPE_SP) ||
953                    (slice_param->slice_type == SLICE_TYPE_B));
954
955             if (i < decode_state->slice_params[j]->num_elements - 1)
956                 next_slice_param = slice_param + 1;
957             else
958                 next_slice_param = next_slice_group_param;
959
960             gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
961             gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
962             gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
963             gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
964             gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
965             slice_param++;
966         }
967     }
968
969     intel_batchbuffer_end_atomic(batch);
970     intel_batchbuffer_flush(batch);
971 }
972
973 static void
974 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
975                            struct decode_state *decode_state,
976                            struct gen7_mfd_context *gen7_mfd_context)
977 {
978     VAPictureParameterBufferMPEG2 *pic_param;
979     struct i965_driver_data *i965 = i965_driver_data(ctx);
980     struct object_surface *obj_surface;
981     dri_bo *bo;
982     unsigned int width_in_mbs;
983
984     assert(decode_state->pic_param && decode_state->pic_param->buffer);
985     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
986     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
987
988     mpeg2_set_reference_surfaces(
989         ctx,
990         gen7_mfd_context->reference_surface,
991         decode_state,
992         pic_param
993     );
994
995     /* Current decoded picture */
996     obj_surface = decode_state->render_object;
997     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
998
999     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1000     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1001     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1002     gen7_mfd_context->pre_deblocking_output.valid = 1;
1003
1004     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1005     bo = dri_bo_alloc(i965->intel.bufmgr,
1006                       "bsd mpc row store",
1007                       width_in_mbs * 96,
1008                       0x1000);
1009     assert(bo);
1010     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1011     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1012
1013     gen7_mfd_context->post_deblocking_output.valid = 0;
1014     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1015     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1016     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1017     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1018 }
1019
1020 static void
1021 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1022                          struct decode_state *decode_state,
1023                          struct gen7_mfd_context *gen7_mfd_context)
1024 {
1025     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1026     VAPictureParameterBufferMPEG2 *pic_param;
1027     unsigned int slice_concealment_disable_bit = 0;
1028
1029     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1030     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1031
1032     slice_concealment_disable_bit = 1;
1033
1034     BEGIN_BCS_BATCH(batch, 13);
1035     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1036     OUT_BCS_BATCH(batch,
1037                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1038                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1039                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1040                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1041                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1042                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1043                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1044                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1045                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1046                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1047                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1048                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1049     OUT_BCS_BATCH(batch,
1050                   pic_param->picture_coding_type << 9);
1051     OUT_BCS_BATCH(batch,
1052                   (slice_concealment_disable_bit << 31) |
1053                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1054                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1055     OUT_BCS_BATCH(batch, 0);
1056     OUT_BCS_BATCH(batch, 0);
1057     OUT_BCS_BATCH(batch, 0);
1058     OUT_BCS_BATCH(batch, 0);
1059     OUT_BCS_BATCH(batch, 0);
1060     OUT_BCS_BATCH(batch, 0);
1061     OUT_BCS_BATCH(batch, 0);
1062     OUT_BCS_BATCH(batch, 0);
1063     OUT_BCS_BATCH(batch, 0);
1064     ADVANCE_BCS_BATCH(batch);
1065 }
1066
1067 static void
1068 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1069                         struct decode_state *decode_state,
1070                         struct gen7_mfd_context *gen7_mfd_context)
1071 {
1072     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1073     int i, j;
1074
1075     /* Update internal QM state */
1076     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1077         VAIQMatrixBufferMPEG2 * const iq_matrix =
1078             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1079
1080         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1081             iq_matrix->load_intra_quantiser_matrix) {
1082             gen_iq_matrix->load_intra_quantiser_matrix =
1083                 iq_matrix->load_intra_quantiser_matrix;
1084             if (iq_matrix->load_intra_quantiser_matrix) {
1085                 for (j = 0; j < 64; j++)
1086                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1087                         iq_matrix->intra_quantiser_matrix[j];
1088             }
1089         }
1090
1091         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1092             iq_matrix->load_non_intra_quantiser_matrix) {
1093             gen_iq_matrix->load_non_intra_quantiser_matrix =
1094                 iq_matrix->load_non_intra_quantiser_matrix;
1095             if (iq_matrix->load_non_intra_quantiser_matrix) {
1096                 for (j = 0; j < 64; j++)
1097                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1098                         iq_matrix->non_intra_quantiser_matrix[j];
1099             }
1100         }
1101     }
1102
1103     /* Commit QM state to HW */
1104     for (i = 0; i < 2; i++) {
1105         unsigned char *qm = NULL;
1106         int qm_type;
1107
1108         if (i == 0) {
1109             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1110                 qm = gen_iq_matrix->intra_quantiser_matrix;
1111                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1112             }
1113         } else {
1114             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1115                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1116                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1117             }
1118         }
1119
1120         if (!qm)
1121             continue;
1122
1123         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1124     }
1125 }
1126
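/* Emit MFD_MPEG2_BSD_OBJECT for one slice.  The macroblock count is
 * derived from the current and next slice positions; for field pictures
 * the slice vertical position may need halving, controlled by the
 * wa_mpeg2_slice_vertical_position workaround flag. */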
1127 static void
1128 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1129                           VAPictureParameterBufferMPEG2 *pic_param,
1130                           VASliceParameterBufferMPEG2 *slice_param,
1131                           VASliceParameterBufferMPEG2 *next_slice_param,
1132                           struct gen7_mfd_context *gen7_mfd_context)
1133 {
1134     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1135     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1136     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1137
1138     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1139         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1140         is_field_pic = 1;
1141     is_field_pic_wa = is_field_pic &&
1142         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1143
1144     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1145     hpos0 = slice_param->slice_horizontal_position;
1146
1147     if (next_slice_param == NULL) {
1148         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1149         hpos1 = 0;
1150     } else {
1151         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1152         hpos1 = next_slice_param->slice_horizontal_position;
1153     }
1154
1155     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1156
1157     BEGIN_BCS_BATCH(batch, 5);
1158     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1159     OUT_BCS_BATCH(batch, 
1160                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1161     OUT_BCS_BATCH(batch, 
1162                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1163     OUT_BCS_BATCH(batch,
1164                   hpos0 << 24 |
1165                   vpos0 << 16 |
1166                   mb_count << 8 |
1167                   (next_slice_param == NULL) << 5 |
1168                   (next_slice_param == NULL) << 3 |
1169                   (slice_param->macroblock_offset & 0x7));
1170     OUT_BCS_BATCH(batch,
1171                   (slice_param->quantiser_scale_code << 24) |
1172                   (vpos1 << 8 | hpos1));
1173     ADVANCE_BCS_BATCH(batch);
1174 }
1175
1176 static void
1177 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1178                               struct decode_state *decode_state,
1179                               struct gen7_mfd_context *gen7_mfd_context)
1180 {
1181     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1182     VAPictureParameterBufferMPEG2 *pic_param;
1183     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1184     dri_bo *slice_data_bo;
1185     int i, j;
1186
1187     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1188     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1189
1190     gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1191     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1192     intel_batchbuffer_emit_mi_flush(batch);
1193     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1194     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1195     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1196     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1197     gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1198     gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1199
1200     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1201         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1202             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1203
1204     for (j = 0; j < decode_state->num_slice_params; j++) {
1205         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1206         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1207         slice_data_bo = decode_state->slice_datas[j]->bo;
1208         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1209
1210         if (j == decode_state->num_slice_params - 1)
1211             next_slice_group_param = NULL;
1212         else
1213             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1214
1215         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1216             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1217
1218             if (i < decode_state->slice_params[j]->num_elements - 1)
1219                 next_slice_param = slice_param + 1;
1220             else
1221                 next_slice_param = next_slice_group_param;
1222
1223             gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1224             slice_param++;
1225         }
1226     }
1227
1228     intel_batchbuffer_end_atomic(batch);
1229     intel_batchbuffer_flush(batch);
1230 }
1231
1232 static const int va_to_gen7_vc1_pic_type[5] = {
1233     GEN7_VC1_I_PICTURE,
1234     GEN7_VC1_P_PICTURE,
1235     GEN7_VC1_B_PICTURE,
1236     GEN7_VC1_BI_PICTURE,
1237     GEN7_VC1_P_PICTURE,
1238 };
1239
1240 static const int va_to_gen7_vc1_mv[4] = {
1241     1, /* 1-MV */
1242     2, /* 1-MV half-pel */
1243     3, /* 1-MV half-pel bilinear */
1244     0, /* Mixed MV */
1245 };
1246
1247 static const int b_picture_scale_factor[21] = {
1248     128, 85,  170, 64,  192,
1249     51,  102, 153, 204, 43,
1250     215, 37,  74,  111, 148,
1251     185, 222, 32,  96,  160, 
1252     224,
1253 };
1254
1255 static const int va_to_gen7_vc1_condover[3] = {
1256     0,
1257     2,
1258     3
1259 };
1260
1261 static const int va_to_gen7_vc1_profile[4] = {
1262     GEN7_VC1_SIMPLE_PROFILE,
1263     GEN7_VC1_MAIN_PROFILE,
1264     GEN7_VC1_RESERVED_PROFILE,
1265     GEN7_VC1_ADVANCED_PROFILE
1266 };
1267
1268 static void 
1269 gen8_mfd_free_vc1_surface(void **data)
1270 {
1271     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1272
1273     if (!gen7_vc1_surface)
1274         return;
1275
1276     dri_bo_unreference(gen7_vc1_surface->dmv);
1277     free(gen7_vc1_surface);
1278     *data = NULL;
1279 }
1280
1281 static void
1282 gen8_mfd_init_vc1_surface(VADriverContextP ctx, 
1283                           VAPictureParameterBufferVC1 *pic_param,
1284                           struct object_surface *obj_surface)
1285 {
1286     struct i965_driver_data *i965 = i965_driver_data(ctx);
1287     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1288     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1289     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1290
1291     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1292
1293     if (!gen7_vc1_surface) {
1294         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1295         assert((obj_surface->size & 0x3f) == 0);
1296         obj_surface->private_data = gen7_vc1_surface;
1297     }
1298
1299     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1300
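    /*
     * Per-surface direct-MV scratch: judging by the allocation below, the
     * hardware reads/writes 64 bytes of motion-vector data per macroblock
     * of the reconstructed picture.
     */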
1301     if (gen7_vc1_surface->dmv == NULL) {
1302         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1303                                              "direct mv w/r buffer",
1304                                              width_in_mbs * height_in_mbs * 64,
1305                                              0x1000);
1306     }
1307 }
1308
1309 static void
1310 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1311                          struct decode_state *decode_state,
1312                          struct gen7_mfd_context *gen7_mfd_context)
1313 {
1314     VAPictureParameterBufferVC1 *pic_param;
1315     struct i965_driver_data *i965 = i965_driver_data(ctx);
1316     struct object_surface *obj_surface;
1317     dri_bo *bo;
1318     int width_in_mbs;
1319     int picture_type;
1320
1321     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1322     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1323     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1324     picture_type = pic_param->picture_fields.bits.picture_type;
1325  
1326     intel_update_vc1_frame_store_index(ctx,
1327                                        decode_state,
1328                                        pic_param,
1329                                        gen7_mfd_context->reference_surface);
1330
1331     /* Current decoded picture */
1332     obj_surface = decode_state->render_object;
1333     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1334     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1335
1336     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1337     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1338     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1339     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1340
1341     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1342     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1343     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1344     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1345
1346     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1347     bo = dri_bo_alloc(i965->intel.bufmgr,
1348                       "intra row store",
1349                       width_in_mbs * 64,
1350                       0x1000);
1351     assert(bo);
1352     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1353     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1354
1355     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1356     bo = dri_bo_alloc(i965->intel.bufmgr,
1357                       "deblocking filter row store",
1358                       width_in_mbs * 7 * 64,
1359                       0x1000);
1360     assert(bo);
1361     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1362     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1363
1364     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1365     bo = dri_bo_alloc(i965->intel.bufmgr,
1366                       "bsd mpc row store",
1367                       width_in_mbs * 96,
1368                       0x1000);
1369     assert(bo);
1370     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1371     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1372
1373     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1374
1375     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1376     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1377     
1378     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1379         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1380         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1381         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1382         int src_w, src_h;
1383         uint8_t *src = NULL, *dst = NULL;
1384
1385         assert(decode_state->bit_plane->buffer);
1386         src = decode_state->bit_plane->buffer;
1387
1388         bo = dri_bo_alloc(i965->intel.bufmgr,
1389                           "VC-1 Bitplane",
1390                           bitplane_width * height_in_mbs,
1391                           0x1000);
1392         assert(bo);
1393         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1394
1395         dri_bo_map(bo, True);
1396         assert(bo->virtual);
1397         dst = bo->virtual;
1398
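        /*
         * Repack the VA-API bitplane buffer for the hardware, row by row.
         * The source packs two macroblocks per byte (4 bits each, raster
         * order across the whole picture); the destination below uses the
         * same 2-MB-per-byte packing but with a per-row pitch of
         * bitplane_width bytes.  For skipped pictures one of the per-MB
         * flag bits is forced on via the "src_value |= 0x2" below.
         */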
1399         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1400             for (src_w = 0; src_w < width_in_mbs; src_w++) {
1401                 int src_index, dst_index;
1402                 int src_shift;
1403                 uint8_t src_value;
1404
1405                 src_index = (src_h * width_in_mbs + src_w) / 2;
1406                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1407                 src_value = ((src[src_index] >> src_shift) & 0xf);
1408
1409                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1410                     src_value |= 0x2;
1411                 }
1412
1413                 dst_index = src_w / 2;
1414                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1415             }
1416
1417             if (src_w & 1)
1418                 dst[src_w / 2] >>= 4;
1419
1420             dst += bitplane_width;
1421         }
1422
1423         dri_bo_unmap(bo);
1424     } else
1425         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1426 }
1427
1428 static void
1429 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1430                        struct decode_state *decode_state,
1431                        struct gen7_mfd_context *gen7_mfd_context)
1432 {
1433     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1434     VAPictureParameterBufferVC1 *pic_param;
1435     struct object_surface *obj_surface;
1436     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1437     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1438     int unified_mv_mode;
1439     int ref_field_pic_polarity = 0;
1440     int scale_factor = 0;
1441     int trans_ac_y = 0;
1442     int dmv_surface_valid = 0;
1443     int brfd = 0;
1444     int fcm = 0;
1445     int picture_type;
1446     int profile;
1447     int overlap;
1448     int interpolation_mode = 0;
1449
1450     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1451     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1452
1453     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1454     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1455     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1456     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1457     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1458     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1459     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1460     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1461
1462     if (dquant == 0) {
1463         alt_pquant_config = 0;
1464         alt_pquant_edge_mask = 0;
1465     } else if (dquant == 2) {
1466         alt_pquant_config = 1;
1467         alt_pquant_edge_mask = 0xf;
1468     } else {
1469         assert(dquant == 1);
1470         if (dquantfrm == 0) {
1471             alt_pquant_config = 0;
1472             alt_pquant_edge_mask = 0;
1473             alt_pq = 0;
1474         } else {
1475             assert(dquantfrm == 1);
1476             alt_pquant_config = 1;
1477
1478             switch (dqprofile) {
1479             case 3:
1480                 if (dqbilevel == 0) {
1481                     alt_pquant_config = 2;
1482                     alt_pquant_edge_mask = 0;
1483                 } else {
1484                     assert(dqbilevel == 1);
1485                     alt_pquant_config = 3;
1486                     alt_pquant_edge_mask = 0;
1487                 }
1488                 break;
1489                 
1490             case 0:
1491                 alt_pquant_edge_mask = 0xf;
1492                 break;
1493
1494             case 1:
1495                 if (dqdbedge == 3)
1496                     alt_pquant_edge_mask = 0x9;
1497                 else
1498                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1499
1500                 break;
1501
1502             case 2:
1503                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1504                 break;
1505
1506             default:
1507                 assert(0);
1508             }
1509         }
1510     }
1511
1512     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1513         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1514         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1515     } else {
1516         assert(pic_param->mv_fields.bits.mv_mode < 4);
1517         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1518     }
1519
1520     if (pic_param->sequence_fields.bits.interlace == 1 &&
1521         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1522         /* FIXME: calculate reference field picture polarity */
1523         assert(0);
1524         ref_field_pic_polarity = 0;
1525     }
1526
1527     if (pic_param->b_picture_fraction < 21)
1528         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1529
1530     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1531     
1532     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1533         picture_type == GEN7_VC1_I_PICTURE)
1534         picture_type = GEN7_VC1_BI_PICTURE;
1535
1536     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1537         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1538     else {
1539         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1540
1541         /*
1542          * 8.3.6.2.1 Transform Type Selection
1543          * If variable-sized transform coding is not enabled,
1544          * then the 8x8 transform shall be used for all blocks.
1545          * This is also an MFX_VC1_PIC_STATE requirement.
1546          */
1547         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1548             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1549             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1550         }
1551     }
1552
1553     if (picture_type == GEN7_VC1_B_PICTURE) {
1554         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1555
1556         obj_surface = decode_state->reference_objects[1];
1557
1558         if (obj_surface)
1559             gen7_vc1_surface = obj_surface->private_data;
1560
1561         if (!gen7_vc1_surface || 
1562             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1563              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1564             dmv_surface_valid = 0;
1565         else
1566             dmv_surface_valid = 1;
1567     }
1568
1569     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1570
1571     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1572         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1573     else {
1574         if (pic_param->picture_fields.bits.top_field_first)
1575             fcm = 2;
1576         else
1577             fcm = 3;
1578     }
1579
1580     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1581         brfd = pic_param->reference_fields.bits.reference_distance;
1582         brfd = (scale_factor * brfd) >> 8;
1583         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1584
1585         if (brfd < 0)
1586             brfd = 0;
1587     }
1588
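    /*
     * Overlap smoothing enable: for simple/main profile the checks below
     * turn it on when PQUANT (pic_quantizer_scale) >= 9 on non-B pictures;
     * for advanced profile it is also enabled on I/BI pictures when
     * CONDOVER requests it (values mapped through va_to_gen7_vc1_condover).
     */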
1589     overlap = 0;
1590     if (profile != GEN7_VC1_ADVANCED_PROFILE) {
1591         if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1592             pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1593             overlap = 1;
1594         }
1595     } else {
1596         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1597             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1598             overlap = 1;
1599         }
1600         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1601             pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
1602             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1603                 overlap = 1;
1604             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1605                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1606                 overlap = 1;
1607             }
1608         }
1609     }
1610
1611     assert(pic_param->conditional_overlap_flag < 3);
1612     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1613
1614     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1615         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1616          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1617         interpolation_mode = 9; /* Half-pel bilinear */
1618     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1619              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1620               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1621         interpolation_mode = 1; /* Half-pel bicubic */
1622     else
1623         interpolation_mode = 0; /* Quarter-pel bicubic */
1624
1625     BEGIN_BCS_BATCH(batch, 6);
1626     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1627     OUT_BCS_BATCH(batch,
1628                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1629                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1630     OUT_BCS_BATCH(batch,
1631                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1632                   dmv_surface_valid << 15 |
1633                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1634                   pic_param->rounding_control << 13 |
1635                   pic_param->sequence_fields.bits.syncmarker << 12 |
1636                   interpolation_mode << 8 |
1637                   0 << 7 | /* FIXME: scale up or down ??? */
1638                   pic_param->range_reduction_frame << 6 |
1639                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1640                   overlap << 4 |
1641                   !pic_param->picture_fields.bits.is_first_field << 3 |
1642                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1643     OUT_BCS_BATCH(batch,
1644                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1645                   picture_type << 26 |
1646                   fcm << 24 |
1647                   alt_pq << 16 |
1648                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1649                   scale_factor << 0);
1650     OUT_BCS_BATCH(batch,
1651                   unified_mv_mode << 28 |
1652                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1653                   pic_param->fast_uvmc_flag << 26 |
1654                   ref_field_pic_polarity << 25 |
1655                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1656                   pic_param->reference_fields.bits.reference_distance << 20 |
1657                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1658                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1659                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1660                   alt_pquant_edge_mask << 4 |
1661                   alt_pquant_config << 2 |
1662                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1663                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1664     OUT_BCS_BATCH(batch,
1665                   !!pic_param->bitplane_present.value << 31 |
1666                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1667                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1668                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1669                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1670                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1671                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1672                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1673                   pic_param->mv_fields.bits.mv_table << 20 |
1674                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1675                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1676                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1677                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1678                   pic_param->mb_mode_table << 8 |
1679                   trans_ac_y << 6 |
1680                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1681                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1682                   pic_param->cbp_table << 0);
1683     ADVANCE_BCS_BATCH(batch);
1684 }
1685
1686 static void
1687 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1688                              struct decode_state *decode_state,
1689                              struct gen7_mfd_context *gen7_mfd_context)
1690 {
1691     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1692     VAPictureParameterBufferVC1 *pic_param;
1693     int intensitycomp_single;
1694
1695     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1696     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1697
1700     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1701
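    /*
     * MFX_VC1_PRED_PIPE_STATE mainly carries the intensity-compensation
     * setup: when mv_mode is VAMvModeIntensityCompensation the single-IC
     * enable bits are set here, and LumaScale/LumaShift from the picture
     * parameters are programmed in the following dword.
     */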
1702     BEGIN_BCS_BATCH(batch, 6);
1703     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1704     OUT_BCS_BATCH(batch,
1705                   0 << 14 | /* FIXME: double ??? */
1706                   0 << 12 |
1707                   intensitycomp_single << 10 |
1708                   intensitycomp_single << 8 |
1709                   0 << 4 | /* FIXME: interlace mode */
1710                   0);
1711     OUT_BCS_BATCH(batch,
1712                   pic_param->luma_shift << 16 |
1713                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1714     OUT_BCS_BATCH(batch, 0);
1715     OUT_BCS_BATCH(batch, 0);
1716     OUT_BCS_BATCH(batch, 0);
1717     ADVANCE_BCS_BATCH(batch);
1718 }
1719
1720 static void
1721 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1722                               struct decode_state *decode_state,
1723                               struct gen7_mfd_context *gen7_mfd_context)
1724 {
1725     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1726     struct object_surface *obj_surface;
1727     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1728
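    /*
     * Direct-mode MV buffers: the current (render) surface provides the
     * write target, while reference_objects[1] -- presumably the
     * backward/anchor reference used for B-picture direct mode -- provides
     * the read buffer.  Either slot may legitimately be absent, in which
     * case a zero address is emitted instead of a relocation.
     */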
1729     obj_surface = decode_state->render_object;
1730
1731     if (obj_surface && obj_surface->private_data) {
1732         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1733     }
1734
1735     obj_surface = decode_state->reference_objects[1];
1736
1737     if (obj_surface && obj_surface->private_data) {
1738         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1739     }
1740
1741     BEGIN_BCS_BATCH(batch, 7);
1742     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1743
1744     if (dmv_write_buffer)
1745         OUT_BCS_RELOC(batch, dmv_write_buffer,
1746                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1747                       0);
1748     else
1749         OUT_BCS_BATCH(batch, 0);
1750
1751     OUT_BCS_BATCH(batch, 0);
1752     OUT_BCS_BATCH(batch, 0);
1753
1754     if (dmv_read_buffer)
1755         OUT_BCS_RELOC(batch, dmv_read_buffer,
1756                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1757                       0);
1758     else
1759         OUT_BCS_BATCH(batch, 0);
1760     
1761     OUT_BCS_BATCH(batch, 0);
1762     OUT_BCS_BATCH(batch, 0);
1763                   
1764     ADVANCE_BCS_BATCH(batch);
1765 }
1766
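/*
 * For the advanced profile (profile == 3) the buffer handed to the
 * hardware still contains start-code emulation prevention bytes
 * (00 00 03 followed by a byte < 4).  The scan below counts how many of
 * them occur within the slice header and widens the incoming bit offset
 * by 8 bits for each one, so the returned offset presumably points at
 * the first macroblock as seen in the raw buffer.
 */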
1767 static int
1768 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1769 {
1770     int out_slice_data_bit_offset;
1771     int slice_header_size = in_slice_data_bit_offset / 8;
1772     int i, j;
1773
1774     if (profile != 3)
1775         out_slice_data_bit_offset = in_slice_data_bit_offset;
1776     else {
1777         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1778             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1779                 i++, j += 2;
1780             }
1781         }
1782
1783         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1784     }
1785
1786     return out_slice_data_bit_offset;
1787 }
1788
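/*
 * One MFD_VC1_BSD_OBJECT is emitted per slice.  The slice header bytes
 * (macroblock_offset >> 3) are trimmed from the data size/offset, the
 * remaining sub-byte offset is programmed in the last dword, and the
 * slice is bounded vertically by the next slice's start row (or the
 * bottom of the picture for the last slice).
 */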
1789 static void
1790 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1791                         VAPictureParameterBufferVC1 *pic_param,
1792                         VASliceParameterBufferVC1 *slice_param,
1793                         VASliceParameterBufferVC1 *next_slice_param,
1794                         dri_bo *slice_data_bo,
1795                         struct gen7_mfd_context *gen7_mfd_context)
1796 {
1797     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1798     int next_slice_start_vert_pos;
1799     int macroblock_offset;
1800     uint8_t *slice_data = NULL;
1801
1802     dri_bo_map(slice_data_bo, 0);
1803     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1804     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1805                                                                slice_param->macroblock_offset,
1806                                                                pic_param->sequence_fields.bits.profile);
1807     dri_bo_unmap(slice_data_bo);
1808
1809     if (next_slice_param)
1810         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1811     else
1812         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1813
1814     BEGIN_BCS_BATCH(batch, 5);
1815     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1816     OUT_BCS_BATCH(batch, 
1817                   slice_param->slice_data_size - (macroblock_offset >> 3));
1818     OUT_BCS_BATCH(batch, 
1819                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1820     OUT_BCS_BATCH(batch,
1821                   slice_param->slice_vertical_position << 16 |
1822                   next_slice_start_vert_pos << 0);
1823     OUT_BCS_BATCH(batch,
1824                   (macroblock_offset & 0x7));
1825     ADVANCE_BCS_BATCH(batch);
1826 }
1827
1828 static void
1829 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1830                             struct decode_state *decode_state,
1831                             struct gen7_mfd_context *gen7_mfd_context)
1832 {
1833     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1834     VAPictureParameterBufferVC1 *pic_param;
1835     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1836     dri_bo *slice_data_bo;
1837     int i, j;
1838
1839     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1840     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1841
1842     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1843     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1844     intel_batchbuffer_emit_mi_flush(batch);
1845     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1846     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1847     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1848     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1849     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1850     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1851     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1852
1853     for (j = 0; j < decode_state->num_slice_params; j++) {
1854         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1855         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1856         slice_data_bo = decode_state->slice_datas[j]->bo;
1857         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1858
1859         if (j == decode_state->num_slice_params - 1)
1860             next_slice_group_param = NULL;
1861         else
1862             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1863
1864         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1865             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1866
1867             if (i < decode_state->slice_params[j]->num_elements - 1)
1868                 next_slice_param = slice_param + 1;
1869             else
1870                 next_slice_param = next_slice_group_param;
1871
1872             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1873             slice_param++;
1874         }
1875     }
1876
1877     intel_batchbuffer_end_atomic(batch);
1878     intel_batchbuffer_flush(batch);
1879 }
1880
1881 static void
1882 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1883                           struct decode_state *decode_state,
1884                           struct gen7_mfd_context *gen7_mfd_context)
1885 {
1886     struct object_surface *obj_surface;
1887     VAPictureParameterBufferJPEGBaseline *pic_param;
1888     int subsampling = SUBSAMPLE_YUV420;
1889
1890     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1891
1892     if (pic_param->num_components == 1)
1893         subsampling = SUBSAMPLE_YUV400;
1894     else if (pic_param->num_components == 3) {
1895         int h1 = pic_param->components[0].h_sampling_factor;
1896         int h2 = pic_param->components[1].h_sampling_factor;
1897         int h3 = pic_param->components[2].h_sampling_factor;
1898         int v1 = pic_param->components[0].v_sampling_factor;
1899         int v2 = pic_param->components[1].v_sampling_factor;
1900         int v3 = pic_param->components[2].v_sampling_factor;
1901
1902         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1903             v1 == 2 && v2 == 1 && v3 == 1)
1904             subsampling = SUBSAMPLE_YUV420;
1905         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1906                  v1 == 1 && v2 == 1 && v3 == 1)
1907             subsampling = SUBSAMPLE_YUV422H;
1908         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1909                  v1 == 1 && v2 == 1 && v3 == 1)
1910             subsampling = SUBSAMPLE_YUV444;
1911         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1912                  v1 == 1 && v2 == 1 && v3 == 1)
1913             subsampling = SUBSAMPLE_YUV411;
1914         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1915                  v1 == 2 && v2 == 1 && v3 == 1)
1916             subsampling = SUBSAMPLE_YUV422V;
1917         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1918                  v1 == 2 && v2 == 2 && v3 == 2)
1919             subsampling = SUBSAMPLE_YUV422H;
1920         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1921                  v1 == 2 && v2 == 1 && v3 == 1)
1922             subsampling = SUBSAMPLE_YUV422V;
1923         else
1924             assert(0);
1925     } else {
1926         assert(0);
1927     }
1928
1929     /* Current decoded picture */
1930     obj_surface = decode_state->render_object;
1931     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling);
1932
1933     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1934     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1935     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1936     gen7_mfd_context->pre_deblocking_output.valid = 1;
1937
1938     gen7_mfd_context->post_deblocking_output.bo = NULL;
1939     gen7_mfd_context->post_deblocking_output.valid = 0;
1940
1941     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1942     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1943
1944     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1945     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1946
1947     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1948     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1949
1950     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1951     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1952
1953     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1954     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1955 }
1956
1957 static const int va_to_gen7_jpeg_rotation[4] = {
1958     GEN7_JPEG_ROTATION_0,
1959     GEN7_JPEG_ROTATION_90,
1960     GEN7_JPEG_ROTATION_180,
1961     GEN7_JPEG_ROTATION_270
1962 };
1963
1964 static void
1965 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1966                         struct decode_state *decode_state,
1967                         struct gen7_mfd_context *gen7_mfd_context)
1968 {
1969     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1970     VAPictureParameterBufferJPEGBaseline *pic_param;
1971     int chroma_type = GEN7_YUV420;
1972     int frame_width_in_blks;
1973     int frame_height_in_blks;
1974
1975     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1976     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1977
1978     if (pic_param->num_components == 1)
1979         chroma_type = GEN7_YUV400;
1980     else if (pic_param->num_components == 3) {
1981         int h1 = pic_param->components[0].h_sampling_factor;
1982         int h2 = pic_param->components[1].h_sampling_factor;
1983         int h3 = pic_param->components[2].h_sampling_factor;
1984         int v1 = pic_param->components[0].v_sampling_factor;
1985         int v2 = pic_param->components[1].v_sampling_factor;
1986         int v3 = pic_param->components[2].v_sampling_factor;
1987
1988         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1989             v1 == 2 && v2 == 1 && v3 == 1)
1990             chroma_type = GEN7_YUV420;
1991         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1992                  v1 == 1 && v2 == 1 && v3 == 1)
1993             chroma_type = GEN7_YUV422H_2Y;
1994         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1995                  v1 == 1 && v2 == 1 && v3 == 1)
1996             chroma_type = GEN7_YUV444;
1997         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1998                  v1 == 1 && v2 == 1 && v3 == 1)
1999             chroma_type = GEN7_YUV411;
2000         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2001                  v1 == 2 && v2 == 1 && v3 == 1)
2002             chroma_type = GEN7_YUV422V_2Y;
2003         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2004                  v1 == 2 && v2 == 2 && v3 == 2)
2005             chroma_type = GEN7_YUV422H_4Y;
2006         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2007                  v1 == 2 && v2 == 1 && v3 == 1)
2008             chroma_type = GEN7_YUV422V_4Y;
2009         else
2010             assert(0);
2011     }
2012
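    /*
     * The frame size is programmed in 8x8 block units.  The divisor /
     * multiplier pairs below round the picture up to MCU granularity:
     * 8 pixels for YUV400/444/422V_2Y, 32 pixels for YUV411 and
     * 16 pixels for the remaining chroma layouts.
     */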
2013     if (chroma_type == GEN7_YUV400 ||
2014         chroma_type == GEN7_YUV444 ||
2015         chroma_type == GEN7_YUV422V_2Y) {
2016         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2017         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2018     } else if (chroma_type == GEN7_YUV411) {
2019         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2020         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2021     } else {
2022         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2023         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2024     }
2025
2026     BEGIN_BCS_BATCH(batch, 3);
2027     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2028     OUT_BCS_BATCH(batch,
2029                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2030                   (chroma_type << 0));
2031     OUT_BCS_BATCH(batch,
2032                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2033                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2034     ADVANCE_BCS_BATCH(batch);
2035 }
2036
2037 static const int va_to_gen7_jpeg_hufftable[2] = {
2038     MFX_HUFFTABLE_ID_Y,
2039     MFX_HUFFTABLE_ID_UV
2040 };
2041
2042 static void
2043 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2044                                struct decode_state *decode_state,
2045                                struct gen7_mfd_context *gen7_mfd_context,
2046                                int num_tables)
2047 {
2048     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2049     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2050     int index;
2051
2052     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2053         return;
2054
2055     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2056
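    /*
     * Each MFX_JPEG_HUFF_TABLE_STATE is 53 dwords: 2 header dwords plus
     * 204 bytes of table data (12 + 12 + 16 + 164 below).  Note that only
     * the first 12 of the 16 DC code-length counts are sent, and the 164
     * AC value bytes cover the 162 values defined by the VA-API buffer
     * plus 2 bytes of padding to stay dword aligned.
     */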
2057     for (index = 0; index < num_tables; index++) {
2058         int id = va_to_gen7_jpeg_hufftable[index];
2059         if (!huffman_table->load_huffman_table[index])
2060             continue;
2061         BEGIN_BCS_BATCH(batch, 53);
2062         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2063         OUT_BCS_BATCH(batch, id);
2064         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2065         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2066         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2067         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2068         ADVANCE_BCS_BATCH(batch);
2069     }
2070 }
2071
2072 static const int va_to_gen7_jpeg_qm[5] = {
2073     -1,
2074     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2075     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2076     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2077     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2078 };
2079
2080 static void
2081 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2082                        struct decode_state *decode_state,
2083                        struct gen7_mfd_context *gen7_mfd_context)
2084 {
2085     VAPictureParameterBufferJPEGBaseline *pic_param;
2086     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2087     int index;
2088
2089     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2090         return;
2091
2092     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2093     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2094
2095     assert(pic_param->num_components <= 3);
2096
2097     for (index = 0; index < pic_param->num_components; index++) {
2098         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2099         int qm_type;
2100         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2101         unsigned char raster_qm[64];
2102         int j;
2103
2104         if (id > 4 || id < 1)
2105             continue;
2106
2107         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2108             continue;
2109
2110         qm_type = va_to_gen7_jpeg_qm[id];
2111
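        /*
         * VA-API hands the JPEG quantiser tables in zig-zag scan order
         * (as in the DQT segment); the hardware matrix is programmed in
         * raster order, so re-index through zigzag_direct first.
         */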
2112         for (j = 0; j < 64; j++)
2113             raster_qm[zigzag_direct[j]] = qm[j];
2114
2115         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2116     }
2117 }
2118
2119 static void
2120 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2121                          VAPictureParameterBufferJPEGBaseline *pic_param,
2122                          VASliceParameterBufferJPEGBaseline *slice_param,
2123                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2124                          dri_bo *slice_data_bo,
2125                          struct gen7_mfd_context *gen7_mfd_context)
2126 {
2127     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2128     int scan_component_mask = 0;
2129     int i;
2130
2131     assert(slice_param->num_components > 0);
2132     assert(slice_param->num_components < 4);
2133     assert(slice_param->num_components <= pic_param->num_components);
2134
2135     for (i = 0; i < slice_param->num_components; i++) {
2136         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2137         case 1:
2138             scan_component_mask |= (1 << 0);
2139             break;
2140         case 2:
2141             scan_component_mask |= (1 << 1);
2142             break;
2143         case 3:
2144             scan_component_mask |= (1 << 2);
2145             break;
2146         default:
2147             assert(0);
2148             break;
2149         }
2150     }
2151
2152     BEGIN_BCS_BATCH(batch, 6);
2153     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2154     OUT_BCS_BATCH(batch, 
2155                   slice_param->slice_data_size);
2156     OUT_BCS_BATCH(batch, 
2157                   slice_param->slice_data_offset);
2158     OUT_BCS_BATCH(batch,
2159                   slice_param->slice_horizontal_position << 16 |
2160                   slice_param->slice_vertical_position << 0);
2161     OUT_BCS_BATCH(batch,
2162                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2163                   (scan_component_mask << 27) |                 /* scan components */
2164                   (0 << 26) |   /* disable interrupt allowed */
2165                   (slice_param->num_mcus << 0));                /* MCU count */
2166     OUT_BCS_BATCH(batch,
2167                   (slice_param->restart_interval << 0));    /* RestartInterval */
2168     ADVANCE_BCS_BATCH(batch);
2169 }
2170
2171 /* Workaround for JPEG decoding on Ivybridge */
2172 #ifdef JPEG_WA
2173
2174 VAStatus 
2175 i965_DestroySurfaces(VADriverContextP ctx,
2176                      VASurfaceID *surface_list,
2177                      int num_surfaces);
2178 VAStatus 
2179 i965_CreateSurfaces(VADriverContextP ctx,
2180                     int width,
2181                     int height,
2182                     int format,
2183                     int num_surfaces,
2184                     VASurfaceID *surfaces);
2185
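/*
 * Pre-canned bitstream used by the JPEG workaround below: judging by the
 * fields, a 16x16 clip whose 14-byte payload is fed through the AVC BSD
 * path with a 40-bit header offset and QP 28.
 */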
2186 static struct {
2187     int width;
2188     int height;
2189     unsigned char data[32];
2190     int data_size;
2191     int data_bit_offset;
2192     int qp;
2193 } gen7_jpeg_wa_clip = {
2194     16,
2195     16,
2196     {
2197         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2198         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2199     },
2200     14,
2201     40,
2202     28,
2203 };
2204
2205 static void
2206 gen8_jpeg_wa_init(VADriverContextP ctx,
2207                   struct gen7_mfd_context *gen7_mfd_context)
2208 {
2209     struct i965_driver_data *i965 = i965_driver_data(ctx);
2210     VAStatus status;
2211     struct object_surface *obj_surface;
2212
2213     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2214         i965_DestroySurfaces(ctx,
2215                              &gen7_mfd_context->jpeg_wa_surface_id,
2216                              1);
2217
2218     status = i965_CreateSurfaces(ctx,
2219                                  gen7_jpeg_wa_clip.width,
2220                                  gen7_jpeg_wa_clip.height,
2221                                  VA_RT_FORMAT_YUV420,
2222                                  1,
2223                                  &gen7_mfd_context->jpeg_wa_surface_id);
2224     assert(status == VA_STATUS_SUCCESS);
2225
2226     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2227     assert(obj_surface);
2228     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
2229     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2230
2231     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2232         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2233                                                                "JPEG WA data",
2234                                                                0x1000,
2235                                                                0x1000);
2236         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2237                        0,
2238                        gen7_jpeg_wa_clip.data_size,
2239                        gen7_jpeg_wa_clip.data);
2240     }
2241 }
2242
2243 static void
2244 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2245                               struct gen7_mfd_context *gen7_mfd_context)
2246 {
2247     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2248
2249     BEGIN_BCS_BATCH(batch, 5);
2250     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2251     OUT_BCS_BATCH(batch,
2252                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2253                   (MFD_MODE_VLD << 15) | /* VLD mode */
2254                   (0 << 10) | /* disable Stream-Out */
2255                   (0 << 9)  | /* Post Deblocking Output */
2256                   (1 << 8)  | /* Pre Deblocking Output */
2257                   (0 << 5)  | /* not in stitch mode */
2258                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2259                   (MFX_FORMAT_AVC << 0));
2260     OUT_BCS_BATCH(batch,
2261                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2262                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2263                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2264                   (0 << 1)  |
2265                   (0 << 0));
2266     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2267     OUT_BCS_BATCH(batch, 0); /* reserved */
2268     ADVANCE_BCS_BATCH(batch);
2269 }
2270
2271 static void
2272 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2273                            struct gen7_mfd_context *gen7_mfd_context)
2274 {
2275     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2276     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2277
2278     BEGIN_BCS_BATCH(batch, 6);
2279     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2280     OUT_BCS_BATCH(batch, 0);
2281     OUT_BCS_BATCH(batch,
2282                   ((obj_surface->orig_width - 1) << 18) |
2283                   ((obj_surface->orig_height - 1) << 4));
2284     OUT_BCS_BATCH(batch,
2285                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2286                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2287                   (0 << 22) | /* surface object control state, ignored */
2288                   ((obj_surface->width - 1) << 3) | /* pitch */
2289                   (0 << 2)  | /* must be 0 */
2290                   (1 << 1)  | /* must be tiled */
2291                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2292     OUT_BCS_BATCH(batch,
2293                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2294                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2295     OUT_BCS_BATCH(batch,
2296                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2297                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2298     ADVANCE_BCS_BATCH(batch);
2299 }
2300
2301 static void
2302 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2303                                  struct gen7_mfd_context *gen7_mfd_context)
2304 {
2305     struct i965_driver_data *i965 = i965_driver_data(ctx);
2306     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2307     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2308     dri_bo *intra_bo;
2309     int i;
2310
2311     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2312                             "intra row store",
2313                             128 * 64,
2314                             0x1000);
2315
2316     BEGIN_BCS_BATCH(batch, 61);
2317     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2318     OUT_BCS_RELOC(batch,
2319                   obj_surface->bo,
2320                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2321                   0);
2322     OUT_BCS_BATCH(batch, 0);
2323     OUT_BCS_BATCH(batch, 0);
2324
2325
2326     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2327     OUT_BCS_BATCH(batch, 0);
2328     OUT_BCS_BATCH(batch, 0);
2329
2330     /* uncompressed-video & stream out 7-12 */
2331     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2332     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2333     OUT_BCS_BATCH(batch, 0);
2334     OUT_BCS_BATCH(batch, 0);
2335     OUT_BCS_BATCH(batch, 0);
2336     OUT_BCS_BATCH(batch, 0);
2337
2338     /* the DW 13-15 is for intra row store scratch */
2339     OUT_BCS_RELOC(batch,
2340                   intra_bo,
2341                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2342                   0);
2343     OUT_BCS_BATCH(batch, 0);
2344     OUT_BCS_BATCH(batch, 0);
2345
2346     /* the DW 16-18 is for deblocking filter */
2347     OUT_BCS_BATCH(batch, 0);
2348     OUT_BCS_BATCH(batch, 0);
2349     OUT_BCS_BATCH(batch, 0);
2350
2351     /* DW 19..50 */
2352     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2353         OUT_BCS_BATCH(batch, 0);
2354         OUT_BCS_BATCH(batch, 0);
2355     }
2356     OUT_BCS_BATCH(batch, 0);
2357
2358     /* the DW52-54 is for mb status address */
2359     OUT_BCS_BATCH(batch, 0);
2360     OUT_BCS_BATCH(batch, 0);
2361     OUT_BCS_BATCH(batch, 0);
2362     /* the DW56-60 is for ILDB & second ILDB address */
2363     OUT_BCS_BATCH(batch, 0);
2364     OUT_BCS_BATCH(batch, 0);
2365     OUT_BCS_BATCH(batch, 0);
2366     OUT_BCS_BATCH(batch, 0);
2367     OUT_BCS_BATCH(batch, 0);
2368     OUT_BCS_BATCH(batch, 0);
2369
2370     ADVANCE_BCS_BATCH(batch);
2371
2372     dri_bo_unreference(intra_bo);
2373 }
2374
2375 static void
2376 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2377                                      struct gen7_mfd_context *gen7_mfd_context)
2378 {
2379     struct i965_driver_data *i965 = i965_driver_data(ctx);
2380     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2381     dri_bo *bsd_mpc_bo, *mpr_bo;
2382
2383     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2384                               "bsd mpc row store",
2385                               11520, /* 1.5 * 120 * 64 */
2386                               0x1000);
2387
2388     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2389                           "mpr row store",
2390                           7680, /* 1.0 * 120 * 64 */
2391                           0x1000);
2392
2393     BEGIN_BCS_BATCH(batch, 10);
2394     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2395
2396     OUT_BCS_RELOC(batch,
2397                   bsd_mpc_bo,
2398                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2399                   0);
2400
2401     OUT_BCS_BATCH(batch, 0);
2402     OUT_BCS_BATCH(batch, 0);
2403
2404     OUT_BCS_RELOC(batch,
2405                   mpr_bo,
2406                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2407                   0);
2408     OUT_BCS_BATCH(batch, 0);
2409     OUT_BCS_BATCH(batch, 0);
2410
2411     OUT_BCS_BATCH(batch, 0);
2412     OUT_BCS_BATCH(batch, 0);
2413     OUT_BCS_BATCH(batch, 0);
2414
2415     ADVANCE_BCS_BATCH(batch);
2416
2417     dri_bo_unreference(bsd_mpc_bo);
2418     dri_bo_unreference(mpr_bo);
2419 }
2420
2421 static void
2422 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2423                           struct gen7_mfd_context *gen7_mfd_context)
2424 {
2425
2426 }
2427
2428 static void
2429 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2430                            struct gen7_mfd_context *gen7_mfd_context)
2431 {
2432     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2433     int img_struct = 0;
2434     int mbaff_frame_flag = 0;
2435     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2436
2437     BEGIN_BCS_BATCH(batch, 16);
2438     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2439     OUT_BCS_BATCH(batch, 
2440                   width_in_mbs * height_in_mbs);
2441     OUT_BCS_BATCH(batch, 
2442                   ((height_in_mbs - 1) << 16) | 
2443                   ((width_in_mbs - 1) << 0));
2444     OUT_BCS_BATCH(batch, 
2445                   (0 << 24) |
2446                   (0 << 16) |
2447                   (0 << 14) |
2448                   (0 << 13) |
2449                   (0 << 12) | /* differ from GEN6 */
2450                   (0 << 10) |
2451                   (img_struct << 8));
2452     OUT_BCS_BATCH(batch,
2453                   (1 << 10) | /* 4:2:0 */
2454                   (1 << 7) |  /* CABAC */
2455                   (0 << 6) |
2456                   (0 << 5) |
2457                   (0 << 4) |
2458                   (0 << 3) |
2459                   (1 << 2) |
2460                   (mbaff_frame_flag << 1) |
2461                   (0 << 0));
2462     OUT_BCS_BATCH(batch, 0);
2463     OUT_BCS_BATCH(batch, 0);
2464     OUT_BCS_BATCH(batch, 0);
2465     OUT_BCS_BATCH(batch, 0);
2466     OUT_BCS_BATCH(batch, 0);
2467     OUT_BCS_BATCH(batch, 0);
2468     OUT_BCS_BATCH(batch, 0);
2469     OUT_BCS_BATCH(batch, 0);
2470     OUT_BCS_BATCH(batch, 0);
2471     OUT_BCS_BATCH(batch, 0);
2472     OUT_BCS_BATCH(batch, 0);
2473     ADVANCE_BCS_BATCH(batch);
2474 }
2475
2476 static void
2477 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2478                                   struct gen7_mfd_context *gen7_mfd_context)
2479 {
2480     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2481     int i;
2482
2483     BEGIN_BCS_BATCH(batch, 71);
2484     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2485
2486     /* reference surfaces 0..15 */
2487     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2488         OUT_BCS_BATCH(batch, 0); /* top */
2489         OUT_BCS_BATCH(batch, 0); /* bottom */
2490     }
2491
2492     OUT_BCS_BATCH(batch, 0);
2493
2494     /* the current decoding frame/field */
2495     OUT_BCS_BATCH(batch, 0); /* top */
2496     OUT_BCS_BATCH(batch, 0);
2497     OUT_BCS_BATCH(batch, 0);
2498
2499     /* POC List */
2500     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2501         OUT_BCS_BATCH(batch, 0);
2502         OUT_BCS_BATCH(batch, 0);
2503     }
2504
2505     OUT_BCS_BATCH(batch, 0);
2506     OUT_BCS_BATCH(batch, 0);
2507
2508     ADVANCE_BCS_BATCH(batch);
2509 }
2510
2511 static void
2512 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2513                                      struct gen7_mfd_context *gen7_mfd_context)
2514 {
2515     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2516
2517     BEGIN_BCS_BATCH(batch, 11);
2518     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2519     OUT_BCS_RELOC(batch,
2520                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2521                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2522                   0);
2523     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2524     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2525     OUT_BCS_BATCH(batch, 0);
2526     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2527     OUT_BCS_BATCH(batch, 0);
2528     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2529     OUT_BCS_BATCH(batch, 0);
2530     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2531     OUT_BCS_BATCH(batch, 0);
2532     ADVANCE_BCS_BATCH(batch);
2533 }
2534
2535 static void
2536 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2537                             struct gen7_mfd_context *gen7_mfd_context)
2538 {
2539     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2540
2541     /* the input bitstream format on GEN7 differs from GEN6 */
2542     BEGIN_BCS_BATCH(batch, 6);
2543     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2544     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2545     OUT_BCS_BATCH(batch, 0);
2546     OUT_BCS_BATCH(batch,
2547                   (0 << 31) |
2548                   (0 << 14) |
2549                   (0 << 12) |
2550                   (0 << 10) |
2551                   (0 << 8));
2552     OUT_BCS_BATCH(batch,
2553                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2554                   (0 << 5)  |
2555                   (0 << 4)  |
2556                   (1 << 3) | /* LastSlice Flag */
2557                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2558     OUT_BCS_BATCH(batch, 0);
2559     ADVANCE_BCS_BATCH(batch);
2560 }
2561
2562 static void
2563 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2564                              struct gen7_mfd_context *gen7_mfd_context)
2565 {
2566     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2567     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2568     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2569     int first_mb_in_slice = 0;
2570     int slice_type = SLICE_TYPE_I;
2571
2572     BEGIN_BCS_BATCH(batch, 11);
2573     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2574     OUT_BCS_BATCH(batch, slice_type);
2575     OUT_BCS_BATCH(batch, 
2576                   (num_ref_idx_l1 << 24) |
2577                   (num_ref_idx_l0 << 16) |
2578                   (0 << 8) |
2579                   (0 << 0));
2580     OUT_BCS_BATCH(batch, 
2581                   (0 << 29) |
2582                   (1 << 27) |   /* disable Deblocking */
2583                   (0 << 24) |
2584                   (gen7_jpeg_wa_clip.qp << 16) |
2585                   (0 << 8) |
2586                   (0 << 0));
2587     OUT_BCS_BATCH(batch, 
2588                   (slice_ver_pos << 24) |
2589                   (slice_hor_pos << 16) | 
2590                   (first_mb_in_slice << 0));
2591     OUT_BCS_BATCH(batch,
2592                   (next_slice_ver_pos << 16) |
2593                   (next_slice_hor_pos << 0));
2594     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2595     OUT_BCS_BATCH(batch, 0);
2596     OUT_BCS_BATCH(batch, 0);
2597     OUT_BCS_BATCH(batch, 0);
2598     OUT_BCS_BATCH(batch, 0);
2599     ADVANCE_BCS_BATCH(batch);
2600 }
2601
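/*
 * JPEG workaround driver: run the canned AVC clip through the full MFX
 * command sequence (pipe mode select, surface/buffer state, QM, image,
 * direct-mode, slice and BSD object) so the MFX engine has executed a known
 * AVC decode before the real JPEG picture is submitted.
 */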
2602 static void
2603 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2604                  struct gen7_mfd_context *gen7_mfd_context)
2605 {
2606     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2607     gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2608     intel_batchbuffer_emit_mi_flush(batch);
2609     gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2610     gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2611     gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2612     gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2613     gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2614     gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2615     gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2616
2617     gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2618     gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2619     gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2620 }
2621
2622 #endif
2623
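/*
 * JPEG baseline decode: the scan (slice) parameters are walked twice.  The
 * first pass only records the largest DC/AC Huffman table selector so
 * gen8_mfd_jpeg_huff_table_state() loads the right number of table pairs;
 * the second pass re-emits the indirect object base per slice-parameter
 * buffer and issues one BSD object per scan element.
 */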
2624 void
2625 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2626                              struct decode_state *decode_state,
2627                              struct gen7_mfd_context *gen7_mfd_context)
2628 {
2629     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2630     VAPictureParameterBufferJPEGBaseline *pic_param;
2631     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2632     dri_bo *slice_data_bo;
2633     int i, j, max_selector = 0;
2634
2635     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2636     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2637
2638     /* Currently only baseline DCT is supported */
2639     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2640     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2641 #ifdef JPEG_WA
2642     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2643 #endif
2644     intel_batchbuffer_emit_mi_flush(batch);
2645     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2646     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2647     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2648     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2649     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2650
2651     for (j = 0; j < decode_state->num_slice_params; j++) {
2652         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2653         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2654         slice_data_bo = decode_state->slice_datas[j]->bo;
2655         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2656
2657         if (j == decode_state->num_slice_params - 1)
2658             next_slice_group_param = NULL;
2659         else
2660             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2661
2662         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2663             int component;
2664
2665             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2666
2667             if (i < decode_state->slice_params[j]->num_elements - 1)
2668                 next_slice_param = slice_param + 1;
2669             else
2670                 next_slice_param = next_slice_group_param;
2671
2672             for (component = 0; component < slice_param->num_components; component++) {
2673                 if (max_selector < slice_param->components[component].dc_table_selector)
2674                     max_selector = slice_param->components[component].dc_table_selector;
2675
2676                 if (max_selector < slice_param->components[component].ac_table_selector)
2677                     max_selector = slice_param->components[component].ac_table_selector;
2678             }
2679
2680             slice_param++;
2681         }
2682     }
2683
2684     assert(max_selector < 2);
2685     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2686
2687     for (j = 0; j < decode_state->num_slice_params; j++) {
2688         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2689         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2690         slice_data_bo = decode_state->slice_datas[j]->bo;
2691         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2692
2693         if (j == decode_state->num_slice_params - 1)
2694             next_slice_group_param = NULL;
2695         else
2696             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2697
2698         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2699             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2700
2701             if (i < decode_state->slice_params[j]->num_elements - 1)
2702                 next_slice_param = slice_param + 1;
2703             else
2704                 next_slice_param = next_slice_group_param;
2705
2706             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2707             slice_param++;
2708         }
2709     }
2710
2711     intel_batchbuffer_end_atomic(batch);
2712     intel_batchbuffer_flush(batch);
2713 }
2714
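/*
 * Per-frame VP8 setup: the render target is written through the
 * pre-deblocking output (post-deblocking is left invalid) and the AVC-style
 * row-store scratch buffers are reallocated, sized from the frame width in
 * macroblocks (64 bytes per MB column, times 1/4/2/2 for the intra,
 * deblocking-filter, BSD/MPC and MPR row stores respectively).
 */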
2715 static void
2716 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2717                           struct decode_state *decode_state,
2718                           struct gen7_mfd_context *gen7_mfd_context)
2719 {
2720     struct object_surface *obj_surface;
2721     struct i965_driver_data *i965 = i965_driver_data(ctx);
2722     dri_bo *bo;
2723     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2724     int width_in_mbs = (pic_param->frame_width + 15) / 16;
2725     int height_in_mbs = (pic_param->frame_height + 15) / 16;
2726
2727     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2728     assert(height_in_mbs > 0 && height_in_mbs <= 256);
2729
2730     /* Current decoded picture */
2731     obj_surface = decode_state->render_object;
2732     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2733
2734     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2735     gen7_mfd_context->post_deblocking_output.bo = NULL;
2736     gen7_mfd_context->post_deblocking_output.valid = 0;
2737
2738     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2739     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2740     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2741     gen7_mfd_context->pre_deblocking_output.valid = 1;
2742
2743     /* The same as AVC */
2744     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2745     bo = dri_bo_alloc(i965->intel.bufmgr,
2746                       "intra row store",
2747                       width_in_mbs * 64,
2748                       0x1000);
2749     assert(bo);
2750     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2751     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2752
2753     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2754     bo = dri_bo_alloc(i965->intel.bufmgr,
2755                       "deblocking filter row store",
2756                       width_in_mbs * 64 * 4,
2757                       0x1000);
2758     assert(bo);
2759     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2760     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2761
2762     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2763     bo = dri_bo_alloc(i965->intel.bufmgr,
2764                       "bsd mpc row store",
2765                       width_in_mbs * 64 * 2,
2766                       0x1000);
2767     assert(bo);
2768     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2769     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2770
2771     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2772     bo = dri_bo_alloc(i965->intel.bufmgr,
2773                       "mpr row store",
2774                       width_in_mbs * 64 * 2,
2775                       0x1000);
2776     assert(bo);
2777     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2778     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2779
2780     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2781 }
2782
2783 static void
2784 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2785                        struct decode_state *decode_state,
2786                        struct gen7_mfd_context *gen7_mfd_context)
2787 {
2788     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2789     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2790     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2791     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2792     dri_bo *probs_bo = decode_state->probability_data->bo;
2793     int i, j, log2num;
2794
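    /*
     * num_of_partitions counts the control partition plus 1, 2, 4 or 8
     * token partitions, so log2(num_of_partitions - 1) yields the two-bit
     * partition-count field written into DW2 below (e.g. 3 partitions ->
     * 2 token partitions -> log2num == 1).
     */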
2795     log2num = (int)log2(slice_param->num_of_partitions - 1);
2796
2797     BEGIN_BCS_BATCH(batch, 38);
2798     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2799     OUT_BCS_BATCH(batch,
2800                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2801                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2802     OUT_BCS_BATCH(batch,
2803                   log2num << 24 |
2804                   pic_param->pic_fields.bits.sharpness_level << 16 |
2805                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2806                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2807                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2808                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2809                   pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2810                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2811                   0 << 7 | /* segmentation id streamin disabled */
2812                   0 << 6 | /* segmentation id streamout disabled */
2813                   pic_param->pic_fields.bits.key_frame << 5 |
2814                   pic_param->pic_fields.bits.filter_type << 4 |
2815                   (pic_param->pic_fields.bits.version == 3) << 1 | /* version 3 uses full-pel (integer) motion vectors */
2816                   !!pic_param->pic_fields.bits.version << 0); /* version 0 uses the 6-tap filter, non-zero versions the bilinear filter */
2817
2818     OUT_BCS_BATCH(batch,
2819                   pic_param->loop_filter_level[3] << 24 |
2820                   pic_param->loop_filter_level[2] << 16 |
2821                   pic_param->loop_filter_level[1] <<  8 |
2822                   pic_param->loop_filter_level[0] <<  0);
2823
2824     /* Quantizer values for the 4 segments, DW4-DW15 */
2825     for (i = 0; i < 4; i++) {
2826         OUT_BCS_BATCH(batch,
2827                       iq_matrix->quantization_index[i][0] << 16 | /* Y1AC */
2828                       iq_matrix->quantization_index[i][1] <<  0); /* Y1DC */
2829         OUT_BCS_BATCH(batch,
2830                       iq_matrix->quantization_index[i][5] << 16 | /* UVAC */
2831                       iq_matrix->quantization_index[i][4] <<  0); /* UVDC */
2832         OUT_BCS_BATCH(batch,
2833                       iq_matrix->quantization_index[i][3] << 16 | /* Y2AC */
2834                       iq_matrix->quantization_index[i][2] <<  0); /* Y2DC */
2835     }
2836
2837     /* CoeffProbability table for non-key frame, DW16-DW18 */
2838     if (probs_bo) {
2839         OUT_BCS_RELOC(batch, probs_bo,
2840                       0, I915_GEM_DOMAIN_INSTRUCTION,
2841                       0);
2842         OUT_BCS_BATCH(batch, 0);
2843         OUT_BCS_BATCH(batch, 0);
2844     } else {
2845         OUT_BCS_BATCH(batch, 0);
2846         OUT_BCS_BATCH(batch, 0);
2847         OUT_BCS_BATCH(batch, 0);
2848     }
2849
2850     OUT_BCS_BATCH(batch,
2851                   pic_param->mb_segment_tree_probs[2] << 16 |
2852                   pic_param->mb_segment_tree_probs[1] <<  8 |
2853                   pic_param->mb_segment_tree_probs[0] <<  0);
2854
2855     OUT_BCS_BATCH(batch,
2856                   pic_param->prob_skip_false << 24 |
2857                   pic_param->prob_intra      << 16 |
2858                   pic_param->prob_last       <<  8 |
2859                   pic_param->prob_gf         <<  0);
2860
2861     OUT_BCS_BATCH(batch,
2862                   pic_param->y_mode_probs[3] << 24 |
2863                   pic_param->y_mode_probs[2] << 16 |
2864                   pic_param->y_mode_probs[1] <<  8 |
2865                   pic_param->y_mode_probs[0] <<  0);
2866
2867     OUT_BCS_BATCH(batch,
2868                   pic_param->uv_mode_probs[2] << 16 |
2869                   pic_param->uv_mode_probs[1] <<  8 |
2870                   pic_param->uv_mode_probs[0] <<  0);
2871
2872     /* MV update value, DW23-DW32 */
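    /*
     * mv_probs[2][19] holds 19 MV context probabilities per component; they
     * are packed four to a DWORD (five DWORDs per component) and the 20th
     * byte is zero-padded so the loop never indexes past the array.
     */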
2873     for (i = 0; i < 2; i++) {
2874         for (j = 0; j < 20; j += 4) {
2875             OUT_BCS_BATCH(batch,
2876                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2877                           pic_param->mv_probs[i][j + 2] << 16 |
2878                           pic_param->mv_probs[i][j + 1] <<  8 |
2879                           pic_param->mv_probs[i][j + 0] <<  0);
2880         }
2881     }
2882
2883     OUT_BCS_BATCH(batch,
2884                   pic_param->loop_filter_deltas_ref_frame[3] << 24 |
2885                   pic_param->loop_filter_deltas_ref_frame[2] << 16 |
2886                   pic_param->loop_filter_deltas_ref_frame[1] <<  8 |
2887                   pic_param->loop_filter_deltas_ref_frame[0] <<  0);
2888
2889     OUT_BCS_BATCH(batch,
2890                   pic_param->loop_filter_deltas_mode[3] << 24 |
2891                   pic_param->loop_filter_deltas_mode[2] << 16 |
2892                   pic_param->loop_filter_deltas_mode[1] <<  8 |
2893                   pic_param->loop_filter_deltas_mode[0] <<  0);
2894
2895     /* segmentation id stream base address, DW35-DW37 */
2896     OUT_BCS_BATCH(batch, 0);
2897     OUT_BCS_BATCH(batch, 0);
2898     OUT_BCS_BATCH(batch, 0);
2899     ADVANCE_BCS_BATCH(batch);
2900 }
2901
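/*
 * VP8 BSD object: DW1/DW2 seed the boolean decoder for partition 0 (the
 * control partition) from bool_coder_ctx plus the sub-byte macroblock data
 * offset; the nine size/offset pairs that follow describe partition 0 and up
 * to eight token partitions, with unused slots zeroed (the running offset is
 * advanced for every slot, which stays in bounds since partition_size[] has
 * nine entries); the final DWORD selects the error-concealment method.
 */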
2902 static void
2903 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2904                         VAPictureParameterBufferVP8 *pic_param,
2905                         VASliceParameterBufferVP8 *slice_param,
2906                         dri_bo *slice_data_bo,
2907                         struct gen7_mfd_context *gen7_mfd_context)
2908 {
2909     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2910     int i, log2num;
2911     unsigned int offset = slice_param->slice_data_offset;
2912
2913     assert(slice_param->num_of_partitions >= 2);
2914     assert(slice_param->num_of_partitions <= 9);
2915
2916     log2num = (int)log2(slice_param->num_of_partitions - 1);
2917
2918     BEGIN_BCS_BATCH(batch, 22);
2919     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2920     OUT_BCS_BATCH(batch,
2921                   pic_param->bool_coder_ctx.count << 16 | /* Partition 0 CPBAC Entropy Count */
2922                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 CPBAC Entropy Range */
2923                   log2num << 4 |
2924                   (slice_param->macroblock_offset & 0x7));
2925     OUT_BCS_BATCH(batch,
2926                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 CPBAC Entropy Value */
2927                   0);
2928
2929     for (i = 0; i < 9; i++) {
2930         if (i < slice_param->num_of_partitions) {
2931             OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
2932             OUT_BCS_BATCH(batch, offset);
2933         } else {
2934             OUT_BCS_BATCH(batch, 0);
2935             OUT_BCS_BATCH(batch, 0);
2936         }
2937
2938         offset += slice_param->partition_size[i];
2939     }
2940
2941     OUT_BCS_BATCH(batch,
2942                   1 << 31 | /* concealment method */
2943                   0);
2944
2945     ADVANCE_BCS_BATCH(batch);
2946 }
2947
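/*
 * VP8 carries exactly one slice per frame, so the whole picture is decoded in
 * a single pass: pipe mode select, surface and buffer-address state, indirect
 * object base, picture state and one BSD object.
 */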
2948 void
2949 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
2950                             struct decode_state *decode_state,
2951                             struct gen7_mfd_context *gen7_mfd_context)
2952 {
2953     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2954     VAPictureParameterBufferVP8 *pic_param;
2955     VASliceParameterBufferVP8 *slice_param;
2956     dri_bo *slice_data_bo;
2957
2958     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2959     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2960
2961     /* one slice per frame */
2962     assert(decode_state->num_slice_params == 1);
2963     assert(decode_state->slice_params[0]->num_elements == 1);
2964     assert(decode_state->slice_params && decode_state->slice_params[0]->buffer);
2965     assert(decode_state->slice_datas[0]->bo);
2966
2967     assert(decode_state->probability_data);
2968
2969     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
2970     slice_data_bo = decode_state->slice_datas[0]->bo;
2971
2972     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
2973     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2974     intel_batchbuffer_emit_mi_flush(batch);
2975     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
2976     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
2977     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
2978     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
2979     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
2980     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
2981     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
2982     intel_batchbuffer_end_atomic(batch);
2983     intel_batchbuffer_flush(batch);
2984 }
2985
2986 static VAStatus
2987 gen8_mfd_decode_picture(VADriverContextP ctx, 
2988                         VAProfile profile, 
2989                         union codec_state *codec_state,
2990                         struct hw_context *hw_context)
2992 {
2993     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2994     struct decode_state *decode_state = &codec_state->decode;
2995     VAStatus vaStatus;
2996
2997     assert(gen7_mfd_context);
2998
2999     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3000
3001     if (vaStatus != VA_STATUS_SUCCESS)
3002         goto out;
3003
3004     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3005
3006     switch (profile) {
3007     case VAProfileMPEG2Simple:
3008     case VAProfileMPEG2Main:
3009         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3010         break;
3011         
3012     case VAProfileH264Baseline:
3013     case VAProfileH264Main:
3014     case VAProfileH264High:
3015         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3016         break;
3017
3018     case VAProfileVC1Simple:
3019     case VAProfileVC1Main:
3020     case VAProfileVC1Advanced:
3021         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3022         break;
3023
3024     case VAProfileJPEGBaseline:
3025         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3026         break;
3027
3028     case VAProfileVP8Version0_3:
3029         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3030         break;
3031
3032     default:
3033         assert(0);
3034         break;
3035     }
3036
3037     vaStatus = VA_STATUS_SUCCESS;
3038
3039 out:
3040     return vaStatus;
3041 }
3042
3043 static void
3044 gen8_mfd_context_destroy(void *hw_context)
3045 {
3046     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3047
3048     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3049     gen7_mfd_context->post_deblocking_output.bo = NULL;
3050
3051     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3052     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3053
3054     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3055     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3056
3057     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3058     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3059
3060     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3061     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3062
3063     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3064     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3065
3066     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3067     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3068
3069     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3070
3071     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3072     free(gen7_mfd_context);
3073 }
3074
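/*
 * Seed the MPEG-2 IQ-matrix bookkeeping with -1 ("not loaded yet"),
 * presumably so the quant-matrix programming treats all four matrices as
 * unset until the first IQ matrix buffer arrives.
 */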
3075 static void
3076 gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
3077 {
3078     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3079     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3080     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3081     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3082 }
3083
3084 struct hw_context *
3085 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3086 {
3087     struct intel_driver_data *intel = intel_driver_data(ctx);
3088     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3089     int i;
3090
    assert(gen7_mfd_context);
3091     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3092     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3093     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3094
3095     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3096         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3097         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3098     }
3099
3100     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3101
3102     switch (obj_config->profile) {
3103     case VAProfileMPEG2Simple:
3104     case VAProfileMPEG2Main:
3105         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3106         break;
3107
3108     case VAProfileH264Baseline:
3109     case VAProfileH264Main:
3110     case VAProfileH264High:
3111         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3112         break;
3113     default:
3114         break;
3115     }
3116     return (struct hw_context *)gen7_mfd_context;
3117 }