[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV             2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
51 static const uint32_t zigzag_direct[64] = {
52     0,   1,  8, 16,  9,  2,  3, 10,
53     17, 24, 32, 25, 18, 11,  4,  5,
54     12, 19, 26, 33, 40, 48, 41, 34,
55     27, 20, 13,  6,  7, 14, 21, 28,
56     35, 42, 49, 56, 57, 50, 43, 36,
57     29, 22, 15, 23, 30, 37, 44, 51,
58     58, 59, 52, 45, 38, 31, 39, 46,
59     53, 60, 61, 54, 47, 55, 62, 63
60 };
61
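/*
 * gen8_mfd_init_avc_surface:
 * Lazily attaches a GenAvcSurface to the decoded picture and allocates its
 * direct-mode MV buffers: dmv_top is always allocated (128 bytes per MB of
 * the frame), dmv_bottom only for field pictures decoded without
 * direct_8x8_inference.
 */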
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx, 
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77         assert((obj_surface->size & 0x3f) == 0);
78         obj_surface->private_data = gen7_avc_surface;
79     }
80
81     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
82                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
83
84     if (gen7_avc_surface->dmv_top == NULL) {
85         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86                                                  "direct mv w/r buffer",
87                                                  width_in_mbs * height_in_mbs * 128,
88                                                  0x1000);
89         assert(gen7_avc_surface->dmv_top);
90     }
91
92     if (gen7_avc_surface->dmv_bottom_flag &&
93         gen7_avc_surface->dmv_bottom == NULL) {
94         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
95                                                     "direct mv w/r buffer",
96                                                     width_in_mbs * height_in_mbs * 128,                                                    
97                                                     0x1000);
98         assert(gen7_avc_surface->dmv_bottom);
99     }
100 }
101
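/*
 * gen8_mfd_pipe_mode_select:
 * Emits MFX_PIPE_MODE_SELECT: long-format VLD decode for the selected codec
 * (MPEG-2/AVC/VC-1/JPEG/VP8), with the pre-/post-deblocking outputs enabled
 * according to the flags set up by the per-codec decode_init. As with every
 * command in this file, the low bits encode the command length as the DWord
 * count minus two.
 */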
102 static void
103 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
104                           struct decode_state *decode_state,
105                           int standard_select,
106                           struct gen7_mfd_context *gen7_mfd_context)
107 {
108     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
109
110     assert(standard_select == MFX_FORMAT_MPEG2 ||
111            standard_select == MFX_FORMAT_AVC ||
112            standard_select == MFX_FORMAT_VC1 ||
113            standard_select == MFX_FORMAT_JPEG ||
114            standard_select == MFX_FORMAT_VP8);
115
116     BEGIN_BCS_BATCH(batch, 5);
117     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
118     OUT_BCS_BATCH(batch,
119                   (MFX_LONG_MODE << 17) | /* currently only the long format is supported */
120                   (MFD_MODE_VLD << 15) | /* VLD mode */
121                   (0 << 10) | /* disable Stream-Out */
122                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
123                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
124                   (0 << 5)  | /* not in stitch mode */
125                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
126                   (standard_select << 0));
127     OUT_BCS_BATCH(batch,
128                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
129                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
130                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
131                   (0 << 1)  |
132                   (0 << 0));
133     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
134     OUT_BCS_BATCH(batch, 0); /* reserved */
135     ADVANCE_BCS_BATCH(batch);
136 }
137
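/*
 * gen8_mfd_surface_state:
 * Emits MFX_SURFACE_STATE for the render target: a Y-tiled planar 4:2:0
 * surface (NV12 for the video codecs) described by its dimensions, pitch
 * and the Y offsets of the Cb/Cr planes from the object_surface.
 */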
138 static void
139 gen8_mfd_surface_state(VADriverContextP ctx,
140                        struct decode_state *decode_state,
141                        int standard_select,
142                        struct gen7_mfd_context *gen7_mfd_context)
143 {
144     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
145     struct object_surface *obj_surface = decode_state->render_object;
146     unsigned int y_cb_offset;
147     unsigned int y_cr_offset;
148
149     assert(obj_surface);
150
151     y_cb_offset = obj_surface->y_cb_offset;
152     y_cr_offset = obj_surface->y_cr_offset;
153
154     BEGIN_BCS_BATCH(batch, 6);
155     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
156     OUT_BCS_BATCH(batch, 0);
157     OUT_BCS_BATCH(batch,
158                   ((obj_surface->orig_height - 1) << 18) |
159                   ((obj_surface->orig_width - 1) << 4));
160     OUT_BCS_BATCH(batch,
161                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
162                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
163                   (0 << 22) | /* surface object control state, ignored */
164                   ((obj_surface->width - 1) << 3) | /* pitch */
165                   (0 << 2)  | /* must be 0 */
166                   (1 << 1)  | /* must be tiled */
167                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
168     OUT_BCS_BATCH(batch,
169                   (0 << 16) | /* X offset for U(Cb), must be 0 */
170                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
171     OUT_BCS_BATCH(batch,
172                   (0 << 16) | /* X offset for V(Cr), must be 0 */
173                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
174     ADVANCE_BCS_BATCH(batch);
175 }
176
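/*
 * gen8_mfd_pipe_buf_addr_state:
 * Emits the 61-DWord MFX_PIPE_BUF_ADDR_STATE: pre-/post-deblocking outputs,
 * the intra and deblocking-filter row-store scratch buffers, and one address
 * slot per reference_surface[] entry; anything not valid is written as zero.
 */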
177 static void
178 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
179                              struct decode_state *decode_state,
180                              int standard_select,
181                              struct gen7_mfd_context *gen7_mfd_context)
182 {
183     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
184     int i;
185
186     BEGIN_BCS_BATCH(batch, 61);
187     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
188     /* Pre-deblocking 1-3 */
189     if (gen7_mfd_context->pre_deblocking_output.valid)
190         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
191                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
192                       0);
193     else
194         OUT_BCS_BATCH(batch, 0);
195
196     OUT_BCS_BATCH(batch, 0);
197     OUT_BCS_BATCH(batch, 0);
198     /* Post-deblocking 4-6 */
199     if (gen7_mfd_context->post_deblocking_output.valid)
200         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
201                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
202                       0);
203     else
204         OUT_BCS_BATCH(batch, 0);
205
206     OUT_BCS_BATCH(batch, 0);
207     OUT_BCS_BATCH(batch, 0);
208
209     /* uncompressed-video & stream out 7-12 */
210     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
211     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
212     OUT_BCS_BATCH(batch, 0);
213     OUT_BCS_BATCH(batch, 0);
214     OUT_BCS_BATCH(batch, 0);
215     OUT_BCS_BATCH(batch, 0);
216
217     /* intra row-store scratch 13-15 */
218     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
219         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
220                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
221                       0);
222     else
223         OUT_BCS_BATCH(batch, 0);
224
225     OUT_BCS_BATCH(batch, 0);
226     OUT_BCS_BATCH(batch, 0);
227     /* deblocking-filter-row-store 16-18 */
228     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
229         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
230                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
231                       0);
232     else
233         OUT_BCS_BATCH(batch, 0);
234     OUT_BCS_BATCH(batch, 0);
235     OUT_BCS_BATCH(batch, 0);
236
237     /* DW 19..50 */
238     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
239         struct object_surface *obj_surface;
240
241         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
242             gen7_mfd_context->reference_surface[i].obj_surface &&
243             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
244             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
245
246             OUT_BCS_RELOC(batch, obj_surface->bo,
247                           I915_GEM_DOMAIN_INSTRUCTION, 0,
248                           0);
249         } else {
250             OUT_BCS_BATCH(batch, 0);
251         }
252         
253         OUT_BCS_BATCH(batch, 0);
254     }
255     
256     /* reference property 51 */
257     OUT_BCS_BATCH(batch, 0);  
258         
259     /* Macroblock status & ILDB 52-57 */
260     OUT_BCS_BATCH(batch, 0);
261     OUT_BCS_BATCH(batch, 0);
262     OUT_BCS_BATCH(batch, 0);
263     OUT_BCS_BATCH(batch, 0);
264     OUT_BCS_BATCH(batch, 0);
265     OUT_BCS_BATCH(batch, 0);
266
267     /* the second Macroblock status 58-60 */    
268     OUT_BCS_BATCH(batch, 0);
269     OUT_BCS_BATCH(batch, 0);
270     OUT_BCS_BATCH(batch, 0);
271
272     ADVANCE_BCS_BATCH(batch);
273 }
274
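/*
 * gen8_mfd_ind_obj_base_addr_state:
 * Points the MFX indirect bitstream object at the slice data BO. The other
 * indirect objects (MV, IT coefficient, IT deblock, PAK bitstream) are not
 * used for VLD decode and stay zero; the upper bound is set to the maximum
 * 2G window.
 */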
275 static void
276 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
277                                  dri_bo *slice_data_bo,
278                                  int standard_select,
279                                  struct gen7_mfd_context *gen7_mfd_context)
280 {
281     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
282
283     BEGIN_BCS_BATCH(batch, 26);
284     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
285         /* MFX In BS 1-5 */
286     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
287     OUT_BCS_BATCH(batch, 0);
288     OUT_BCS_BATCH(batch, 0);
289         /* Upper bound 4-5 */   
290     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
291     OUT_BCS_BATCH(batch, 0);
292
293         /* MFX indirect MV 6-10 */
294     OUT_BCS_BATCH(batch, 0);
295     OUT_BCS_BATCH(batch, 0);
296     OUT_BCS_BATCH(batch, 0);
297     OUT_BCS_BATCH(batch, 0);
298     OUT_BCS_BATCH(batch, 0);
299         
300         /* MFX IT_COFF 11-15 */
301     OUT_BCS_BATCH(batch, 0);
302     OUT_BCS_BATCH(batch, 0);
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305     OUT_BCS_BATCH(batch, 0);
306
307         /* MFX IT_DBLK 16-20 */
308     OUT_BCS_BATCH(batch, 0);
309     OUT_BCS_BATCH(batch, 0);
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312     OUT_BCS_BATCH(batch, 0);
313
314         /* MFX PAK_BSE object for encoder 21-25 */
315     OUT_BCS_BATCH(batch, 0);
316     OUT_BCS_BATCH(batch, 0);
317     OUT_BCS_BATCH(batch, 0);
318     OUT_BCS_BATCH(batch, 0);
319     OUT_BCS_BATCH(batch, 0);
320
321     ADVANCE_BCS_BATCH(batch);
322 }
323
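/*
 * gen8_mfd_bsp_buf_base_addr_state:
 * Emits MFX_BSP_BUF_BASE_ADDR_STATE with the BSD/MPC row-store, MPR
 * row-store and (for VC-1) bitplane read buffers; buffers whose .valid flag
 * is clear are programmed as zero.
 */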
324 static void
325 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
326                                  struct decode_state *decode_state,
327                                  int standard_select,
328                                  struct gen7_mfd_context *gen7_mfd_context)
329 {
330     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
331
332     BEGIN_BCS_BATCH(batch, 10);
333     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
334
335     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
336         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
337                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
338                       0);
339     else
340         OUT_BCS_BATCH(batch, 0);
341
342     OUT_BCS_BATCH(batch, 0);
343     OUT_BCS_BATCH(batch, 0);
344         /* MPR Row Store Scratch buffer 4-6 */
345     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
346         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
347                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
348                       0);
349     else
350         OUT_BCS_BATCH(batch, 0);
351
352     OUT_BCS_BATCH(batch, 0);
353     OUT_BCS_BATCH(batch, 0);
354
355         /* Bitplane 7-9 */ 
356     if (gen7_mfd_context->bitplane_read_buffer.valid)
357         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
358                       I915_GEM_DOMAIN_INSTRUCTION, 0,
359                       0);
360     else
361         OUT_BCS_BATCH(batch, 0);
362     OUT_BCS_BATCH(batch, 0);
363     OUT_BCS_BATCH(batch, 0);
364     ADVANCE_BCS_BATCH(batch);
365 }
366
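/*
 * gen8_mfd_qm_state:
 * Uploads a single quantizer matrix via MFX_QM_STATE; qm_type selects the
 * destination matrix and the (at most 64-byte) payload is staged through a
 * local 16-DWord buffer before being written into the batch.
 */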
367 static void
368 gen8_mfd_qm_state(VADriverContextP ctx,
369                   int qm_type,
370                   unsigned char *qm,
371                   int qm_length,
372                   struct gen7_mfd_context *gen7_mfd_context)
373 {
374     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
375     unsigned int qm_buffer[16];
376
377     assert(qm_length <= 16 * 4);
378     memcpy(qm_buffer, qm, qm_length);
379
380     BEGIN_BCS_BATCH(batch, 18);
381     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
382     OUT_BCS_BATCH(batch, qm_type << 0);
383     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
384     ADVANCE_BCS_BATCH(batch);
385 }
386
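/*
 * gen8_mfd_avc_img_state:
 * Derives the image structure (frame/top/bottom field), the MBAFF flag and
 * the picture size in macroblocks from the VA picture parameters and emits
 * MFX_AVC_IMG_STATE. Only monochrome and 4:2:0 chroma formats are accepted.
 */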
387 static void
388 gen8_mfd_avc_img_state(VADriverContextP ctx,
389                        struct decode_state *decode_state,
390                        struct gen7_mfd_context *gen7_mfd_context)
391 {
392     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
393     int img_struct;
394     int mbaff_frame_flag;
395     unsigned int width_in_mbs, height_in_mbs;
396     VAPictureParameterBufferH264 *pic_param;
397
398     assert(decode_state->pic_param && decode_state->pic_param->buffer);
399     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
400     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
401
402     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
403         img_struct = 1;
404     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
405         img_struct = 3;
406     else
407         img_struct = 0;
408
409     if ((img_struct & 0x1) == 0x1) {
410         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
411     } else {
412         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
413     }
414
415     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
416         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
417         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
418     } else {
419         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
420     }
421
422     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
423                         !pic_param->pic_fields.bits.field_pic_flag);
424
425     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
426     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
427
428     /* MFX unit doesn't support 4:2:2 and 4:4:4 pictures */
429     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
430            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
431     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
432
433     BEGIN_BCS_BATCH(batch, 17);
434     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
435     OUT_BCS_BATCH(batch, 
436                   (width_in_mbs * height_in_mbs - 1));
437     OUT_BCS_BATCH(batch, 
438                   ((height_in_mbs - 1) << 16) | 
439                   ((width_in_mbs - 1) << 0));
440     OUT_BCS_BATCH(batch, 
441                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
442                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
443                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
444                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
445                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
446                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
447                   (img_struct << 8));
448     OUT_BCS_BATCH(batch,
449                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
450                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
451                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
452                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
453                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
454                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
455                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
456                   (mbaff_frame_flag << 1) |
457                   (pic_param->pic_fields.bits.field_pic_flag << 0));
458     OUT_BCS_BATCH(batch, 0);
459     OUT_BCS_BATCH(batch, 0);
460     OUT_BCS_BATCH(batch, 0);
461     OUT_BCS_BATCH(batch, 0);
462     OUT_BCS_BATCH(batch, 0);
463     OUT_BCS_BATCH(batch, 0);
464     OUT_BCS_BATCH(batch, 0);
465     OUT_BCS_BATCH(batch, 0);
466     OUT_BCS_BATCH(batch, 0);
467     OUT_BCS_BATCH(batch, 0);
468     OUT_BCS_BATCH(batch, 0);
469     OUT_BCS_BATCH(batch, 0);
470     ADVANCE_BCS_BATCH(batch);
471 }
472
473 static void
474 gen8_mfd_avc_qm_state(VADriverContextP ctx,
475                       struct decode_state *decode_state,
476                       struct gen7_mfd_context *gen7_mfd_context)
477 {
478     VAIQMatrixBufferH264 *iq_matrix;
479     VAPictureParameterBufferH264 *pic_param;
480
481     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
482         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
483     else
484         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
485
486     assert(decode_state->pic_param && decode_state->pic_param->buffer);
487     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
488
489     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
490     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
491
492     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
493         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
494         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
495     }
496 }
497
498 static void
499 gen8_mfd_avc_picid_state(VADriverContextP ctx,
500                       struct decode_state *decode_state,
501                       struct gen7_mfd_context *gen7_mfd_context)
502 {
503     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
504
505     BEGIN_BCS_BATCH(batch, 10);
506     OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
507     OUT_BCS_BATCH(batch, 1); /* disable Picture ID Remapping */
508     OUT_BCS_BATCH(batch, 0);
509     OUT_BCS_BATCH(batch, 0);
510     OUT_BCS_BATCH(batch, 0);
511     OUT_BCS_BATCH(batch, 0);
512     OUT_BCS_BATCH(batch, 0);
513     OUT_BCS_BATCH(batch, 0);
514     OUT_BCS_BATCH(batch, 0);
515     OUT_BCS_BATCH(batch, 0);
516     ADVANCE_BCS_BATCH(batch);
517 }
518
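/*
 * gen8_mfd_avc_directmode_state:
 * Emits MFX_AVC_DIRECTMODE_STATE: the direct-mode MV buffer of every valid
 * reference surface and of the current picture, followed by the POC list
 * (Top/BottomFieldOrderCnt) for the references and for the current picture.
 */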
519 static void
520 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
521                               struct decode_state *decode_state,
522                               VAPictureParameterBufferH264 *pic_param,
523                               VASliceParameterBufferH264 *slice_param,
524                               struct gen7_mfd_context *gen7_mfd_context)
525 {
526     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
527     struct object_surface *obj_surface;
528     GenAvcSurface *gen7_avc_surface;
529     VAPictureH264 *va_pic;
530     int i, j;
531
532     BEGIN_BCS_BATCH(batch, 71);
533     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
534
535     /* reference surfaces 0..15 */
536     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
537         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
538             gen7_mfd_context->reference_surface[i].obj_surface &&
539             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
540
541             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
542             gen7_avc_surface = obj_surface->private_data;
543
544             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
545                           I915_GEM_DOMAIN_INSTRUCTION, 0,
546                           0);
547             OUT_BCS_BATCH(batch, 0);
548         } else {
549             OUT_BCS_BATCH(batch, 0);
550             OUT_BCS_BATCH(batch, 0);
551         }
552     }
553     
554     OUT_BCS_BATCH(batch, 0);
555
556     /* the current decoding frame/field */
557     va_pic = &pic_param->CurrPic;
558     obj_surface = decode_state->render_object;
559     assert(obj_surface->bo && obj_surface->private_data);
560     gen7_avc_surface = obj_surface->private_data;
561
562     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
563                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
564                   0);
565
566     OUT_BCS_BATCH(batch, 0);
567     OUT_BCS_BATCH(batch, 0);
568
569     /* POC List */
570     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
571         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
572             int found = 0;
573
574             assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
575
576             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
577                 va_pic = &pic_param->ReferenceFrames[j];
578                 
579                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
580                     continue;
581
582                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
583                     found = 1;
584                     break;
585                 }
586             }
587
588             assert(found == 1);
589             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
590             
591             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
592             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
593         } else {
594             OUT_BCS_BATCH(batch, 0);
595             OUT_BCS_BATCH(batch, 0);
596         }
597     }
598
599     va_pic = &pic_param->CurrPic;
600     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
601     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
602
603     ADVANCE_BCS_BATCH(batch);
604 }
605
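/*
 * gen8_mfd_avc_slice_state:
 * Translates one VA slice into MFX_AVC_SLICE_STATE: normalized slice type,
 * active reference counts, QP and deblocking offsets, and the macroblock
 * start positions of this slice and of the next one (or the picture end for
 * the last slice).
 */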
606 static void
607 gen8_mfd_avc_slice_state(VADriverContextP ctx,
608                          VAPictureParameterBufferH264 *pic_param,
609                          VASliceParameterBufferH264 *slice_param,
610                          VASliceParameterBufferH264 *next_slice_param,
611                          struct gen7_mfd_context *gen7_mfd_context)
612 {
613     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
614     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
615     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
616     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
617     int num_ref_idx_l0, num_ref_idx_l1;
618     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
619                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
620     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
621     int slice_type;
622
623     if (slice_param->slice_type == SLICE_TYPE_I ||
624         slice_param->slice_type == SLICE_TYPE_SI) {
625         slice_type = SLICE_TYPE_I;
626     } else if (slice_param->slice_type == SLICE_TYPE_P ||
627                slice_param->slice_type == SLICE_TYPE_SP) {
628         slice_type = SLICE_TYPE_P;
629     } else { 
630         assert(slice_param->slice_type == SLICE_TYPE_B);
631         slice_type = SLICE_TYPE_B;
632     }
633
634     if (slice_type == SLICE_TYPE_I) {
635         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
636         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
637         num_ref_idx_l0 = 0;
638         num_ref_idx_l1 = 0;
639     } else if (slice_type == SLICE_TYPE_P) {
640         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
641         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
642         num_ref_idx_l1 = 0;
643     } else {
644         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
645         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
646     }
647
648     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
649     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
650     slice_ver_pos = first_mb_in_slice / width_in_mbs;
651
652     if (next_slice_param) {
653         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
654         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
655         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
656     } else {
657         next_slice_hor_pos = 0;
658         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
659     }
660
661     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
662     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
663     OUT_BCS_BATCH(batch, slice_type);
664     OUT_BCS_BATCH(batch, 
665                   (num_ref_idx_l1 << 24) |
666                   (num_ref_idx_l0 << 16) |
667                   (slice_param->chroma_log2_weight_denom << 8) |
668                   (slice_param->luma_log2_weight_denom << 0));
669     OUT_BCS_BATCH(batch, 
670                   (slice_param->direct_spatial_mv_pred_flag << 29) |
671                   (slice_param->disable_deblocking_filter_idc << 27) |
672                   (slice_param->cabac_init_idc << 24) |
673                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
674                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
675                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
676     OUT_BCS_BATCH(batch, 
677                   (slice_ver_pos << 24) |
678                   (slice_hor_pos << 16) | 
679                   (first_mb_in_slice << 0));
680     OUT_BCS_BATCH(batch,
681                   (next_slice_ver_pos << 16) |
682                   (next_slice_hor_pos << 0));
683     OUT_BCS_BATCH(batch, 
684                   (next_slice_param == NULL) << 19); /* last slice flag */
685     OUT_BCS_BATCH(batch, 0);
686     OUT_BCS_BATCH(batch, 0);
687     OUT_BCS_BATCH(batch, 0);
688     OUT_BCS_BATCH(batch, 0);
689     ADVANCE_BCS_BATCH(batch);
690 }
691
692 static inline void
693 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
694                            VAPictureParameterBufferH264 *pic_param,
695                            VASliceParameterBufferH264 *slice_param,
696                            struct gen7_mfd_context *gen7_mfd_context)
697 {
698     gen6_send_avc_ref_idx_state(
699         gen7_mfd_context->base.batch,
700         slice_param,
701         gen7_mfd_context->reference_surface
702     );
703 }
704
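/*
 * gen8_mfd_avc_weightoffset_state:
 * Emits MFX_AVC_WEIGHTOFFSET_STATE for L0 when explicit weighted prediction
 * is used on P/SP slices, and for both L0 and L1 on B slices with
 * weighted_bipred_idc == 1; each table packs 32 luma/chroma weight and
 * offset pairs.
 */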
705 static void
706 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
707                                 VAPictureParameterBufferH264 *pic_param,
708                                 VASliceParameterBufferH264 *slice_param,
709                                 struct gen7_mfd_context *gen7_mfd_context)
710 {
711     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
712     int i, j, num_weight_offset_table = 0;
713     short weightoffsets[32 * 6];
714
715     if ((slice_param->slice_type == SLICE_TYPE_P ||
716          slice_param->slice_type == SLICE_TYPE_SP) &&
717         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
718         num_weight_offset_table = 1;
719     }
720     
721     if ((slice_param->slice_type == SLICE_TYPE_B) &&
722         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
723         num_weight_offset_table = 2;
724     }
725
726     for (i = 0; i < num_weight_offset_table; i++) {
727         BEGIN_BCS_BATCH(batch, 98);
728         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
729         OUT_BCS_BATCH(batch, i);
730
731         if (i == 0) {
732             for (j = 0; j < 32; j++) {
733                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
734                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
735                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
736                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
737                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
738                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
739             }
740         } else {
741             for (j = 0; j < 32; j++) {
742                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
743                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
744                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
745                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
746                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
747                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
748             }
749         }
750
751         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
752         ADVANCE_BCS_BATCH(batch);
753     }
754 }
755
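/*
 * gen8_mfd_avc_bsd_object:
 * Emits MFD_AVC_BSD_OBJECT for one slice: its size and offset within the
 * indirect bitstream buffer plus the bit offset of the first macroblock,
 * obtained from avc_get_first_mb_bit_offset() for the slice's entropy
 * coding mode.
 */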
756 static void
757 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
758                         VAPictureParameterBufferH264 *pic_param,
759                         VASliceParameterBufferH264 *slice_param,
760                         dri_bo *slice_data_bo,
761                         VASliceParameterBufferH264 *next_slice_param,
762                         struct gen7_mfd_context *gen7_mfd_context)
763 {
764     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
765     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
766                                                             slice_param,
767                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
768
769     /* the input bitstream format on GEN7 differs from GEN6 */
770     BEGIN_BCS_BATCH(batch, 6);
771     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
772     OUT_BCS_BATCH(batch, 
773                   (slice_param->slice_data_size));
774     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
775     OUT_BCS_BATCH(batch,
776                   (0 << 31) |
777                   (0 << 14) |
778                   (0 << 12) |
779                   (0 << 10) |
780                   (0 << 8));
781     OUT_BCS_BATCH(batch,
782                   ((slice_data_bit_offset >> 3) << 16) |
783                   (1 << 7)  |
784                   (0 << 5)  |
785                   (0 << 4)  |
786                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
787                   (slice_data_bit_offset & 0x7));
788     OUT_BCS_BATCH(batch, 0);
789     ADVANCE_BCS_BATCH(batch);
790 }
791
792 static inline void
793 gen8_mfd_avc_context_init(
794     VADriverContextP         ctx,
795     struct gen7_mfd_context *gen7_mfd_context
796 )
797 {
798     /* Initialize flat scaling lists */
799     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
800 }
801
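/*
 * gen8_mfd_avc_decode_init:
 * Per-picture AVC setup: scan the slices to see whether in-loop deblocking
 * output is needed, refresh the reference frame store, make sure the render
 * target has a BO and a GenAvcSurface, and (re)allocate the row-store
 * scratch buffers sized from the picture width in macroblocks.
 */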
802 static void
803 gen8_mfd_avc_decode_init(VADriverContextP ctx,
804                          struct decode_state *decode_state,
805                          struct gen7_mfd_context *gen7_mfd_context)
806 {
807     VAPictureParameterBufferH264 *pic_param;
808     VASliceParameterBufferH264 *slice_param;
809     struct i965_driver_data *i965 = i965_driver_data(ctx);
810     struct object_surface *obj_surface;
811     dri_bo *bo;
812     int i, j, enable_avc_ildb = 0;
813     unsigned int width_in_mbs, height_in_mbs;
814
815     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
816         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
817         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
818
819         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
820             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
821             assert((slice_param->slice_type == SLICE_TYPE_I) ||
822                    (slice_param->slice_type == SLICE_TYPE_SI) ||
823                    (slice_param->slice_type == SLICE_TYPE_P) ||
824                    (slice_param->slice_type == SLICE_TYPE_SP) ||
825                    (slice_param->slice_type == SLICE_TYPE_B));
826
827             if (slice_param->disable_deblocking_filter_idc != 1) {
828                 enable_avc_ildb = 1;
829                 break;
830             }
831
832             slice_param++;
833         }
834     }
835
836     assert(decode_state->pic_param && decode_state->pic_param->buffer);
837     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
838     intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
839     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
840     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
841     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
842     assert(height_in_mbs > 0 && height_in_mbs <= 256);
843
844     /* Current decoded picture */
845     obj_surface = decode_state->render_object;
846     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
847     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
848
849     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
850     gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
851
852     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
853     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
854     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
855     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
856
857     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
858     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
859     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
860     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
861
862     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
863     bo = dri_bo_alloc(i965->intel.bufmgr,
864                       "intra row store",
865                       width_in_mbs * 64,
866                       0x1000);
867     assert(bo);
868     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
869     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
870
871     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
872     bo = dri_bo_alloc(i965->intel.bufmgr,
873                       "deblocking filter row store",
874                       width_in_mbs * 64 * 4,
875                       0x1000);
876     assert(bo);
877     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
878     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
879
880     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
881     bo = dri_bo_alloc(i965->intel.bufmgr,
882                       "bsd mpc row store",
883                       width_in_mbs * 64 * 2,
884                       0x1000);
885     assert(bo);
886     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
887     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
888
889     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
890     bo = dri_bo_alloc(i965->intel.bufmgr,
891                       "mpr row store",
892                       width_in_mbs * 64 * 2,
893                       0x1000);
894     assert(bo);
895     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
896     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
897
898     gen7_mfd_context->bitplane_read_buffer.valid = 0;
899 }
900
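/*
 * gen8_mfd_avc_decode_picture:
 * Top-level AVC decode: after decode_init, the per-picture states (pipe
 * mode, surface, buffer addresses, QM, IMG, PICID) are emitted once, then
 * each slice gets its direct-mode, ref-idx, weight/offset and slice states
 * followed by the BSD object that triggers its decoding.
 */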
901 static void
902 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
903                             struct decode_state *decode_state,
904                             struct gen7_mfd_context *gen7_mfd_context)
905 {
906     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
907     VAPictureParameterBufferH264 *pic_param;
908     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
909     dri_bo *slice_data_bo;
910     int i, j;
911
912     assert(decode_state->pic_param && decode_state->pic_param->buffer);
913     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
914     gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
915
916     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
917     intel_batchbuffer_emit_mi_flush(batch);
918     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
919     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
920     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
921     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
922     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
923     gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
924     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
925
926     for (j = 0; j < decode_state->num_slice_params; j++) {
927         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
928         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
929         slice_data_bo = decode_state->slice_datas[j]->bo;
930         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
931
932         if (j == decode_state->num_slice_params - 1)
933             next_slice_group_param = NULL;
934         else
935             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
936
937         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
938             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
939             assert((slice_param->slice_type == SLICE_TYPE_I) ||
940                    (slice_param->slice_type == SLICE_TYPE_SI) ||
941                    (slice_param->slice_type == SLICE_TYPE_P) ||
942                    (slice_param->slice_type == SLICE_TYPE_SP) ||
943                    (slice_param->slice_type == SLICE_TYPE_B));
944
945             if (i < decode_state->slice_params[j]->num_elements - 1)
946                 next_slice_param = slice_param + 1;
947             else
948                 next_slice_param = next_slice_group_param;
949
950             gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
951             gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
952             gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
953             gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
954             gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
955             slice_param++;
956         }
957     }
958
959     intel_batchbuffer_end_atomic(batch);
960     intel_batchbuffer_flush(batch);
961 }
962
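/*
 * gen8_mfd_mpeg2_decode_init:
 * Per-picture MPEG-2 setup: bind the reference surfaces, make sure the
 * render target has an NV12 BO, route output through the pre-deblocking
 * path (MPEG-2 has no in-loop filter) and allocate the BSD/MPC row-store
 * scratch buffer.
 */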
963 static void
964 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
965                            struct decode_state *decode_state,
966                            struct gen7_mfd_context *gen7_mfd_context)
967 {
968     VAPictureParameterBufferMPEG2 *pic_param;
969     struct i965_driver_data *i965 = i965_driver_data(ctx);
970     struct object_surface *obj_surface;
971     dri_bo *bo;
972     unsigned int width_in_mbs;
973
974     assert(decode_state->pic_param && decode_state->pic_param->buffer);
975     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
976     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
977
978     mpeg2_set_reference_surfaces(
979         ctx,
980         gen7_mfd_context->reference_surface,
981         decode_state,
982         pic_param
983     );
984
985     /* Current decoded picture */
986     obj_surface = decode_state->render_object;
987     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
988
989     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
990     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
991     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
992     gen7_mfd_context->pre_deblocking_output.valid = 1;
993
994     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
995     bo = dri_bo_alloc(i965->intel.bufmgr,
996                       "bsd mpc row store",
997                       width_in_mbs * 96,
998                       0x1000);
999     assert(bo);
1000     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1001     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1002
1003     gen7_mfd_context->post_deblocking_output.valid = 0;
1004     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1005     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1006     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1007     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1008 }
1009
1010 static void
1011 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1012                          struct decode_state *decode_state,
1013                          struct gen7_mfd_context *gen7_mfd_context)
1014 {
1015     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1016     VAPictureParameterBufferMPEG2 *pic_param;
1017     unsigned int slice_concealment_disable_bit = 0;
1018
1019     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1020     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1021
1022     slice_concealment_disable_bit = 1;
1023
1024     BEGIN_BCS_BATCH(batch, 13);
1025     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1026     OUT_BCS_BATCH(batch,
1027                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1028                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1029                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1030                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1031                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1032                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1033                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1034                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1035                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1036                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1037                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1038                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1039     OUT_BCS_BATCH(batch,
1040                   pic_param->picture_coding_type << 9);
1041     OUT_BCS_BATCH(batch,
1042                   (slice_concealment_disable_bit << 31) |
1043                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1044                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1045     OUT_BCS_BATCH(batch, 0);
1046     OUT_BCS_BATCH(batch, 0);
1047     OUT_BCS_BATCH(batch, 0);
1048     OUT_BCS_BATCH(batch, 0);
1049     OUT_BCS_BATCH(batch, 0);
1050     OUT_BCS_BATCH(batch, 0);
1051     OUT_BCS_BATCH(batch, 0);
1052     OUT_BCS_BATCH(batch, 0);
1053     OUT_BCS_BATCH(batch, 0);
1054     ADVANCE_BCS_BATCH(batch);
1055 }
1056
1057 static void
1058 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1059                         struct decode_state *decode_state,
1060                         struct gen7_mfd_context *gen7_mfd_context)
1061 {
1062     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1063     int i, j;
1064
1065     /* Update internal QM state */
1066     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1067         VAIQMatrixBufferMPEG2 * const iq_matrix =
1068             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1069
1070         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1071             iq_matrix->load_intra_quantiser_matrix) {
1072             gen_iq_matrix->load_intra_quantiser_matrix =
1073                 iq_matrix->load_intra_quantiser_matrix;
1074             if (iq_matrix->load_intra_quantiser_matrix) {
1075                 for (j = 0; j < 64; j++)
1076                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1077                         iq_matrix->intra_quantiser_matrix[j];
1078             }
1079         }
1080
1081         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1082             iq_matrix->load_non_intra_quantiser_matrix) {
1083             gen_iq_matrix->load_non_intra_quantiser_matrix =
1084                 iq_matrix->load_non_intra_quantiser_matrix;
1085             if (iq_matrix->load_non_intra_quantiser_matrix) {
1086                 for (j = 0; j < 64; j++)
1087                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1088                         iq_matrix->non_intra_quantiser_matrix[j];
1089             }
1090         }
1091     }
1092
1093     /* Commit QM state to HW */
1094     for (i = 0; i < 2; i++) {
1095         unsigned char *qm = NULL;
1096         int qm_type;
1097
1098         if (i == 0) {
1099             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1100                 qm = gen_iq_matrix->intra_quantiser_matrix;
1101                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1102             }
1103         } else {
1104             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1105                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1106                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1107             }
1108         }
1109
1110         if (!qm)
1111             continue;
1112
1113         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1114     }
1115 }
1116
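/*
 * gen8_mfd_mpeg2_bsd_object:
 * Emits MFD_MPEG2_BSD_OBJECT for one slice. The macroblock count is derived
 * from the start positions of this slice and the next; for field pictures
 * the vertical positions are halved when the wa_mpeg2_slice_vertical_position
 * work-around is active, which appears to compensate for slice parameters
 * counted in frame rows.
 */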
1117 static void
1118 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1119                           VAPictureParameterBufferMPEG2 *pic_param,
1120                           VASliceParameterBufferMPEG2 *slice_param,
1121                           VASliceParameterBufferMPEG2 *next_slice_param,
1122                           struct gen7_mfd_context *gen7_mfd_context)
1123 {
1124     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1125     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1126     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1127
1128     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1129         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1130         is_field_pic = 1;
1131     is_field_pic_wa = is_field_pic &&
1132         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1133
1134     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1135     hpos0 = slice_param->slice_horizontal_position;
1136
1137     if (next_slice_param == NULL) {
1138         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1139         hpos1 = 0;
1140     } else {
1141         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1142         hpos1 = next_slice_param->slice_horizontal_position;
1143     }
1144
1145     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1146
1147     BEGIN_BCS_BATCH(batch, 5);
1148     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1149     OUT_BCS_BATCH(batch, 
1150                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1151     OUT_BCS_BATCH(batch, 
1152                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1153     OUT_BCS_BATCH(batch,
1154                   hpos0 << 24 |
1155                   vpos0 << 16 |
1156                   mb_count << 8 |
1157                   (next_slice_param == NULL) << 5 |
1158                   (next_slice_param == NULL) << 3 |
1159                   (slice_param->macroblock_offset & 0x7));
1160     OUT_BCS_BATCH(batch,
1161                   (slice_param->quantiser_scale_code << 24) |
1162                   (vpos1 << 8 | hpos1));
1163     ADVANCE_BCS_BATCH(batch);
1164 }
1165
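/*
 * gen8_mfd_mpeg2_decode_picture:
 * Top-level MPEG-2 decode: common MFX states plus the MPEG-2 picture and QM
 * states, then one BSD object per slice, with the slice vertical position
 * work-around resolved once per picture.
 */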
1166 static void
1167 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1168                               struct decode_state *decode_state,
1169                               struct gen7_mfd_context *gen7_mfd_context)
1170 {
1171     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1172     VAPictureParameterBufferMPEG2 *pic_param;
1173     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1174     dri_bo *slice_data_bo;
1175     int i, j;
1176
1177     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1178     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1179
1180     gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1181     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1182     intel_batchbuffer_emit_mi_flush(batch);
1183     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1184     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1185     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1186     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1187     gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1188     gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1189
1190     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1191         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1192             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1193
1194     for (j = 0; j < decode_state->num_slice_params; j++) {
1195         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1196         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1197         slice_data_bo = decode_state->slice_datas[j]->bo;
1198         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1199
1200         if (j == decode_state->num_slice_params - 1)
1201             next_slice_group_param = NULL;
1202         else
1203             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1204
1205         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1206             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1207
1208             if (i < decode_state->slice_params[j]->num_elements - 1)
1209                 next_slice_param = slice_param + 1;
1210             else
1211                 next_slice_param = next_slice_group_param;
1212
1213             gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1214             slice_param++;
1215         }
1216     }
1217
1218     intel_batchbuffer_end_atomic(batch);
1219     intel_batchbuffer_flush(batch);
1220 }
1221
1222 static const int va_to_gen7_vc1_pic_type[5] = {
1223     GEN7_VC1_I_PICTURE,
1224     GEN7_VC1_P_PICTURE,
1225     GEN7_VC1_B_PICTURE,
1226     GEN7_VC1_BI_PICTURE,
1227     GEN7_VC1_P_PICTURE,
1228 };
1229
1230 static const int va_to_gen7_vc1_mv[4] = {
1231     1, /* 1-MV */
1232     2, /* 1-MV half-pel */
1233     3, /* 1-MV half-pel bilinear */
1234     0, /* Mixed MV */
1235 };
1236
1237 static const int b_picture_scale_factor[21] = {
1238     128, 85,  170, 64,  192,
1239     51,  102, 153, 204, 43,
1240     215, 37,  74,  111, 148,
1241     185, 222, 32,  96,  160, 
1242     224,
1243 };
1244
1245 static const int va_to_gen7_vc1_condover[3] = {
1246     0,
1247     2,
1248     3
1249 };
1250
1251 static const int va_to_gen7_vc1_profile[4] = {
1252     GEN7_VC1_SIMPLE_PROFILE,
1253     GEN7_VC1_MAIN_PROFILE,
1254     GEN7_VC1_RESERVED_PROFILE,
1255     GEN7_VC1_ADVANCED_PROFILE
1256 };
1257
1258 static void 
1259 gen8_mfd_free_vc1_surface(void **data)
1260 {
1261     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1262
1263     if (!gen7_vc1_surface)
1264         return;
1265
1266     dri_bo_unreference(gen7_vc1_surface->dmv);
1267     free(gen7_vc1_surface);
1268     *data = NULL;
1269 }
1270
1271 static void
1272 gen8_mfd_init_vc1_surface(VADriverContextP ctx, 
1273                           VAPictureParameterBufferVC1 *pic_param,
1274                           struct object_surface *obj_surface)
1275 {
1276     struct i965_driver_data *i965 = i965_driver_data(ctx);
1277     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1278     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1279     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1280
1281     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1282
1283     if (!gen7_vc1_surface) {
1284         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1285         assert((obj_surface->size & 0x3f) == 0);
1286         obj_surface->private_data = gen7_vc1_surface;
1287     }
1288
1289     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1290
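    /*
     * Direct-mode motion vector scratch for this surface, sized at 64 bytes
     * per macroblock. It is written while this picture is decoded and read
     * back when the picture later serves as the backward reference of a
     * B picture (see gen8_mfd_vc1_directmode_state()).
     */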
1291     if (gen7_vc1_surface->dmv == NULL) {
1292         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1293                                              "direct mv w/r buffer",
1294                                              width_in_mbs * height_in_mbs * 64,
1295                                              0x1000);
1296     }
1297 }
1298
1299 static void
1300 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1301                          struct decode_state *decode_state,
1302                          struct gen7_mfd_context *gen7_mfd_context)
1303 {
1304     VAPictureParameterBufferVC1 *pic_param;
1305     struct i965_driver_data *i965 = i965_driver_data(ctx);
1306     struct object_surface *obj_surface;
1307     dri_bo *bo;
1308     int width_in_mbs;
1309     int picture_type;
1310
1311     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1312     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1313     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1314     picture_type = pic_param->picture_fields.bits.picture_type;
1315  
1316     intel_update_vc1_frame_store_index(ctx,
1317                                        decode_state,
1318                                        pic_param,
1319                                        gen7_mfd_context->reference_surface);
1320
1321     /* Current decoded picture */
1322     obj_surface = decode_state->render_object;
1323     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1324     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1325
1326     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1327     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1328     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1329     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1330
1331     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1332     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1333     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1334     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1335
1336     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1337     bo = dri_bo_alloc(i965->intel.bufmgr,
1338                       "intra row store",
1339                       width_in_mbs * 64,
1340                       0x1000);
1341     assert(bo);
1342     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1343     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1344
1345     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1346     bo = dri_bo_alloc(i965->intel.bufmgr,
1347                       "deblocking filter row store",
1348                       width_in_mbs * 7 * 64,
1349                       0x1000);
1350     assert(bo);
1351     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1352     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1353
1354     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1355     bo = dri_bo_alloc(i965->intel.bufmgr,
1356                       "bsd mpc row store",
1357                       width_in_mbs * 96,
1358                       0x1000);
1359     assert(bo);
1360     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1361     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1362
1363     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1364
1365     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1366     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1367     
1368     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1369         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1370         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1371         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1372         int src_w, src_h;
1373         uint8_t *src = NULL, *dst = NULL;
1374
1375         assert(decode_state->bit_plane->buffer);
1376         src = decode_state->bit_plane->buffer;
1377
1378         bo = dri_bo_alloc(i965->intel.bufmgr,
1379                           "VC-1 Bitplane",
1380                           bitplane_width * height_in_mbs,
1381                           0x1000);
1382         assert(bo);
1383         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1384
1385         dri_bo_map(bo, True);
1386         assert(bo->virtual);
1387         dst = bo->virtual;
1388
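        /*
         * Repack the VA-API bitplane into the layout the MFX engine expects.
         * The input buffer stores two macroblocks per byte in raster order
         * with the first macroblock of each pair in the high nibble; the
         * loop below regroups them so that every output byte holds a
         * horizontally adjacent pair and each row is padded to
         * bitplane_width bytes, with the trailing shift fixing up the last
         * byte of odd-width rows. For skipped pictures bit 1 of every
         * macroblock is forced, presumably to mark all macroblocks skipped.
         */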
1389         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1390             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1391                 int src_index, dst_index;
1392                 int src_shift;
1393                 uint8_t src_value;
1394
1395                 src_index = (src_h * width_in_mbs + src_w) / 2;
1396                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1397                 src_value = ((src[src_index] >> src_shift) & 0xf);
1398
1399                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1400                     src_value |= 0x2;
1401                 }
1402
1403                 dst_index = src_w / 2;
1404                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1405             }
1406
1407             if (src_w & 1)
1408                 dst[src_w / 2] >>= 4;
1409
1410             dst += bitplane_width;
1411         }
1412
1413         dri_bo_unmap(bo);
1414     } else
1415         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1416 }
1417
1418 static void
1419 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1420                        struct decode_state *decode_state,
1421                        struct gen7_mfd_context *gen7_mfd_context)
1422 {
1423     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1424     VAPictureParameterBufferVC1 *pic_param;
1425     struct object_surface *obj_surface;
1426     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1427     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1428     int unified_mv_mode;
1429     int ref_field_pic_polarity = 0;
1430     int scale_factor = 0;
1431     int trans_ac_y = 0;
1432     int dmv_surface_valid = 0;
1433     int brfd = 0;
1434     int fcm = 0;
1435     int picture_type;
1436     int profile;
1437     int overlap;
1438     int interpolation_mode = 0;
1439
1440     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1441     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1442
1443     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1444     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1445     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1446     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1447     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1448     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1449     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1450     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1451
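    /*
     * Roughly: translate the VOPDQUANT syntax (DQUANT, DQUANTFRM, DQPROFILE,
     * DQDBEDGE/DQSBEDGE, DQBILEVEL) into the ALTPQUANT config and edge mask
     * fields of MFD_VC1_LONG_PIC_STATE. DQUANT == 2 applies the alternate
     * quantizer to all four picture edges; DQUANT == 1 selects all edges,
     * a pair of edges, a single edge or per-macroblock signalling depending
     * on DQPROFILE, with one edge-mask bit per picture boundary.
     */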
1452     if (dquant == 0) {
1453         alt_pquant_config = 0;
1454         alt_pquant_edge_mask = 0;
1455     } else if (dquant == 2) {
1456         alt_pquant_config = 1;
1457         alt_pquant_edge_mask = 0xf;
1458     } else {
1459         assert(dquant == 1);
1460         if (dquantfrm == 0) {
1461             alt_pquant_config = 0;
1462             alt_pquant_edge_mask = 0;
1463             alt_pq = 0;
1464         } else {
1465             assert(dquantfrm == 1);
1466             alt_pquant_config = 1;
1467
1468             switch (dqprofile) {
1469             case 3:
1470                 if (dqbilevel == 0) {
1471                     alt_pquant_config = 2;
1472                     alt_pquant_edge_mask = 0;
1473                 } else {
1474                     assert(dqbilevel == 1);
1475                     alt_pquant_config = 3;
1476                     alt_pquant_edge_mask = 0;
1477                 }
1478                 break;
1479                 
1480             case 0:
1481                 alt_pquant_edge_mask = 0xf;
1482                 break;
1483
1484             case 1:
1485                 if (dqdbedge == 3)
1486                     alt_pquant_edge_mask = 0x9;
1487                 else
1488                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1489
1490                 break;
1491
1492             case 2:
1493                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1494                 break;
1495
1496             default:
1497                 assert(0);
1498             }
1499         }
1500     }
1501
1502     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1503         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1504         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1505     } else {
1506         assert(pic_param->mv_fields.bits.mv_mode < 4);
1507         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1508     }
1509
1510     if (pic_param->sequence_fields.bits.interlace == 1 &&
1511         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1512         /* FIXME: calculate reference field picture polarity */
1513         assert(0);
1514         ref_field_pic_polarity = 0;
1515     }
1516
1517     if (pic_param->b_picture_fraction < 21)
1518         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1519
1520     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1521     
1522     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1523         picture_type == GEN7_VC1_I_PICTURE)
1524         picture_type = GEN7_VC1_BI_PICTURE;
1525
1526     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1527         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1528     else {
1529         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1530
1531         /*
1532          * 8.3.6.2.1 Transform Type Selection
1533          * If variable-sized transform coding is not enabled,
1534          * then the 8x8 transform shall be used for all blocks.
1535          * This is also an MFX_VC1_PIC_STATE requirement.
1536          */
1537         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1538             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1539             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1540         }
1541     }
1542
1543     if (picture_type == GEN7_VC1_B_PICTURE) {
1544         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1545
1546         obj_surface = decode_state->reference_objects[1];
1547
1548         if (obj_surface)
1549             gen7_vc1_surface = obj_surface->private_data;
1550
1551         if (!gen7_vc1_surface || 
1552             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1553              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1554             dmv_surface_valid = 0;
1555         else
1556             dmv_surface_valid = 1;
1557     }
1558
1559     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1560
1561     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1562         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1563     else {
1564         if (pic_param->picture_fields.bits.top_field_first)
1565             fcm = 2;
1566         else
1567             fcm = 3;
1568     }
1569
1570     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1571         brfd = pic_param->reference_fields.bits.reference_distance;
1572         brfd = (scale_factor * brfd) >> 8;
1573         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1574
1575         if (brfd < 0)
1576             brfd = 0;
1577     }
1578
1579     overlap = 0;
1580     if (profile != GEN7_VC1_ADVANCED_PROFILE) {
1581         if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1582             pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1583             overlap = 1;
1584         }
1585     } else {
1586         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1587             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1588             overlap = 1;
1589         }
1590         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1591             pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
1592             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1593                 overlap = 1;
1594             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1595                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1596                 overlap = 1;
1597             }
1598         }
1599     }
1600
1601     assert(pic_param->conditional_overlap_flag < 3);
1602     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1603
1604     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1605         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1606          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1607         interpolation_mode = 9; /* Half-pel bilinear */
1608     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1609              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1610               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1611         interpolation_mode = 1; /* Half-pel bicubic */
1612     else
1613         interpolation_mode = 0; /* Quarter-pel bicubic */
1614
1615     BEGIN_BCS_BATCH(batch, 6);
1616     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1617     OUT_BCS_BATCH(batch,
1618                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1619                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1620     OUT_BCS_BATCH(batch,
1621                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1622                   dmv_surface_valid << 15 |
1623                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1624                   pic_param->rounding_control << 13 |
1625                   pic_param->sequence_fields.bits.syncmarker << 12 |
1626                   interpolation_mode << 8 |
1627                   0 << 7 | /* FIXME: scale up or down ??? */
1628                   pic_param->range_reduction_frame << 6 |
1629                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1630                   overlap << 4 |
1631                   !pic_param->picture_fields.bits.is_first_field << 3 |
1632                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1633     OUT_BCS_BATCH(batch,
1634                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1635                   picture_type << 26 |
1636                   fcm << 24 |
1637                   alt_pq << 16 |
1638                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1639                   scale_factor << 0);
1640     OUT_BCS_BATCH(batch,
1641                   unified_mv_mode << 28 |
1642                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1643                   pic_param->fast_uvmc_flag << 26 |
1644                   ref_field_pic_polarity << 25 |
1645                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1646                   pic_param->reference_fields.bits.reference_distance << 20 |
1647                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1648                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1649                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1650                   alt_pquant_edge_mask << 4 |
1651                   alt_pquant_config << 2 |
1652                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1653                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1654     OUT_BCS_BATCH(batch,
1655                   !!pic_param->bitplane_present.value << 31 |
1656                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1657                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1658                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1659                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1660                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1661                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1662                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1663                   pic_param->mv_fields.bits.mv_table << 20 |
1664                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1665                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1666                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1667                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1668                   pic_param->mb_mode_table << 8 |
1669                   trans_ac_y << 6 |
1670                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1671                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1672                   pic_param->cbp_table << 0);
1673     ADVANCE_BCS_BATCH(batch);
1674 }
1675
1676 static void
1677 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1678                              struct decode_state *decode_state,
1679                              struct gen7_mfd_context *gen7_mfd_context)
1680 {
1681     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1682     VAPictureParameterBufferVC1 *pic_param;
1683     int intensitycomp_single;
1684
1685     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1686     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1687
1690     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1691
1692     BEGIN_BCS_BATCH(batch, 6);
1693     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1694     OUT_BCS_BATCH(batch,
1695                   0 << 14 | /* FIXME: double ??? */
1696                   0 << 12 |
1697                   intensitycomp_single << 10 |
1698                   intensitycomp_single << 8 |
1699                   0 << 4 | /* FIXME: interlace mode */
1700                   0);
1701     OUT_BCS_BATCH(batch,
1702                   pic_param->luma_shift << 16 |
1703                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1704     OUT_BCS_BATCH(batch, 0);
1705     OUT_BCS_BATCH(batch, 0);
1706     OUT_BCS_BATCH(batch, 0);
1707     ADVANCE_BCS_BATCH(batch);
1708 }
1709
1710 static void
1711 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1712                               struct decode_state *decode_state,
1713                               struct gen7_mfd_context *gen7_mfd_context)
1714 {
1715     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1716     struct object_surface *obj_surface;
1717     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1718
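    /*
     * MFX_VC1_DIRECTMODE_STATE: the write buffer is the DMV scratch of the
     * picture being decoded, the read buffer is the DMV scratch of what
     * appears to be the backward (future) reference, which B pictures use
     * for direct-mode motion vector prediction.
     */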
1719     obj_surface = decode_state->render_object;
1720
1721     if (obj_surface && obj_surface->private_data) {
1722         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1723     }
1724
1725     obj_surface = decode_state->reference_objects[1];
1726
1727     if (obj_surface && obj_surface->private_data) {
1728         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1729     }
1730
1731     BEGIN_BCS_BATCH(batch, 7);
1732     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1733
1734     if (dmv_write_buffer)
1735         OUT_BCS_RELOC(batch, dmv_write_buffer,
1736                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1737                       0);
1738     else
1739         OUT_BCS_BATCH(batch, 0);
1740
1741     OUT_BCS_BATCH(batch, 0);
1742     OUT_BCS_BATCH(batch, 0);
1743
1744     if (dmv_read_buffer)
1745         OUT_BCS_RELOC(batch, dmv_read_buffer,
1746                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1747                       0);
1748     else
1749         OUT_BCS_BATCH(batch, 0);
1750     
1751     OUT_BCS_BATCH(batch, 0);
1752     OUT_BCS_BATCH(batch, 0);
1753                   
1754     ADVANCE_BCS_BATCH(batch);
1755 }
1756
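/*
 * For the Advanced profile the slice header may contain 0x00 0x00 0x03
 * emulation-prevention sequences. The hardware parses the unfiltered
 * bitstream, so re-walk the header bytes and widen the reported bit offset
 * by 8 bits for every emulation-prevention byte found before the first
 * macroblock; Simple/Main profile offsets are passed through unchanged.
 */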
1757 static int
1758 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1759 {
1760     int out_slice_data_bit_offset;
1761     int slice_header_size = in_slice_data_bit_offset / 8;
1762     int i, j;
1763
1764     if (profile != 3)
1765         out_slice_data_bit_offset = in_slice_data_bit_offset;
1766     else {
1767         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1768             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1769                 i++, j += 2;
1770             }
1771         }
1772
1773         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1774     }
1775
1776     return out_slice_data_bit_offset;
1777 }
1778
1779 static void
1780 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1781                         VAPictureParameterBufferVC1 *pic_param,
1782                         VASliceParameterBufferVC1 *slice_param,
1783                         VASliceParameterBufferVC1 *next_slice_param,
1784                         dri_bo *slice_data_bo,
1785                         struct gen7_mfd_context *gen7_mfd_context)
1786 {
1787     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1788     int next_slice_start_vert_pos;
1789     int macroblock_offset;
1790     uint8_t *slice_data = NULL;
1791
1792     dri_bo_map(slice_data_bo, 0);
1793     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1794     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1795                                                                slice_param->macroblock_offset,
1796                                                                pic_param->sequence_fields.bits.profile);
1797     dri_bo_unmap(slice_data_bo);
1798
1799     if (next_slice_param)
1800         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1801     else
1802         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1803
1804     BEGIN_BCS_BATCH(batch, 5);
1805     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1806     OUT_BCS_BATCH(batch, 
1807                   slice_param->slice_data_size - (macroblock_offset >> 3));
1808     OUT_BCS_BATCH(batch, 
1809                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1810     OUT_BCS_BATCH(batch,
1811                   slice_param->slice_vertical_position << 16 |
1812                   next_slice_start_vert_pos << 0);
1813     OUT_BCS_BATCH(batch,
1814                   (macroblock_offset & 0x7));
1815     ADVANCE_BCS_BATCH(batch);
1816 }
1817
1818 static void
1819 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1820                             struct decode_state *decode_state,
1821                             struct gen7_mfd_context *gen7_mfd_context)
1822 {
1823     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1824     VAPictureParameterBufferVC1 *pic_param;
1825     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1826     dri_bo *slice_data_bo;
1827     int i, j;
1828
1829     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1830     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1831
1832     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1833     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1834     intel_batchbuffer_emit_mi_flush(batch);
1835     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1836     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1837     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1838     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1839     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1840     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1841     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1842
1843     for (j = 0; j < decode_state->num_slice_params; j++) {
1844         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1845         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1846         slice_data_bo = decode_state->slice_datas[j]->bo;
1847         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1848
1849         if (j == decode_state->num_slice_params - 1)
1850             next_slice_group_param = NULL;
1851         else
1852             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1853
1854         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1855             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1856
1857             if (i < decode_state->slice_params[j]->num_elements - 1)
1858                 next_slice_param = slice_param + 1;
1859             else
1860                 next_slice_param = next_slice_group_param;
1861
1862             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1863             slice_param++;
1864         }
1865     }
1866
1867     intel_batchbuffer_end_atomic(batch);
1868     intel_batchbuffer_flush(batch);
1869 }
1870
1871 static void
1872 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1873                           struct decode_state *decode_state,
1874                           struct gen7_mfd_context *gen7_mfd_context)
1875 {
1876     struct object_surface *obj_surface;
1877     VAPictureParameterBufferJPEGBaseline *pic_param;
1878     int subsampling = SUBSAMPLE_YUV420;
1879     int fourcc = VA_FOURCC_IMC3;
1880
1881     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1882
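    /*
     * Infer the chroma subsampling (and hence the fourcc of the render
     * target) from the per-component sampling factors, e.g. h = 2:1:1 with
     * v = 2:1:1 is 4:2:0 (IMC3), h = 2:1:1 with v = 1:1:1 is 4:2:2
     * horizontal (422H), and h = v = 1:1:1 is 4:4:4 (444P).
     */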
1883     if (pic_param->num_components == 1)
1884         subsampling = SUBSAMPLE_YUV400;
1885     else if (pic_param->num_components == 3) {
1886         int h1 = pic_param->components[0].h_sampling_factor;
1887         int h2 = pic_param->components[1].h_sampling_factor;
1888         int h3 = pic_param->components[2].h_sampling_factor;
1889         int v1 = pic_param->components[0].v_sampling_factor;
1890         int v2 = pic_param->components[1].v_sampling_factor;
1891         int v3 = pic_param->components[2].v_sampling_factor;
1892
1893         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1894             v1 == 2 && v2 == 1 && v3 == 1) {
1895             subsampling = SUBSAMPLE_YUV420;
1896             fourcc = VA_FOURCC_IMC3;
1897         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1898                    v1 == 1 && v2 == 1 && v3 == 1) {
1899             subsampling = SUBSAMPLE_YUV422H;
1900             fourcc = VA_FOURCC_422H;
1901         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1902                    v1 == 1 && v2 == 1 && v3 == 1) {
1903             subsampling = SUBSAMPLE_YUV444;
1904             fourcc = VA_FOURCC_444P;
1905         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1906                    v1 == 1 && v2 == 1 && v3 == 1) {
1907             subsampling = SUBSAMPLE_YUV411;
1908             fourcc = VA_FOURCC_411P;
1909         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1910                    v1 == 2 && v2 == 1 && v3 == 1) {
1911             subsampling = SUBSAMPLE_YUV422V;
1912             fourcc = VA_FOURCC_422V;
1913         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1914                    v1 == 2 && v2 == 2 && v3 == 2) {
1915             subsampling = SUBSAMPLE_YUV422H;
1916             fourcc = VA_FOURCC_422H;
1917         } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1918                    v1 == 2 && v2 == 1 && v3 == 1) {
1919             subsampling = SUBSAMPLE_YUV422V;
1920             fourcc = VA_FOURCC_422V;
1921         } else
1922             assert(0);
1923     }
1924     else {
1925         assert(0);
1926     }
1927
1928     /* Current decoded picture */
1929     obj_surface = decode_state->render_object;
1930     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1931
1932     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1933     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1934     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1935     gen7_mfd_context->pre_deblocking_output.valid = 1;
1936
1937     gen7_mfd_context->post_deblocking_output.bo = NULL;
1938     gen7_mfd_context->post_deblocking_output.valid = 0;
1939
1940     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1941     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1942
1943     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1944     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1945
1946     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1947     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1948
1949     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1950     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1951
1952     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1953     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1954 }
1955
1956 static const int va_to_gen7_jpeg_rotation[4] = {
1957     GEN7_JPEG_ROTATION_0,
1958     GEN7_JPEG_ROTATION_90,
1959     GEN7_JPEG_ROTATION_180,
1960     GEN7_JPEG_ROTATION_270
1961 };
1962
1963 static void
1964 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1965                         struct decode_state *decode_state,
1966                         struct gen7_mfd_context *gen7_mfd_context)
1967 {
1968     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1969     VAPictureParameterBufferJPEGBaseline *pic_param;
1970     int chroma_type = GEN7_YUV420;
1971     int frame_width_in_blks;
1972     int frame_height_in_blks;
1973
1974     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1975     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1976
1977     if (pic_param->num_components == 1)
1978         chroma_type = GEN7_YUV400;
1979     else if (pic_param->num_components == 3) {
1980         int h1 = pic_param->components[0].h_sampling_factor;
1981         int h2 = pic_param->components[1].h_sampling_factor;
1982         int h3 = pic_param->components[2].h_sampling_factor;
1983         int v1 = pic_param->components[0].v_sampling_factor;
1984         int v2 = pic_param->components[1].v_sampling_factor;
1985         int v3 = pic_param->components[2].v_sampling_factor;
1986
1987         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1988             v1 == 2 && v2 == 1 && v3 == 1)
1989             chroma_type = GEN7_YUV420;
1990         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1991                  v1 == 1 && v2 == 1 && v3 == 1)
1992             chroma_type = GEN7_YUV422H_2Y;
1993         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1994                  v1 == 1 && v2 == 1 && v3 == 1)
1995             chroma_type = GEN7_YUV444;
1996         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1997                  v1 == 1 && v2 == 1 && v3 == 1)
1998             chroma_type = GEN7_YUV411;
1999         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2000                  v1 == 2 && v2 == 1 && v3 == 1)
2001             chroma_type = GEN7_YUV422V_2Y;
2002         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2003                  v1 == 2 && v2 == 2 && v3 == 2)
2004             chroma_type = GEN7_YUV422H_4Y;
2005         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2006                  v1 == 2 && v2 == 1 && v3 == 1)
2007             chroma_type = GEN7_YUV422V_4Y;
2008         else
2009             assert(0);
2010     }
2011
2012     if (chroma_type == GEN7_YUV400 ||
2013         chroma_type == GEN7_YUV444 ||
2014         chroma_type == GEN7_YUV422V_2Y) {
2015         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2016         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2017     } else if (chroma_type == GEN7_YUV411) {
2018         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2019         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2020     } else {
2021         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2022         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2023     }
2024
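    /*
     * MFX_JPEG_PIC_STATE takes the frame size in 8x8 blocks; the rounding
     * above appears to align the luma dimensions to the MCU size implied by
     * the chroma type (hence the / 32 * 4 factor for 4:1:1 and the
     * / 16 * 2 factor for the 4:2:0-style layouts).
     */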
2025     BEGIN_BCS_BATCH(batch, 3);
2026     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2027     OUT_BCS_BATCH(batch,
2028                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2029                   (chroma_type << 0));
2030     OUT_BCS_BATCH(batch,
2031                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2032                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2033     ADVANCE_BCS_BATCH(batch);
2034 }
2035
2036 static const int va_to_gen7_jpeg_hufftable[2] = {
2037     MFX_HUFFTABLE_ID_Y,
2038     MFX_HUFFTABLE_ID_UV
2039 };
2040
2041 static void
2042 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2043                                struct decode_state *decode_state,
2044                                struct gen7_mfd_context *gen7_mfd_context,
2045                                int num_tables)
2046 {
2047     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2048     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2049     int index;
2050
2051     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2052         return;
2053
2054     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2055
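    /*
     * MFX_JPEG_HUFF_TABLE_STATE is 53 DWords: 2 for the command header and
     * table id, then 204 bytes of table data (12 DC code-length counts,
     * 12 DC values, 16 AC code-length counts and 164 bytes of AC values,
     * presumably the 162 baseline AC symbols padded to keep the command
     * DWord aligned).
     */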
2056     for (index = 0; index < num_tables; index++) {
2057         int id = va_to_gen7_jpeg_hufftable[index];
2058         if (!huffman_table->load_huffman_table[index])
2059             continue;
2060         BEGIN_BCS_BATCH(batch, 53);
2061         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2062         OUT_BCS_BATCH(batch, id);
2063         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2064         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2065         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2066         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2067         ADVANCE_BCS_BATCH(batch);
2068     }
2069 }
2070
2071 static const int va_to_gen7_jpeg_qm[5] = {
2072     -1,
2073     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2074     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2075     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2076     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2077 };
2078
2079 static void
2080 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2081                        struct decode_state *decode_state,
2082                        struct gen7_mfd_context *gen7_mfd_context)
2083 {
2084     VAPictureParameterBufferJPEGBaseline *pic_param;
2085     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2086     int index;
2087
2088     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2089         return;
2090
2091     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2092     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2093
2094     assert(pic_param->num_components <= 3);
2095
2096     for (index = 0; index < pic_param->num_components; index++) {
2097         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2098         int qm_type;
2099         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2100         unsigned char raster_qm[64];
2101         int j;
2102
2103         if (id > 4 || id < 1)
2104             continue;
2105
2106         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2107             continue;
2108
2109         qm_type = va_to_gen7_jpeg_qm[id];
2110
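        /*
         * The quantiser tables in VAIQMatrixBufferJPEGBaseline are stored in
         * zig-zag scan order (as parsed from the DQT segment), while the QM
         * state command expects raster order, so undo the zig-zag here.
         */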
2111         for (j = 0; j < 64; j++)
2112             raster_qm[zigzag_direct[j]] = qm[j];
2113
2114         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2115     }
2116 }
2117
2118 static void
2119 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2120                          VAPictureParameterBufferJPEGBaseline *pic_param,
2121                          VASliceParameterBufferJPEGBaseline *slice_param,
2122                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2123                          dri_bo *slice_data_bo,
2124                          struct gen7_mfd_context *gen7_mfd_context)
2125 {
2126     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2127     int scan_component_mask = 0;
2128     int i;
2129
2130     assert(slice_param->num_components > 0);
2131     assert(slice_param->num_components < 4);
2132     assert(slice_param->num_components <= pic_param->num_components);
2133
2134     for (i = 0; i < slice_param->num_components; i++) {
2135         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2136         case 1:
2137             scan_component_mask |= (1 << 0);
2138             break;
2139         case 2:
2140             scan_component_mask |= (1 << 1);
2141             break;
2142         case 3:
2143             scan_component_mask |= (1 << 2);
2144             break;
2145         default:
2146             assert(0);
2147             break;
2148         }
2149     }
2150
2151     BEGIN_BCS_BATCH(batch, 6);
2152     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2153     OUT_BCS_BATCH(batch, 
2154                   slice_param->slice_data_size);
2155     OUT_BCS_BATCH(batch, 
2156                   slice_param->slice_data_offset);
2157     OUT_BCS_BATCH(batch,
2158                   slice_param->slice_horizontal_position << 16 |
2159                   slice_param->slice_vertical_position << 0);
2160     OUT_BCS_BATCH(batch,
2161                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2162                   (scan_component_mask << 27) |                 /* scan components */
2163                   (0 << 26) |   /* disable interrupt allowed */
2164                   (slice_param->num_mcus << 0));                /* MCU count */
2165     OUT_BCS_BATCH(batch,
2166                   (slice_param->restart_interval << 0));    /* RestartInterval */
2167     ADVANCE_BCS_BATCH(batch);
2168 }
2169
2170 /* Workaround for JPEG decoding on Ivybridge */
2171 #ifdef JPEG_WA
2172
2173 VAStatus 
2174 i965_CreateSurfaces(VADriverContextP ctx,
2175                     int width,
2176                     int height,
2177                     int format,
2178                     int num_surfaces,
2179                     VASurfaceID *surfaces);
2180
2181 static struct {
2182     int width;
2183     int height;
2184     unsigned char data[32];
2185     int data_size;
2186     int data_bit_offset;
2187     int qp;
2188 } gen7_jpeg_wa_clip = {
2189     16,
2190     16,
2191     {
2192         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2193         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2194     },
2195     14,
2196     40,
2197     28,
2198 };
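/*
 * gen7_jpeg_wa_clip appears to be a tiny pre-encoded 16x16 AVC intra clip
 * (14 bytes of slice data, macroblock data starting at bit offset 40,
 * QP 28). The workaround decodes it through the full AVC pipeline before
 * each real JPEG picture, presumably so the MFX engine starts from a known
 * state.
 */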
2199
2200 static void
2201 gen8_jpeg_wa_init(VADriverContextP ctx,
2202                   struct gen7_mfd_context *gen7_mfd_context)
2203 {
2204     struct i965_driver_data *i965 = i965_driver_data(ctx);
2205     VAStatus status;
2206     struct object_surface *obj_surface;
2207
2208     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2209         i965_DestroySurfaces(ctx,
2210                              &gen7_mfd_context->jpeg_wa_surface_id,
2211                              1);
2212
2213     status = i965_CreateSurfaces(ctx,
2214                                  gen7_jpeg_wa_clip.width,
2215                                  gen7_jpeg_wa_clip.height,
2216                                  VA_RT_FORMAT_YUV420,
2217                                  1,
2218                                  &gen7_mfd_context->jpeg_wa_surface_id);
2219     assert(status == VA_STATUS_SUCCESS);
2220
2221     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2222     assert(obj_surface);
2223     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2224     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2225
2226     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2227         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2228                                                                "JPEG WA data",
2229                                                                0x1000,
2230                                                                0x1000);
2231         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2232                        0,
2233                        gen7_jpeg_wa_clip.data_size,
2234                        gen7_jpeg_wa_clip.data);
2235     }
2236 }
2237
2238 static void
2239 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2240                               struct gen7_mfd_context *gen7_mfd_context)
2241 {
2242     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2243
2244     BEGIN_BCS_BATCH(batch, 5);
2245     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2246     OUT_BCS_BATCH(batch,
2247                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2248                   (MFD_MODE_VLD << 15) | /* VLD mode */
2249                   (0 << 10) | /* disable Stream-Out */
2250                   (0 << 9)  | /* Post Deblocking Output */
2251                   (1 << 8)  | /* Pre Deblocking Output */
2252                   (0 << 5)  | /* not in stitch mode */
2253                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2254                   (MFX_FORMAT_AVC << 0));
2255     OUT_BCS_BATCH(batch,
2256                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2257                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2258                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2259                   (0 << 1)  |
2260                   (0 << 0));
2261     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2262     OUT_BCS_BATCH(batch, 0); /* reserved */
2263     ADVANCE_BCS_BATCH(batch);
2264 }
2265
2266 static void
2267 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2268                            struct gen7_mfd_context *gen7_mfd_context)
2269 {
2270     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2271     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2272
2273     BEGIN_BCS_BATCH(batch, 6);
2274     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2275     OUT_BCS_BATCH(batch, 0);
2276     OUT_BCS_BATCH(batch,
2277                   ((obj_surface->orig_width - 1) << 18) |
2278                   ((obj_surface->orig_height - 1) << 4));
2279     OUT_BCS_BATCH(batch,
2280                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2281                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2282                   (0 << 22) | /* surface object control state, ignored */
2283                   ((obj_surface->width - 1) << 3) | /* pitch */
2284                   (0 << 2)  | /* must be 0 */
2285                   (1 << 1)  | /* must be tiled */
2286                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2287     OUT_BCS_BATCH(batch,
2288                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2289                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2290     OUT_BCS_BATCH(batch,
2291                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2292                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2293     ADVANCE_BCS_BATCH(batch);
2294 }
2295
2296 static void
2297 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2298                                  struct gen7_mfd_context *gen7_mfd_context)
2299 {
2300     struct i965_driver_data *i965 = i965_driver_data(ctx);
2301     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2302     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2303     dri_bo *intra_bo;
2304     int i;
2305
2306     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2307                             "intra row store",
2308                             128 * 64,
2309                             0x1000);
2310
2311     BEGIN_BCS_BATCH(batch, 61);
2312     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2313     OUT_BCS_RELOC(batch,
2314                   obj_surface->bo,
2315                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2316                   0);
2317     OUT_BCS_BATCH(batch, 0);
2318     OUT_BCS_BATCH(batch, 0);
2319
2320
2321     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2322     OUT_BCS_BATCH(batch, 0);
2323     OUT_BCS_BATCH(batch, 0);
2324
2325     /* uncompressed-video & stream out 7-12 */
2326     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2327     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2328     OUT_BCS_BATCH(batch, 0);
2329     OUT_BCS_BATCH(batch, 0);
2330     OUT_BCS_BATCH(batch, 0);
2331     OUT_BCS_BATCH(batch, 0);
2332
2333     /* the DW 13-15 is for intra row store scratch */
2334     OUT_BCS_RELOC(batch,
2335                   intra_bo,
2336                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2337                   0);
2338     OUT_BCS_BATCH(batch, 0);
2339     OUT_BCS_BATCH(batch, 0);
2340
2341     /* the DW 16-18 is for deblocking filter */
2342     OUT_BCS_BATCH(batch, 0);
2343     OUT_BCS_BATCH(batch, 0);
2344     OUT_BCS_BATCH(batch, 0);
2345
2346     /* DW 19..50 */
2347     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2348         OUT_BCS_BATCH(batch, 0);
2349         OUT_BCS_BATCH(batch, 0);
2350     }
2351     OUT_BCS_BATCH(batch, 0);
2352
2353     /* the DW52-54 is for mb status address */
2354     OUT_BCS_BATCH(batch, 0);
2355     OUT_BCS_BATCH(batch, 0);
2356     OUT_BCS_BATCH(batch, 0);
2357     /* the DW56-60 is for ILDB & second ILDB address */
2358     OUT_BCS_BATCH(batch, 0);
2359     OUT_BCS_BATCH(batch, 0);
2360     OUT_BCS_BATCH(batch, 0);
2361     OUT_BCS_BATCH(batch, 0);
2362     OUT_BCS_BATCH(batch, 0);
2363     OUT_BCS_BATCH(batch, 0);
2364
2365     ADVANCE_BCS_BATCH(batch);
2366
2367     dri_bo_unreference(intra_bo);
2368 }
2369
2370 static void
2371 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2372                                      struct gen7_mfd_context *gen7_mfd_context)
2373 {
2374     struct i965_driver_data *i965 = i965_driver_data(ctx);
2375     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2376     dri_bo *bsd_mpc_bo, *mpr_bo;
2377
2378     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2379                               "bsd mpc row store",
2380                               11520, /* 1.5 * 120 * 64 */
2381                               0x1000);
2382
2383     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2384                           "mpr row store",
2385                           7680, /* 1.0 * 120 * 64 */
2386                           0x1000);
2387
2388     BEGIN_BCS_BATCH(batch, 10);
2389     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2390
2391     OUT_BCS_RELOC(batch,
2392                   bsd_mpc_bo,
2393                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2394                   0);
2395
2396     OUT_BCS_BATCH(batch, 0);
2397     OUT_BCS_BATCH(batch, 0);
2398
2399     OUT_BCS_RELOC(batch,
2400                   mpr_bo,
2401                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2402                   0);
2403     OUT_BCS_BATCH(batch, 0);
2404     OUT_BCS_BATCH(batch, 0);
2405
2406     OUT_BCS_BATCH(batch, 0);
2407     OUT_BCS_BATCH(batch, 0);
2408     OUT_BCS_BATCH(batch, 0);
2409
2410     ADVANCE_BCS_BATCH(batch);
2411
2412     dri_bo_unreference(bsd_mpc_bo);
2413     dri_bo_unreference(mpr_bo);
2414 }
2415
2416 static void
2417 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2418                           struct gen7_mfd_context *gen7_mfd_context)
2419 {
2420
2421 }
2422
2423 static void
2424 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2425                            struct gen7_mfd_context *gen7_mfd_context)
2426 {
2427     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2428     int img_struct = 0;
2429     int mbaff_frame_flag = 0;
2430     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2431
2432     BEGIN_BCS_BATCH(batch, 16);
2433     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2434     OUT_BCS_BATCH(batch, 
2435                   width_in_mbs * height_in_mbs);
2436     OUT_BCS_BATCH(batch, 
2437                   ((height_in_mbs - 1) << 16) | 
2438                   ((width_in_mbs - 1) << 0));
2439     OUT_BCS_BATCH(batch, 
2440                   (0 << 24) |
2441                   (0 << 16) |
2442                   (0 << 14) |
2443                   (0 << 13) |
2444                   (0 << 12) | /* differ from GEN6 */
2445                   (0 << 10) |
2446                   (img_struct << 8));
2447     OUT_BCS_BATCH(batch,
2448                   (1 << 10) | /* 4:2:0 */
2449                   (1 << 7) |  /* CABAC */
2450                   (0 << 6) |
2451                   (0 << 5) |
2452                   (0 << 4) |
2453                   (0 << 3) |
2454                   (1 << 2) |
2455                   (mbaff_frame_flag << 1) |
2456                   (0 << 0));
2457     OUT_BCS_BATCH(batch, 0);
2458     OUT_BCS_BATCH(batch, 0);
2459     OUT_BCS_BATCH(batch, 0);
2460     OUT_BCS_BATCH(batch, 0);
2461     OUT_BCS_BATCH(batch, 0);
2462     OUT_BCS_BATCH(batch, 0);
2463     OUT_BCS_BATCH(batch, 0);
2464     OUT_BCS_BATCH(batch, 0);
2465     OUT_BCS_BATCH(batch, 0);
2466     OUT_BCS_BATCH(batch, 0);
2467     OUT_BCS_BATCH(batch, 0);
2468     ADVANCE_BCS_BATCH(batch);
2469 }
2470
2471 static void
2472 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2473                                   struct gen7_mfd_context *gen7_mfd_context)
2474 {
2475     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2476     int i;
2477
2478     BEGIN_BCS_BATCH(batch, 71);
2479     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2480
2481     /* reference surfaces 0..15 */
2482     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2483         OUT_BCS_BATCH(batch, 0); /* top */
2484         OUT_BCS_BATCH(batch, 0); /* bottom */
2485     }
2486
2487     OUT_BCS_BATCH(batch, 0);
2488
2489     /* the current decoding frame/field */
2490     OUT_BCS_BATCH(batch, 0); /* top */
2491     OUT_BCS_BATCH(batch, 0);
2492     OUT_BCS_BATCH(batch, 0);
2493
2494     /* POC List */
2495     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2496         OUT_BCS_BATCH(batch, 0);
2497         OUT_BCS_BATCH(batch, 0);
2498     }
2499
2500     OUT_BCS_BATCH(batch, 0);
2501     OUT_BCS_BATCH(batch, 0);
2502
2503     ADVANCE_BCS_BATCH(batch);
2504 }
2505
2506 static void
2507 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2508                                      struct gen7_mfd_context *gen7_mfd_context)
2509 {
2510     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2511
2512     BEGIN_BCS_BATCH(batch, 11);
2513     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2514     OUT_BCS_RELOC(batch,
2515                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2516                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2517                   0);
2518     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2519     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2520     OUT_BCS_BATCH(batch, 0);
2521     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2522     OUT_BCS_BATCH(batch, 0);
2523     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2524     OUT_BCS_BATCH(batch, 0);
2525     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2526     OUT_BCS_BATCH(batch, 0);
2527     ADVANCE_BCS_BATCH(batch);
2528 }
2529
2530 static void
2531 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2532                             struct gen7_mfd_context *gen7_mfd_context)
2533 {
2534     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2535
2536     /* the input bitstream format on GEN7 differs from GEN6 */
2537     BEGIN_BCS_BATCH(batch, 6);
2538     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2539     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2540     OUT_BCS_BATCH(batch, 0);
2541     OUT_BCS_BATCH(batch,
2542                   (0 << 31) |
2543                   (0 << 14) |
2544                   (0 << 12) |
2545                   (0 << 10) |
2546                   (0 << 8));
2547     OUT_BCS_BATCH(batch,
2548                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2549                   (0 << 5)  |
2550                   (0 << 4)  |
2551                   (1 << 3) | /* LastSlice Flag */
2552                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2553     OUT_BCS_BATCH(batch, 0);
2554     ADVANCE_BCS_BATCH(batch);
2555 }
2556
2557 static void
2558 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2559                              struct gen7_mfd_context *gen7_mfd_context)
2560 {
2561     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2562     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2563     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2564     int first_mb_in_slice = 0;
2565     int slice_type = SLICE_TYPE_I;
2566
2567     BEGIN_BCS_BATCH(batch, 11);
2568     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2569     OUT_BCS_BATCH(batch, slice_type);
2570     OUT_BCS_BATCH(batch, 
2571                   (num_ref_idx_l1 << 24) |
2572                   (num_ref_idx_l0 << 16) |
2573                   (0 << 8) |
2574                   (0 << 0));
2575     OUT_BCS_BATCH(batch, 
2576                   (0 << 29) |
2577                   (1 << 27) |   /* disable Deblocking */
2578                   (0 << 24) |
2579                   (gen7_jpeg_wa_clip.qp << 16) |
2580                   (0 << 8) |
2581                   (0 << 0));
2582     OUT_BCS_BATCH(batch, 
2583                   (slice_ver_pos << 24) |
2584                   (slice_hor_pos << 16) | 
2585                   (first_mb_in_slice << 0));
2586     OUT_BCS_BATCH(batch,
2587                   (next_slice_ver_pos << 16) |
2588                   (next_slice_hor_pos << 0));
2589     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2590     OUT_BCS_BATCH(batch, 0);
2591     OUT_BCS_BATCH(batch, 0);
2592     OUT_BCS_BATCH(batch, 0);
2593     OUT_BCS_BATCH(batch, 0);
2594     ADVANCE_BCS_BATCH(batch);
2595 }
2596
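     /* JPEG decoding workaround: before the real JPEG pipeline is programmed, run a minimal AVC decode of the built-in gen7_jpeg_wa_clip through the MFX engine. */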
2597 static void
2598 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2599                  struct gen7_mfd_context *gen7_mfd_context)
2600 {
2601     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2602     gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2603     intel_batchbuffer_emit_mi_flush(batch);
2604     gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2605     gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2606     gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2607     gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2608     gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2609     gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2610     gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2611
2612     gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2613     gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2614     gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2615 }
2616
2617 #endif
2618
2619 void
2620 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2621                              struct decode_state *decode_state,
2622                              struct gen7_mfd_context *gen7_mfd_context)
2623 {
2624     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2625     VAPictureParameterBufferJPEGBaseline *pic_param;
2626     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2627     dri_bo *slice_data_bo;
2628     int i, j, max_selector = 0;
2629
2630     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2631     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2632
2633     /* Currently only baseline DCT is supported */
2634     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2635     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2636 #ifdef JPEG_WA
2637     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2638 #endif
2639     intel_batchbuffer_emit_mi_flush(batch);
2640     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2641     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2642     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2643     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2644     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2645
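         /* First pass over all scans: find the largest Huffman table selector so that the matching number of AC/DC tables can be loaded below. */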
2646     for (j = 0; j < decode_state->num_slice_params; j++) {
2647         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2648         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2649         slice_data_bo = decode_state->slice_datas[j]->bo;
2650         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2651
2652         if (j == decode_state->num_slice_params - 1)
2653             next_slice_group_param = NULL;
2654         else
2655             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2656
2657         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2658             int component;
2659
2660             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2661
2662             if (i < decode_state->slice_params[j]->num_elements - 1)
2663                 next_slice_param = slice_param + 1;
2664             else
2665                 next_slice_param = next_slice_group_param;
2666
2667             for (component = 0; component < slice_param->num_components; component++) {
2668                 if (max_selector < slice_param->components[component].dc_table_selector)
2669                     max_selector = slice_param->components[component].dc_table_selector;
2670
2671                 if (max_selector < slice_param->components[component].ac_table_selector)
2672                     max_selector = slice_param->components[component].ac_table_selector;
2673             }
2674
2675             slice_param++;
2676         }
2677     }
2678
2679     assert(max_selector < 2);
2680     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2681
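         /* Second pass: re-program the indirect object base address for each slice data buffer and emit one BSD object per scan. */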
2682     for (j = 0; j < decode_state->num_slice_params; j++) {
2683         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2684         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2685         slice_data_bo = decode_state->slice_datas[j]->bo;
2686         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2687
2688         if (j == decode_state->num_slice_params - 1)
2689             next_slice_group_param = NULL;
2690         else
2691             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2692
2693         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2694             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2695
2696             if (i < decode_state->slice_params[j]->num_elements - 1)
2697                 next_slice_param = slice_param + 1;
2698             else
2699                 next_slice_param = next_slice_group_param;
2700
2701             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2702             slice_param++;
2703         }
2704     }
2705
2706     intel_batchbuffer_end_atomic(batch);
2707     intel_batchbuffer_flush(batch);
2708 }
2709
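     /* VP8 dequantization lookup tables: map a clipped quantization index (0..127) to the DC/AC dequantizer values (see the dequantization section of the VP8 spec). */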
2710 static const int vp8_dc_qlookup[128] =
2711 {
2712       4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
2713      18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
2714      29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
2715      44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
2716      59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
2717      75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
2718      91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2719     122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
2720 };
2721
2722 static const int vp8_ac_qlookup[128] =
2723 {
2724       4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
2725      20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
2726      36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
2727      52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
2728      78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
2729     110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2730     155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2731     213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2732 };
2733
2734 static inline unsigned int vp8_clip_quantization_index(int index)
2735 {
2736     if (index > 127)
2737         return 127;
2738     else if (index < 0)
2739         return 0;
2740
2741     return index;
2742 }
2743
2744 static void
2745 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2746                           struct decode_state *decode_state,
2747                           struct gen7_mfd_context *gen7_mfd_context)
2748 {
2749     struct object_surface *obj_surface;
2750     struct i965_driver_data *i965 = i965_driver_data(ctx);
2751     dri_bo *bo;
2752     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2753     int width_in_mbs = (pic_param->frame_width + 15) / 16;
2754     int height_in_mbs = (pic_param->frame_height + 15) / 16;
2755
2756     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2757     assert(height_in_mbs > 0 && height_in_mbs <= 256);
2758
2759     intel_update_vp8_frame_store_index(ctx,
2760                                        decode_state,
2761                                        pic_param,
2762                                        gen7_mfd_context->reference_surface);
2763
2764     /* Current decoded picture */
2765     obj_surface = decode_state->render_object;
2766     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2767
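         /* Route the decoded frame to the post-deblocking output when the in-loop filter is enabled, and to the pre-deblocking output otherwise. */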
2768     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2769     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2770     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2771     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2772
2773     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2774     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2775     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2776     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2777
2778     intel_ensure_vp8_segmentation_buffer(ctx,
2779         &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2780
2781     /* The same as AVC */
2782     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2783     bo = dri_bo_alloc(i965->intel.bufmgr,
2784                       "intra row store",
2785                       width_in_mbs * 64,
2786                       0x1000);
2787     assert(bo);
2788     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2789     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2790
2791     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2792     bo = dri_bo_alloc(i965->intel.bufmgr,
2793                       "deblocking filter row store",
2794                       width_in_mbs * 64 * 4,
2795                       0x1000);
2796     assert(bo);
2797     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2798     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2799
2800     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2801     bo = dri_bo_alloc(i965->intel.bufmgr,
2802                       "bsd mpc row store",
2803                       width_in_mbs * 64 * 2,
2804                       0x1000);
2805     assert(bo);
2806     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2807     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2808
2809     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2810     bo = dri_bo_alloc(i965->intel.bufmgr,
2811                       "mpr row store",
2812                       width_in_mbs * 64 * 2,
2813                       0x1000);
2814     assert(bo);
2815     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2816     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2817
2818     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2819 }
2820
2821 static void
2822 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2823                        struct decode_state *decode_state,
2824                        struct gen7_mfd_context *gen7_mfd_context)
2825 {
2826     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2827     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2828     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2829     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2830     dri_bo *probs_bo = decode_state->probability_data->bo;
2831     int i, j, log2num;
2832     unsigned int quantization_value[4][6];
2833
2834     /* There is no safe way to error out if the segmentation buffer
2835        could not be allocated. So, instead of aborting, simply decode
2836        something even if the result may look totally inaccurate */
2837     const unsigned int enable_segmentation =
2838         pic_param->pic_fields.bits.segmentation_enabled &&
2839         gen7_mfd_context->segmentation_buffer.valid;
2840         
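         /* num_of_partitions counts the control partition as well, so this is log2 of the number of token partitions (1, 2, 4 or 8). */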
2841     log2num = (int)log2(slice_param->num_of_partitions - 1);
2842
2843     BEGIN_BCS_BATCH(batch, 38);
2844     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2845     OUT_BCS_BATCH(batch,
2846                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2847                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2848     OUT_BCS_BATCH(batch,
2849                   log2num << 24 |
2850                   pic_param->pic_fields.bits.sharpness_level << 16 |
2851                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2852                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2853                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2854                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2855                   pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2856                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2857                   (enable_segmentation &&
2858                    !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2859                   (enable_segmentation &&
2860                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2861                   (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicates an intra frame in the VP8 stream/spec (§9.1) */
2862                   pic_param->pic_fields.bits.filter_type << 4 |
2863                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2864                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2865
2866     OUT_BCS_BATCH(batch,
2867                   pic_param->loop_filter_level[3] << 24 |
2868                   pic_param->loop_filter_level[2] << 16 |
2869                   pic_param->loop_filter_level[1] <<  8 |
2870                   pic_param->loop_filter_level[0] <<  0);
2871
2872     /* Quantizer values for 4 segments, DW4-DW15 */
2873     for (i = 0; i < 4; i++) {
2874         quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])]; /* yac */
2875         quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])]; /* ydc */
2876         quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /* y2dc */
2877         /* 101581 >> 16 is equivalent to 155/100 */
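             /* i.e. the y2ac value is scaled by 1.55 in 16.16 fixed point: round(1.55 * 65536) = 101581 */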
2878         quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /* y2ac */
2879         quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])]; /* uvdc */
2880         quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])]; /* uvac */
2881
2882         quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2883         quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2884
2885         OUT_BCS_BATCH(batch,
2886                       quantization_value[i][0] << 16 | /* Y1AC */
2887                       quantization_value[i][1] <<  0); /* Y1DC */
2888         OUT_BCS_BATCH(batch,
2889                       quantization_value[i][5] << 16 | /* UVAC */
2890                       quantization_value[i][4] <<  0); /* UVDC */
2891         OUT_BCS_BATCH(batch,
2892                       quantization_value[i][3] << 16 | /* Y2AC */
2893                       quantization_value[i][2] <<  0); /* Y2DC */
2894     }
2895
2896     /* CoeffProbability table for non-key frame, DW16-DW18 */
2897     if (probs_bo) {
2898         OUT_BCS_RELOC(batch, probs_bo,
2899                       0, I915_GEM_DOMAIN_INSTRUCTION,
2900                       0);
2901         OUT_BCS_BATCH(batch, 0);
2902         OUT_BCS_BATCH(batch, 0);
2903     } else {
2904         OUT_BCS_BATCH(batch, 0);
2905         OUT_BCS_BATCH(batch, 0);
2906         OUT_BCS_BATCH(batch, 0);
2907     }
2908
2909     OUT_BCS_BATCH(batch,
2910                   pic_param->mb_segment_tree_probs[2] << 16 |
2911                   pic_param->mb_segment_tree_probs[1] <<  8 |
2912                   pic_param->mb_segment_tree_probs[0] <<  0);
2913
2914     OUT_BCS_BATCH(batch,
2915                   pic_param->prob_skip_false << 24 |
2916                   pic_param->prob_intra      << 16 |
2917                   pic_param->prob_last       <<  8 |
2918                   pic_param->prob_gf         <<  0);
2919
2920     OUT_BCS_BATCH(batch,
2921                   pic_param->y_mode_probs[3] << 24 |
2922                   pic_param->y_mode_probs[2] << 16 |
2923                   pic_param->y_mode_probs[1] <<  8 |
2924                   pic_param->y_mode_probs[0] <<  0);
2925
2926     OUT_BCS_BATCH(batch,
2927                   pic_param->uv_mode_probs[2] << 16 |
2928                   pic_param->uv_mode_probs[1] <<  8 |
2929                   pic_param->uv_mode_probs[0] <<  0);
2930     
2931     /* MV update value, DW23-DW32 */
2932     for (i = 0; i < 2; i++) {
2933         for (j = 0; j < 20; j += 4) {
2934             OUT_BCS_BATCH(batch,
2935                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2936                           pic_param->mv_probs[i][j + 2] << 16 |
2937                           pic_param->mv_probs[i][j + 1] <<  8 |
2938                           pic_param->mv_probs[i][j + 0] <<  0);
2939         }
2940     }
2941
2942     OUT_BCS_BATCH(batch,
2943                   (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2944                   (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2945                   (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
2946                   (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);
2947
2948     OUT_BCS_BATCH(batch,
2949                   (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2950                   (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2951                   (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
2952                   (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);
2953
2954     /* segmentation id stream base address, DW35-DW37 */
2955     if (enable_segmentation) {
2956         OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2957                       0, I915_GEM_DOMAIN_INSTRUCTION,
2958                       0);
2959         OUT_BCS_BATCH(batch, 0);
2960         OUT_BCS_BATCH(batch, 0);
2961     } else {
2963         OUT_BCS_BATCH(batch, 0);
2964         OUT_BCS_BATCH(batch, 0);
2965         OUT_BCS_BATCH(batch, 0);
2966     }
2967     ADVANCE_BCS_BATCH(batch);
2968 }
2969
2970 static void
2971 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2972                         VAPictureParameterBufferVP8 *pic_param,
2973                         VASliceParameterBufferVP8 *slice_param,
2974                         dri_bo *slice_data_bo,
2975                         struct gen7_mfd_context *gen7_mfd_context)
2976 {
2977     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2978     int i, log2num;
2979     unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
2980     unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
2981     unsigned int partition_size_0 = slice_param->partition_size[0];
2982
2983     assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
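         /* used_bits counts the bits of the current byte already consumed by the boolean decoder; if the whole byte has been used, start partition 0 at the next byte. */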
2984     if (used_bits == 8) {
2985         used_bits = 0;
2986         offset += 1;
2987         partition_size_0 -= 1;
2988     }
2989
2990     assert(slice_param->num_of_partitions >= 2);
2991     assert(slice_param->num_of_partitions <= 9);
2992
2993     log2num = (int)log2(slice_param->num_of_partitions - 1);
2994
2995     BEGIN_BCS_BATCH(batch, 22);
2996     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2997     OUT_BCS_BATCH(batch,
2998                   used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2999                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
3000                   log2num << 4 |
3001                   (slice_param->macroblock_offset & 0x7));
3002     OUT_BCS_BATCH(batch,
3003                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
3004                   0);
3005
3006     OUT_BCS_BATCH(batch, partition_size_0);
3007     OUT_BCS_BATCH(batch, offset);
3008     // partition sizes in bytes are present after the first partition above when there is more than one token partition
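         // the sizes of all but the last token partition are stored as 3-byte fields right after the first partition, which accounts for the 3 * (num_of_partitions - 2) bytes below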
3009     offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
3010     for (i = 1; i < 9; i++) {
3011         if (i < slice_param->num_of_partitions) {
3012             OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
3013             OUT_BCS_BATCH(batch, offset);
3014         } else {
3015             OUT_BCS_BATCH(batch, 0);
3016             OUT_BCS_BATCH(batch, 0);
3017         }
3018
3019         offset += slice_param->partition_size[i];
3020     }
3021
3022     OUT_BCS_BATCH(batch,
3023                   1 << 31 | /* concealment method */
3024                   0);
3025
3026     ADVANCE_BCS_BATCH(batch);
3027 }
3028
3029 void
3030 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3031                             struct decode_state *decode_state,
3032                             struct gen7_mfd_context *gen7_mfd_context)
3033 {
3034     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3035     VAPictureParameterBufferVP8 *pic_param;
3036     VASliceParameterBufferVP8 *slice_param;
3037     dri_bo *slice_data_bo;
3038
3039     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3040     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3041
3042     /* one slice per frame */
3043     if (decode_state->num_slice_params != 1 ||
3044         (!decode_state->slice_params ||
3045          !decode_state->slice_params[0] ||
3046          (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3047         (!decode_state->slice_datas ||
3048          !decode_state->slice_datas[0] ||
3049          !decode_state->slice_datas[0]->bo) ||
3050         !decode_state->probability_data) {
3051         WARN_ONCE("Wrong parameters for VP8 decoding\n");
3052
3053         return;
3054     }
3055
3056     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3057     slice_data_bo = decode_state->slice_datas[0]->bo;
3058
3059     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3060     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3061     intel_batchbuffer_emit_mi_flush(batch);
3062     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3063     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3064     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3065     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3066     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3067     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3068     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3069     intel_batchbuffer_end_atomic(batch);
3070     intel_batchbuffer_flush(batch);
3071 }
3072
3073 static VAStatus
3074 gen8_mfd_decode_picture(VADriverContextP ctx, 
3075                         VAProfile profile, 
3076                         union codec_state *codec_state,
3077                         struct hw_context *hw_context)
3079 {
3080     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3081     struct decode_state *decode_state = &codec_state->decode;
3082     VAStatus vaStatus;
3083
3084     assert(gen7_mfd_context);
3085
3086     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3087
3088     if (vaStatus != VA_STATUS_SUCCESS)
3089         goto out;
3090
3091     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3092
3093     switch (profile) {
3094     case VAProfileMPEG2Simple:
3095     case VAProfileMPEG2Main:
3096         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3097         break;
3098         
3099     case VAProfileH264ConstrainedBaseline:
3100     case VAProfileH264Main:
3101     case VAProfileH264High:
3102         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3103         break;
3104
3105     case VAProfileVC1Simple:
3106     case VAProfileVC1Main:
3107     case VAProfileVC1Advanced:
3108         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3109         break;
3110
3111     case VAProfileJPEGBaseline:
3112         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3113         break;
3114
3115     case VAProfileVP8Version0_3:
3116         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3117         break;
3118
3119     default:
3120         assert(0);
3121         break;
3122     }
3123
3124     vaStatus = VA_STATUS_SUCCESS;
3125
3126 out:
3127     return vaStatus;
3128 }
3129
3130 static void
3131 gen8_mfd_context_destroy(void *hw_context)
3132 {
3133     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3134
3135     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3136     gen7_mfd_context->post_deblocking_output.bo = NULL;
3137
3138     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3139     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3140
3141     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3142     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3143
3144     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3145     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3146
3147     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3148     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3149
3150     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3151     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3152
3153     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3154     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3155
3156     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3157     gen7_mfd_context->segmentation_buffer.bo = NULL;
3158
3159     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3160
3161     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3162     free(gen7_mfd_context);
3163 }
3164
3165 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3166                                     struct gen7_mfd_context *gen7_mfd_context)
3167 {
3168     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3169     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3170     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3171     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3172 }
3173
3174 struct hw_context *
3175 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3176 {
3177     struct intel_driver_data *intel = intel_driver_data(ctx);
3178     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3179     int i;
3180
3181     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3182     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3183     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3184
3185     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3186         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3187         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3188     }
3189
3190     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3191     gen7_mfd_context->segmentation_buffer.valid = 0;
3192
3193     switch (obj_config->profile) {
3194     case VAProfileMPEG2Simple:
3195     case VAProfileMPEG2Main:
3196         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3197         break;
3198
3199     case VAProfileH264ConstrainedBaseline:
3200     case VAProfileH264Main:
3201     case VAProfileH264High:
3202         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3203         break;
3204     default:
3205         break;
3206     }
3207     return (struct hw_context *)gen7_mfd_context;
3208 }