decode/VP8: HW needs 1 extra byte for each partition
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV             2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
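/*
 * Zigzag scan order: zigzag_direct[i] is the raster-order index of the i-th
 * zigzag-scanned coefficient.  gen8_mfd_mpeg2_qm_state() uses it to scatter
 * the MPEG-2 quantiser matrices (presumably delivered in zigzag scan order,
 * as in the bitstream) into the raster layout passed to MFX_QM_STATE.
 */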
51 static const uint32_t zigzag_direct[64] = {
52     0,   1,  8, 16,  9,  2,  3, 10,
53     17, 24, 32, 25, 18, 11,  4,  5,
54     12, 19, 26, 33, 40, 48, 41, 34,
55     27, 20, 13,  6,  7, 14, 21, 28,
56     35, 42, 49, 56, 57, 50, 43, 36,
57     29, 22, 15, 23, 30, 37, 44, 51,
58     58, 59, 52, 45, 38, 31, 39, 46,
59     53, 60, 61, 54, 47, 55, 62, 63
60 };
61
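/*
 * Lazily attach the AVC private data to the render surface and allocate its
 * direct-mode motion vector (DMV) buffer: 128 bytes per macroblock, covering
 * the whole frame regardless of field coding (see the note below).
 */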
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx, 
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77         gen7_avc_surface->base.frame_store_id = -1;
78         assert((obj_surface->size & 0x3f) == 0);
79         obj_surface->private_data = gen7_avc_surface;
80     }
81
82     /* DMV buffers now relate to the whole frame, irrespective of
83        field coding modes */
84     if (gen7_avc_surface->dmv_top == NULL) {
85         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86                                                  "direct mv w/r buffer",
87                                                  width_in_mbs * height_in_mbs * 128,
88                                                  0x1000);
89         assert(gen7_avc_surface->dmv_top);
90     }
91 }
92
93 static void
94 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
95                           struct decode_state *decode_state,
96                           int standard_select,
97                           struct gen7_mfd_context *gen7_mfd_context)
98 {
99     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
100
101     assert(standard_select == MFX_FORMAT_MPEG2 ||
102            standard_select == MFX_FORMAT_AVC ||
103            standard_select == MFX_FORMAT_VC1 ||
104            standard_select == MFX_FORMAT_JPEG ||
105            standard_select == MFX_FORMAT_VP8);
106
107     BEGIN_BCS_BATCH(batch, 5);
108     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
109     OUT_BCS_BATCH(batch,
110                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
111                   (MFD_MODE_VLD << 15) | /* VLD mode */
112                   (0 << 10) | /* disable Stream-Out */
113                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
114                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
115                   (0 << 5)  | /* not in stitch mode */
116                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
117                   (standard_select << 0));
118     OUT_BCS_BATCH(batch,
119                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
120                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
121                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
122                   (0 << 1)  |
123                   (0 << 0));
124     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
125     OUT_BCS_BATCH(batch, 0); /* reserved */
126     ADVANCE_BCS_BATCH(batch);
127 }
128
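/*
 * MFX_SURFACE_STATE for the destination picture: a single Y-major tiled
 * buffer, planar 4:2:0 (or monochrome for Y800), with the Cb and Cr planes
 * located through their Y offsets; chroma is interleaved for every codec
 * except JPEG.
 */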
129 static void
130 gen8_mfd_surface_state(VADriverContextP ctx,
131                        struct decode_state *decode_state,
132                        int standard_select,
133                        struct gen7_mfd_context *gen7_mfd_context)
134 {
135     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
136     struct object_surface *obj_surface = decode_state->render_object;
137     unsigned int y_cb_offset;
138     unsigned int y_cr_offset;
139     unsigned int surface_format;
140
141     assert(obj_surface);
142
143     y_cb_offset = obj_surface->y_cb_offset;
144     y_cr_offset = obj_surface->y_cr_offset;
145
146     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
147         MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
148
149     BEGIN_BCS_BATCH(batch, 6);
150     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
151     OUT_BCS_BATCH(batch, 0);
152     OUT_BCS_BATCH(batch,
153                   ((obj_surface->orig_height - 1) << 18) |
154                   ((obj_surface->orig_width - 1) << 4));
155     OUT_BCS_BATCH(batch,
156                   (surface_format << 28) | /* 420 planar YUV surface */
157                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
158                   (0 << 22) | /* surface object control state, ignored */
159                   ((obj_surface->width - 1) << 3) | /* pitch */
160                   (0 << 2)  | /* must be 0 */
161                   (1 << 1)  | /* must be tiled */
162                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
163     OUT_BCS_BATCH(batch,
164                   (0 << 16) | /* X offset for U(Cb), must be 0 */
165                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
166     OUT_BCS_BATCH(batch,
167                   (0 << 16) | /* X offset for V(Cr), must be 0 */
168                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
169     ADVANCE_BCS_BATCH(batch);
170 }
171
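/*
 * MFX_PIPE_BUF_ADDR_STATE (61 dwords): the output and scratch buffers each
 * occupy a three-dword slot (relocation, or zero, plus two zeroed dwords),
 * the sixteen reference pictures two dwords each (DW 19..50).  Buffers the
 * decoder does not use are simply left as zero.
 */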
172 static void
173 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
174                              struct decode_state *decode_state,
175                              int standard_select,
176                              struct gen7_mfd_context *gen7_mfd_context)
177 {
178     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
179     int i;
180
181     BEGIN_BCS_BATCH(batch, 61);
182     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
183         /* Pre-deblocking output 1-3 */
184     if (gen7_mfd_context->pre_deblocking_output.valid)
185         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
186                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
187                       0);
188     else
189         OUT_BCS_BATCH(batch, 0);
190
191         OUT_BCS_BATCH(batch, 0);
192         OUT_BCS_BATCH(batch, 0);
193         /* Post-deblocking 4-6 */
194     if (gen7_mfd_context->post_deblocking_output.valid)
195         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
196                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
197                       0);
198     else
199         OUT_BCS_BATCH(batch, 0);
200
201         OUT_BCS_BATCH(batch, 0);
202         OUT_BCS_BATCH(batch, 0);
203
204         /* uncompressed-video & stream out 7-12 */
205     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
206     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
207         OUT_BCS_BATCH(batch, 0);
208         OUT_BCS_BATCH(batch, 0);
209         OUT_BCS_BATCH(batch, 0);
210         OUT_BCS_BATCH(batch, 0);
211
212         /* intra row-store scratch 13-15 */
213     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
214         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
215                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
216                       0);
217     else
218         OUT_BCS_BATCH(batch, 0);
219
220         OUT_BCS_BATCH(batch, 0);
221         OUT_BCS_BATCH(batch, 0);
222         /* deblocking-filter-row-store 16-18 */
223     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
224         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
225                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
226                       0);
227     else
228         OUT_BCS_BATCH(batch, 0);
229     OUT_BCS_BATCH(batch, 0);
230     OUT_BCS_BATCH(batch, 0);
231
232     /* DW 19..50 */
233     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
234         struct object_surface *obj_surface;
235
236         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
237             gen7_mfd_context->reference_surface[i].obj_surface &&
238             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
239             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
240
241             OUT_BCS_RELOC(batch, obj_surface->bo,
242                           I915_GEM_DOMAIN_INSTRUCTION, 0,
243                           0);
244         } else {
245             OUT_BCS_BATCH(batch, 0);
246         }
247         
248         OUT_BCS_BATCH(batch, 0);
249     }
250     
251     /* reference property 51 */
252     OUT_BCS_BATCH(batch, 0);  
253         
254     /* Macroblock status & ILDB 52-57 */
255     OUT_BCS_BATCH(batch, 0);
256     OUT_BCS_BATCH(batch, 0);
257     OUT_BCS_BATCH(batch, 0);
258     OUT_BCS_BATCH(batch, 0);
259     OUT_BCS_BATCH(batch, 0);
260     OUT_BCS_BATCH(batch, 0);
261
262     /* the second Macroblock status 58-60 */    
263     OUT_BCS_BATCH(batch, 0);
264     OUT_BCS_BATCH(batch, 0);
265     OUT_BCS_BATCH(batch, 0);
266
267     ADVANCE_BCS_BATCH(batch);
268 }
269
270 static void
271 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
272                                  dri_bo *slice_data_bo,
273                                  int standard_select,
274                                  struct gen7_mfd_context *gen7_mfd_context)
275 {
276     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
277
278     BEGIN_BCS_BATCH(batch, 26);
279     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
280         /* MFX In BS 1-5 */
281     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
282     OUT_BCS_BATCH(batch, 0);
283     OUT_BCS_BATCH(batch, 0);
284         /* Upper bound 4-5 */   
285     OUT_BCS_BATCH(batch, 0);
286     OUT_BCS_BATCH(batch, 0);
287
288         /* MFX indirect MV 6-10 */
289     OUT_BCS_BATCH(batch, 0);
290     OUT_BCS_BATCH(batch, 0);
291     OUT_BCS_BATCH(batch, 0);
292     OUT_BCS_BATCH(batch, 0);
293     OUT_BCS_BATCH(batch, 0);
294         
295         /* MFX IT_COFF 11-15 */
296     OUT_BCS_BATCH(batch, 0);
297     OUT_BCS_BATCH(batch, 0);
298     OUT_BCS_BATCH(batch, 0);
299     OUT_BCS_BATCH(batch, 0);
300     OUT_BCS_BATCH(batch, 0);
301
302         /* MFX IT_DBLK 16-20 */
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305     OUT_BCS_BATCH(batch, 0);
306     OUT_BCS_BATCH(batch, 0);
307     OUT_BCS_BATCH(batch, 0);
308
309         /* MFX PAK_BSE object for encoder 21-25 */
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312     OUT_BCS_BATCH(batch, 0);
313     OUT_BCS_BATCH(batch, 0);
314     OUT_BCS_BATCH(batch, 0);
315
316     ADVANCE_BCS_BATCH(batch);
317 }
318
319 static void
320 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
321                                  struct decode_state *decode_state,
322                                  int standard_select,
323                                  struct gen7_mfd_context *gen7_mfd_context)
324 {
325     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
326
327     BEGIN_BCS_BATCH(batch, 10);
328     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
329
330     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
331         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
332                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
333                       0);
334     else
335         OUT_BCS_BATCH(batch, 0);
336
337     OUT_BCS_BATCH(batch, 0);
338     OUT_BCS_BATCH(batch, 0);
339         /* MPR Row Store Scratch buffer 4-6 */
340     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
341         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
342                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
343                       0);
344     else
345         OUT_BCS_BATCH(batch, 0);
346
347     OUT_BCS_BATCH(batch, 0);
348     OUT_BCS_BATCH(batch, 0);
349
350         /* Bitplane 7-9 */ 
351     if (gen7_mfd_context->bitplane_read_buffer.valid)
352         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
353                       I915_GEM_DOMAIN_INSTRUCTION, 0,
354                       0);
355     else
356         OUT_BCS_BATCH(batch, 0);
357     OUT_BCS_BATCH(batch, 0);
358     OUT_BCS_BATCH(batch, 0);
359     ADVANCE_BCS_BATCH(batch);
360 }
361
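/*
 * Load one quantiser matrix via MFX_QM_STATE.  The full 64 bytes of the local
 * qm_buffer are always emitted, so when qm_length < 64 (the packed AVC 4x4
 * lists) the tail of the payload goes out uninitialized and is presumably
 * ignored by the hardware.
 */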
362 static void
363 gen8_mfd_qm_state(VADriverContextP ctx,
364                   int qm_type,
365                   unsigned char *qm,
366                   int qm_length,
367                   struct gen7_mfd_context *gen7_mfd_context)
368 {
369     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
370     unsigned int qm_buffer[16];
371
372     assert(qm_length <= 16 * 4);
373     memcpy(qm_buffer, qm, qm_length);
374
375     BEGIN_BCS_BATCH(batch, 18);
376     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
377     OUT_BCS_BATCH(batch, qm_type << 0);
378     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
379     ADVANCE_BCS_BATCH(batch);
380 }
381
382 static void
383 gen8_mfd_avc_img_state(VADriverContextP ctx,
384                        struct decode_state *decode_state,
385                        struct gen7_mfd_context *gen7_mfd_context)
386 {
387     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
388     int img_struct;
389     int mbaff_frame_flag;
390     unsigned int width_in_mbs, height_in_mbs;
391     VAPictureParameterBufferH264 *pic_param;
392
393     assert(decode_state->pic_param && decode_state->pic_param->buffer);
394     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
395     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
396
397     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
398         img_struct = 1;
399     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
400         img_struct = 3;
401     else
402         img_struct = 0;
403
404     if ((img_struct & 0x1) == 0x1) {
405         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
406     } else {
407         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
408     }
409
410     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
411         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
412         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
413     } else {
414         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
415     }
416
417     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
418                         !pic_param->pic_fields.bits.field_pic_flag);
419
420     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
421     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
422
423     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
424     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
425            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
426     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
427
428     BEGIN_BCS_BATCH(batch, 17);
429     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
430     OUT_BCS_BATCH(batch, 
431                   (width_in_mbs * height_in_mbs - 1));
432     OUT_BCS_BATCH(batch, 
433                   ((height_in_mbs - 1) << 16) | 
434                   ((width_in_mbs - 1) << 0));
435     OUT_BCS_BATCH(batch, 
436                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
437                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
438                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
439                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
440                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
441                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
442                   (img_struct << 8));
443     OUT_BCS_BATCH(batch,
444                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
445                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
446                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
447                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
448                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
449                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
450                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
451                   (mbaff_frame_flag << 1) |
452                   (pic_param->pic_fields.bits.field_pic_flag << 0));
453     OUT_BCS_BATCH(batch, 0);
454     OUT_BCS_BATCH(batch, 0);
455     OUT_BCS_BATCH(batch, 0);
456     OUT_BCS_BATCH(batch, 0);
457     OUT_BCS_BATCH(batch, 0);
458     OUT_BCS_BATCH(batch, 0);
459     OUT_BCS_BATCH(batch, 0);
460     OUT_BCS_BATCH(batch, 0);
461     OUT_BCS_BATCH(batch, 0);
462     OUT_BCS_BATCH(batch, 0);
463     OUT_BCS_BATCH(batch, 0);
464     OUT_BCS_BATCH(batch, 0);
465     ADVANCE_BCS_BATCH(batch);
466 }
467
468 static void
469 gen8_mfd_avc_qm_state(VADriverContextP ctx,
470                       struct decode_state *decode_state,
471                       struct gen7_mfd_context *gen7_mfd_context)
472 {
473     VAIQMatrixBufferH264 *iq_matrix;
474     VAPictureParameterBufferH264 *pic_param;
475
476     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
477         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
478     else
479         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
480
481     assert(decode_state->pic_param && decode_state->pic_param->buffer);
482     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
483
484     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
485     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
486
487     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
488         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
489         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
490     }
491 }
492
493 static inline void
494 gen8_mfd_avc_picid_state(VADriverContextP ctx,
495     struct decode_state *decode_state,
496     struct gen7_mfd_context *gen7_mfd_context)
497 {
498     gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
499         gen7_mfd_context->reference_surface);
500 }
501
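/*
 * MFX_AVC_DIRECTMODE_STATE (71 dwords): DMV buffer addresses for the sixteen
 * reference pictures and for the current picture, followed by the top/bottom
 * field order count list used for temporal direct mode.
 */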
502 static void
503 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
504                               struct decode_state *decode_state,
505                               VAPictureParameterBufferH264 *pic_param,
506                               VASliceParameterBufferH264 *slice_param,
507                               struct gen7_mfd_context *gen7_mfd_context)
508 {
509     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
510     struct object_surface *obj_surface;
511     GenAvcSurface *gen7_avc_surface;
512     VAPictureH264 *va_pic;
513     int i;
514
515     BEGIN_BCS_BATCH(batch, 71);
516     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
517
518     /* reference surfaces 0..15 */
519     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
520         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
521             gen7_mfd_context->reference_surface[i].obj_surface &&
522             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
523
524             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
525             gen7_avc_surface = obj_surface->private_data;
526
527             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
528                           I915_GEM_DOMAIN_INSTRUCTION, 0,
529                           0);
530             OUT_BCS_BATCH(batch, 0);
531         } else {
532             OUT_BCS_BATCH(batch, 0);
533             OUT_BCS_BATCH(batch, 0);
534         }
535     }
536     
537     OUT_BCS_BATCH(batch, 0);
538
539     /* the current decoding frame/field */
540     va_pic = &pic_param->CurrPic;
541     obj_surface = decode_state->render_object;
542     assert(obj_surface->bo && obj_surface->private_data);
543     gen7_avc_surface = obj_surface->private_data;
544
545     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
546                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
547                   0);
548
549     OUT_BCS_BATCH(batch, 0);
550     OUT_BCS_BATCH(batch, 0);
551
552     /* POC List */
553     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
554         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
555
556         if (obj_surface) {
557             const VAPictureH264 * const va_pic = avc_find_picture(
558                 obj_surface->base.id, pic_param->ReferenceFrames,
559                 ARRAY_ELEMS(pic_param->ReferenceFrames));
560
561             assert(va_pic != NULL);
562             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
563             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
564         } else {
565             OUT_BCS_BATCH(batch, 0);
566             OUT_BCS_BATCH(batch, 0);
567         }
568     }
569
570     va_pic = &pic_param->CurrPic;
571     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
572     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
573
574     ADVANCE_BCS_BATCH(batch);
575 }
576
577 static void
578 gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
579                                  VAPictureParameterBufferH264 *pic_param,
580                                  VASliceParameterBufferH264 *next_slice_param,
581                                  struct gen7_mfd_context *gen7_mfd_context)
582 {
583     gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
584 }
585
586 static void
587 gen8_mfd_avc_slice_state(VADriverContextP ctx,
588                          VAPictureParameterBufferH264 *pic_param,
589                          VASliceParameterBufferH264 *slice_param,
590                          VASliceParameterBufferH264 *next_slice_param,
591                          struct gen7_mfd_context *gen7_mfd_context)
592 {
593     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
594     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
595     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
596     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
597     int num_ref_idx_l0, num_ref_idx_l1;
598     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
599                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
600     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
601     int slice_type;
602
603     if (slice_param->slice_type == SLICE_TYPE_I ||
604         slice_param->slice_type == SLICE_TYPE_SI) {
605         slice_type = SLICE_TYPE_I;
606     } else if (slice_param->slice_type == SLICE_TYPE_P ||
607                slice_param->slice_type == SLICE_TYPE_SP) {
608         slice_type = SLICE_TYPE_P;
609     } else { 
610         assert(slice_param->slice_type == SLICE_TYPE_B);
611         slice_type = SLICE_TYPE_B;
612     }
613
614     if (slice_type == SLICE_TYPE_I) {
615         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
616         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
617         num_ref_idx_l0 = 0;
618         num_ref_idx_l1 = 0;
619     } else if (slice_type == SLICE_TYPE_P) {
620         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
621         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
622         num_ref_idx_l1 = 0;
623     } else {
624         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
625         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
626     }
627
628     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
629     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
630     slice_ver_pos = first_mb_in_slice / width_in_mbs;
631
632     if (next_slice_param) {
633         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
634         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
635         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
636     } else {
637         next_slice_hor_pos = 0;
638         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
639     }
640
641     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
642     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
643     OUT_BCS_BATCH(batch, slice_type);
644     OUT_BCS_BATCH(batch, 
645                   (num_ref_idx_l1 << 24) |
646                   (num_ref_idx_l0 << 16) |
647                   (slice_param->chroma_log2_weight_denom << 8) |
648                   (slice_param->luma_log2_weight_denom << 0));
649     OUT_BCS_BATCH(batch, 
650                   (slice_param->direct_spatial_mv_pred_flag << 29) |
651                   (slice_param->disable_deblocking_filter_idc << 27) |
652                   (slice_param->cabac_init_idc << 24) |
653                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
654                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
655                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
656     OUT_BCS_BATCH(batch, 
657                   (slice_ver_pos << 24) |
658                   (slice_hor_pos << 16) | 
659                   (first_mb_in_slice << 0));
660     OUT_BCS_BATCH(batch,
661                   (next_slice_ver_pos << 16) |
662                   (next_slice_hor_pos << 0));
663     OUT_BCS_BATCH(batch, 
664                   (next_slice_param == NULL) << 19); /* last slice flag */
665     OUT_BCS_BATCH(batch, 0);
666     OUT_BCS_BATCH(batch, 0);
667     OUT_BCS_BATCH(batch, 0);
668     OUT_BCS_BATCH(batch, 0);
669     ADVANCE_BCS_BATCH(batch);
670 }
671
672 static inline void
673 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
674                            VAPictureParameterBufferH264 *pic_param,
675                            VASliceParameterBufferH264 *slice_param,
676                            struct gen7_mfd_context *gen7_mfd_context)
677 {
678     gen6_send_avc_ref_idx_state(
679         gen7_mfd_context->base.batch,
680         slice_param,
681         gen7_mfd_context->reference_surface
682     );
683 }
684
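/*
 * MFX_AVC_WEIGHTOFFSET_STATE: one 96-dword table per reference list, emitted
 * only for explicit weighted prediction (P/SP slices with weighted_pred_flag,
 * both lists for B slices with weighted_bipred_idc == 1).  Each of the 32
 * entries packs six 16-bit values: luma weight/offset, then the Cb and Cr
 * weight/offset pairs.
 */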
685 static void
686 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
687                                 VAPictureParameterBufferH264 *pic_param,
688                                 VASliceParameterBufferH264 *slice_param,
689                                 struct gen7_mfd_context *gen7_mfd_context)
690 {
691     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
692     int i, j, num_weight_offset_table = 0;
693     short weightoffsets[32 * 6];
694
695     if ((slice_param->slice_type == SLICE_TYPE_P ||
696          slice_param->slice_type == SLICE_TYPE_SP) &&
697         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
698         num_weight_offset_table = 1;
699     }
700     
701     if ((slice_param->slice_type == SLICE_TYPE_B) &&
702         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
703         num_weight_offset_table = 2;
704     }
705
706     for (i = 0; i < num_weight_offset_table; i++) {
707         BEGIN_BCS_BATCH(batch, 98);
708         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
709         OUT_BCS_BATCH(batch, i);
710
711         if (i == 0) {
712             for (j = 0; j < 32; j++) {
713                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
714                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
715                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
716                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
717                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
718                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
719             }
720         } else {
721             for (j = 0; j < 32; j++) {
722                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
723                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
724                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
725                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
726                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
727                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
728             }
729         }
730
731         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
732         ADVANCE_BCS_BATCH(batch);
733     }
734 }
735
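/*
 * MFD_AVC_BSD_OBJECT: points the BSD engine at one slice in the indirect
 * bitstream buffer.  The position of the first macroblock is programmed as a
 * byte offset plus a 0..7 bit remainder, both derived from
 * avc_get_first_mb_bit_offset().
 */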
736 static void
737 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
738                         VAPictureParameterBufferH264 *pic_param,
739                         VASliceParameterBufferH264 *slice_param,
740                         dri_bo *slice_data_bo,
741                         VASliceParameterBufferH264 *next_slice_param,
742                         struct gen7_mfd_context *gen7_mfd_context)
743 {
744     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
745     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
746                                                             slice_param,
747                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
748
749     /* the input bitstream format on GEN7 differs from GEN6 */
750     BEGIN_BCS_BATCH(batch, 6);
751     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
752     OUT_BCS_BATCH(batch, 
753                   (slice_param->slice_data_size));
754     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
755     OUT_BCS_BATCH(batch,
756                   (0 << 31) |
757                   (0 << 14) |
758                   (0 << 12) |
759                   (0 << 10) |
760                   (0 << 8));
761     OUT_BCS_BATCH(batch,
762                   ((slice_data_bit_offset >> 3) << 16) |
763                   (1 << 7)  |
764                   (0 << 5)  |
765                   (0 << 4)  |
766                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
767                   (slice_data_bit_offset & 0x7));
768     OUT_BCS_BATCH(batch, 0);
769     ADVANCE_BCS_BATCH(batch);
770 }
771
772 static inline void
773 gen8_mfd_avc_context_init(
774     VADriverContextP         ctx,
775     struct gen7_mfd_context *gen7_mfd_context
776 )
777 {
778     /* Initialize flat scaling lists */
779     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
780 }
781
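/*
 * Per-frame AVC setup: scan the slice headers to see whether in-loop
 * deblocking is ever enabled (this decides whether the render target is
 * written through the post- or pre-deblocking path), refresh the frame store,
 * and (re)allocate the row-store scratch buffers sized from the frame width
 * in macroblocks.
 */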
782 static void
783 gen8_mfd_avc_decode_init(VADriverContextP ctx,
784                          struct decode_state *decode_state,
785                          struct gen7_mfd_context *gen7_mfd_context)
786 {
787     VAPictureParameterBufferH264 *pic_param;
788     VASliceParameterBufferH264 *slice_param;
789     struct i965_driver_data *i965 = i965_driver_data(ctx);
790     struct object_surface *obj_surface;
791     dri_bo *bo;
792     int i, j, enable_avc_ildb = 0;
793     unsigned int width_in_mbs, height_in_mbs;
794
795     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
796         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
797         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
798
799         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
800             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
801             assert((slice_param->slice_type == SLICE_TYPE_I) ||
802                    (slice_param->slice_type == SLICE_TYPE_SI) ||
803                    (slice_param->slice_type == SLICE_TYPE_P) ||
804                    (slice_param->slice_type == SLICE_TYPE_SP) ||
805                    (slice_param->slice_type == SLICE_TYPE_B));
806
807             if (slice_param->disable_deblocking_filter_idc != 1) {
808                 enable_avc_ildb = 1;
809                 break;
810             }
811
812             slice_param++;
813         }
814     }
815
816     assert(decode_state->pic_param && decode_state->pic_param->buffer);
817     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
818     gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
819         gen7_mfd_context->reference_surface);
820     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
821     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
822     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
823     assert(height_in_mbs > 0 && height_in_mbs <= 256);
824
825     /* Current decoded picture */
826     obj_surface = decode_state->render_object;
827     if (pic_param->pic_fields.bits.reference_pic_flag)
828         obj_surface->flags |= SURFACE_REFERENCED;
829     else
830         obj_surface->flags &= ~SURFACE_REFERENCED;
831
832     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
833     gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
834
835     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
836     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
837     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
838     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
839
840     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
841     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
842     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
843     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
844
845     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
846     bo = dri_bo_alloc(i965->intel.bufmgr,
847                       "intra row store",
848                       width_in_mbs * 64,
849                       0x1000);
850     assert(bo);
851     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
852     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
853
854     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
855     bo = dri_bo_alloc(i965->intel.bufmgr,
856                       "deblocking filter row store",
857                       width_in_mbs * 64 * 4,
858                       0x1000);
859     assert(bo);
860     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
861     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
862
863     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
864     bo = dri_bo_alloc(i965->intel.bufmgr,
865                       "bsd mpc row store",
866                       width_in_mbs * 64 * 2,
867                       0x1000);
868     assert(bo);
869     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
870     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
871
872     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
873     bo = dri_bo_alloc(i965->intel.bufmgr,
874                       "mpr row store",
875                       width_in_mbs * 64 * 2,
876                       0x1000);
877     assert(bo);
878     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
879     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
880
881     gen7_mfd_context->bitplane_read_buffer.valid = 0;
882 }
883
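/*
 * Top-level AVC decode: one atomic BCS batch per frame.  Frame-level state
 * (pipe mode, surface, buffer addresses, QM, PICID, IMG state) is emitted
 * once; then, for every slice, the direct-mode, ref-idx, weight/offset and
 * slice-state commands are followed by the BSD object that actually kicks
 * off decoding of that slice.
 */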
884 static void
885 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
886                             struct decode_state *decode_state,
887                             struct gen7_mfd_context *gen7_mfd_context)
888 {
889     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
890     VAPictureParameterBufferH264 *pic_param;
891     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
892     dri_bo *slice_data_bo;
893     int i, j;
894
895     assert(decode_state->pic_param && decode_state->pic_param->buffer);
896     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
897     gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
898
899     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
900     intel_batchbuffer_emit_mi_flush(batch);
901     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
902     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
903     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
904     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
905     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
906     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
907     gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
908
909     for (j = 0; j < decode_state->num_slice_params; j++) {
910         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
911         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
912         slice_data_bo = decode_state->slice_datas[j]->bo;
913         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
914
915         if (j == decode_state->num_slice_params - 1)
916             next_slice_group_param = NULL;
917         else
918             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
919
920         if (j == 0 && slice_param->first_mb_in_slice)
921             gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context); 
922
923         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
924             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
925             assert((slice_param->slice_type == SLICE_TYPE_I) ||
926                    (slice_param->slice_type == SLICE_TYPE_SI) ||
927                    (slice_param->slice_type == SLICE_TYPE_P) ||
928                    (slice_param->slice_type == SLICE_TYPE_SP) ||
929                    (slice_param->slice_type == SLICE_TYPE_B));
930
931             if (i < decode_state->slice_params[j]->num_elements - 1)
932                 next_slice_param = slice_param + 1;
933             else
934                 next_slice_param = next_slice_group_param;
935
936             gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
937             gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
938             gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
939             gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
940             gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
941             slice_param++;
942         }
943     }
944
945     intel_batchbuffer_end_atomic(batch);
946     intel_batchbuffer_flush(batch);
947 }
948
949 static void
950 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
951                            struct decode_state *decode_state,
952                            struct gen7_mfd_context *gen7_mfd_context)
953 {
954     VAPictureParameterBufferMPEG2 *pic_param;
955     struct i965_driver_data *i965 = i965_driver_data(ctx);
956     struct object_surface *obj_surface;
957     dri_bo *bo;
958     unsigned int width_in_mbs;
959
960     assert(decode_state->pic_param && decode_state->pic_param->buffer);
961     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
962     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
963
964     mpeg2_set_reference_surfaces(
965         ctx,
966         gen7_mfd_context->reference_surface,
967         decode_state,
968         pic_param
969     );
970
971     /* Current decoded picture */
972     obj_surface = decode_state->render_object;
973     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
974
975     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
976     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
977     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
978     gen7_mfd_context->pre_deblocking_output.valid = 1;
979
980     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
981     bo = dri_bo_alloc(i965->intel.bufmgr,
982                       "bsd mpc row store",
983                       width_in_mbs * 96,
984                       0x1000);
985     assert(bo);
986     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
987     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
988
989     gen7_mfd_context->post_deblocking_output.valid = 0;
990     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
991     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
992     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
993     gen7_mfd_context->bitplane_read_buffer.valid = 0;
994 }
995
996 static void
997 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
998                          struct decode_state *decode_state,
999                          struct gen7_mfd_context *gen7_mfd_context)
1000 {
1001     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1002     VAPictureParameterBufferMPEG2 *pic_param;
1003     unsigned int slice_concealment_disable_bit = 0;
1004
1005     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1006     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1007
1008     slice_concealment_disable_bit = 1;
1009
1010     BEGIN_BCS_BATCH(batch, 13);
1011     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1012     OUT_BCS_BATCH(batch,
1013                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1014                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1015                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1016                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1017                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1018                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1019                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1020                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1021                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1022                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1023                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1024                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1025     OUT_BCS_BATCH(batch,
1026                   pic_param->picture_coding_type << 9);
1027     OUT_BCS_BATCH(batch,
1028                   (slice_concealment_disable_bit << 31) |
1029                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1030                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1031     OUT_BCS_BATCH(batch, 0);
1032     OUT_BCS_BATCH(batch, 0);
1033     OUT_BCS_BATCH(batch, 0);
1034     OUT_BCS_BATCH(batch, 0);
1035     OUT_BCS_BATCH(batch, 0);
1036     OUT_BCS_BATCH(batch, 0);
1037     OUT_BCS_BATCH(batch, 0);
1038     OUT_BCS_BATCH(batch, 0);
1039     OUT_BCS_BATCH(batch, 0);
1040     ADVANCE_BCS_BATCH(batch);
1041 }
1042
1043 static void
1044 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1045                         struct decode_state *decode_state,
1046                         struct gen7_mfd_context *gen7_mfd_context)
1047 {
1048     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1049     int i, j;
1050
1051     /* Update internal QM state */
1052     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1053         VAIQMatrixBufferMPEG2 * const iq_matrix =
1054             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1055
1056         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1057             iq_matrix->load_intra_quantiser_matrix) {
1058             gen_iq_matrix->load_intra_quantiser_matrix =
1059                 iq_matrix->load_intra_quantiser_matrix;
1060             if (iq_matrix->load_intra_quantiser_matrix) {
1061                 for (j = 0; j < 64; j++)
1062                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1063                         iq_matrix->intra_quantiser_matrix[j];
1064             }
1065         }
1066
1067         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1068             iq_matrix->load_non_intra_quantiser_matrix) {
1069             gen_iq_matrix->load_non_intra_quantiser_matrix =
1070                 iq_matrix->load_non_intra_quantiser_matrix;
1071             if (iq_matrix->load_non_intra_quantiser_matrix) {
1072                 for (j = 0; j < 64; j++)
1073                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1074                         iq_matrix->non_intra_quantiser_matrix[j];
1075             }
1076         }
1077     }
1078
1079     /* Commit QM state to HW */
1080     for (i = 0; i < 2; i++) {
1081         unsigned char *qm = NULL;
1082         int qm_type;
1083
1084         if (i == 0) {
1085             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1086                 qm = gen_iq_matrix->intra_quantiser_matrix;
1087                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1088             }
1089         } else {
1090             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1091                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1092                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1093             }
1094         }
1095
1096         if (!qm)
1097             continue;
1098
1099         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1100     }
1101 }
1102
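/*
 * MFD_MPEG2_BSD_OBJECT: one command per slice.  The macroblock count is the
 * distance from this slice's first MB to the next slice's first MB (or to
 * the end of the picture), with the vertical position halved when the
 * field-picture workaround (wa_mpeg2_slice_vertical_position) is in effect.
 */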
1103 static void
1104 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1105                           VAPictureParameterBufferMPEG2 *pic_param,
1106                           VASliceParameterBufferMPEG2 *slice_param,
1107                           VASliceParameterBufferMPEG2 *next_slice_param,
1108                           struct gen7_mfd_context *gen7_mfd_context)
1109 {
1110     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1111     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1112     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1113
1114     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1115         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1116         is_field_pic = 1;
1117     is_field_pic_wa = is_field_pic &&
1118         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1119
1120     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1121     hpos0 = slice_param->slice_horizontal_position;
1122
1123     if (next_slice_param == NULL) {
1124         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1125         hpos1 = 0;
1126     } else {
1127         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1128         hpos1 = next_slice_param->slice_horizontal_position;
1129     }
1130
1131     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1132
1133     BEGIN_BCS_BATCH(batch, 5);
1134     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1135     OUT_BCS_BATCH(batch, 
1136                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1137     OUT_BCS_BATCH(batch, 
1138                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1139     OUT_BCS_BATCH(batch,
1140                   hpos0 << 24 |
1141                   vpos0 << 16 |
1142                   mb_count << 8 |
1143                   (next_slice_param == NULL) << 5 |
1144                   (next_slice_param == NULL) << 3 |
1145                   (slice_param->macroblock_offset & 0x7));
1146     OUT_BCS_BATCH(batch,
1147                   (slice_param->quantiser_scale_code << 24) |
1148                   (vpos1 << 8 | hpos1));
1149     ADVANCE_BCS_BATCH(batch);
1150 }
1151
1152 static void
1153 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1154                               struct decode_state *decode_state,
1155                               struct gen7_mfd_context *gen7_mfd_context)
1156 {
1157     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1158     VAPictureParameterBufferMPEG2 *pic_param;
1159     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1160     dri_bo *slice_data_bo;
1161     int i, j;
1162
1163     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1164     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1165
1166     gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1167     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1168     intel_batchbuffer_emit_mi_flush(batch);
1169     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1170     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1171     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1172     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1173     gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1174     gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1175
1176     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1177         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1178             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1179
1180     for (j = 0; j < decode_state->num_slice_params; j++) {
1181         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1182         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1183         slice_data_bo = decode_state->slice_datas[j]->bo;
1184         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1185
1186         if (j == decode_state->num_slice_params - 1)
1187             next_slice_group_param = NULL;
1188         else
1189             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1190
1191         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1192             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1193
1194             if (i < decode_state->slice_params[j]->num_elements - 1)
1195                 next_slice_param = slice_param + 1;
1196             else
1197                 next_slice_param = next_slice_group_param;
1198
1199             gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1200             slice_param++;
1201         }
1202     }
1203
1204     intel_batchbuffer_end_atomic(batch);
1205     intel_batchbuffer_flush(batch);
1206 }
1207
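/*
 * Translation tables from the VA-API VC-1 enumerations (picture type, MV
 * mode, conditional overlap, profile) to the hardware encodings; the
 * b_picture_scale_factor entries appear to be the VC-1 BFRACTION scale
 * factors used for B-picture motion vector scaling.
 */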
1208 static const int va_to_gen7_vc1_pic_type[5] = {
1209     GEN7_VC1_I_PICTURE,
1210     GEN7_VC1_P_PICTURE,
1211     GEN7_VC1_B_PICTURE,
1212     GEN7_VC1_BI_PICTURE,
1213     GEN7_VC1_P_PICTURE,
1214 };
1215
1216 static const int va_to_gen7_vc1_mv[4] = {
1217     1, /* 1-MV */
1218     2, /* 1-MV half-pel */
1219     3, /* 1-MV half-pel bilinear */
1220     0, /* Mixed MV */
1221 };
1222
1223 static const int b_picture_scale_factor[21] = {
1224     128, 85,  170, 64,  192,
1225     51,  102, 153, 204, 43,
1226     215, 37,  74,  111, 148,
1227     185, 222, 32,  96,  160, 
1228     224,
1229 };
1230
1231 static const int va_to_gen7_vc1_condover[3] = {
1232     0,
1233     2,
1234     3
1235 };
1236
1237 static const int va_to_gen7_vc1_profile[4] = {
1238     GEN7_VC1_SIMPLE_PROFILE,
1239     GEN7_VC1_MAIN_PROFILE,
1240     GEN7_VC1_RESERVED_PROFILE,
1241     GEN7_VC1_ADVANCED_PROFILE
1242 };
1243
1244 static void 
1245 gen8_mfd_free_vc1_surface(void **data)
1246 {
1247     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1248
1249     if (!gen7_vc1_surface)
1250         return;
1251
1252     dri_bo_unreference(gen7_vc1_surface->dmv);
1253     free(gen7_vc1_surface);
1254     *data = NULL;
1255 }
1256
1257 static void
1258 gen8_mfd_init_vc1_surface(VADriverContextP ctx, 
1259                           VAPictureParameterBufferVC1 *pic_param,
1260                           struct object_surface *obj_surface)
1261 {
1262     struct i965_driver_data *i965 = i965_driver_data(ctx);
1263     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1264     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1265     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1266
1267     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1268
1269     if (!gen7_vc1_surface) {
1270         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1271         assert((obj_surface->size & 0x3f) == 0);
1272         obj_surface->private_data = gen7_vc1_surface;
1273     }
1274
1275     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1276
1277     if (gen7_vc1_surface->dmv == NULL) {
1278         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1279                                              "direct mv w/r buffer",
1280                                              width_in_mbs * height_in_mbs * 64,
1281                                              0x1000);
1282     }
1283 }
1284
1285 static void
1286 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1287                          struct decode_state *decode_state,
1288                          struct gen7_mfd_context *gen7_mfd_context)
1289 {
1290     VAPictureParameterBufferVC1 *pic_param;
1291     struct i965_driver_data *i965 = i965_driver_data(ctx);
1292     struct object_surface *obj_surface;
1293     dri_bo *bo;
1294     int width_in_mbs;
1295     int picture_type;
1296
1297     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1298     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1299     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1300     picture_type = pic_param->picture_fields.bits.picture_type;
1301  
1302     intel_update_vc1_frame_store_index(ctx,
1303                                        decode_state,
1304                                        pic_param,
1305                                        gen7_mfd_context->reference_surface);
1306
1307     /* Current decoded picture */
1308     obj_surface = decode_state->render_object;
1309     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1310     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1311
1312     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1313     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1314     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1315     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1316
1317     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1318     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1319     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1320     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1321
1322     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1323     bo = dri_bo_alloc(i965->intel.bufmgr,
1324                       "intra row store",
1325                       width_in_mbs * 64,
1326                       0x1000);
1327     assert(bo);
1328     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1329     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1330
1331     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1332     bo = dri_bo_alloc(i965->intel.bufmgr,
1333                       "deblocking filter row store",
1334                       width_in_mbs * 7 * 64,
1335                       0x1000);
1336     assert(bo);
1337     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1338     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1339
1340     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1341     bo = dri_bo_alloc(i965->intel.bufmgr,
1342                       "bsd mpc row store",
1343                       width_in_mbs * 96,
1344                       0x1000);
1345     assert(bo);
1346     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1347     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1348
1349     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1350
1351     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1352     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1353     
1354     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1355         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1356         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1357         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1358         int src_w, src_h;
1359         uint8_t *src = NULL, *dst = NULL;
1360
1361         assert(decode_state->bit_plane->buffer);
1362         src = decode_state->bit_plane->buffer;
1363
1364         bo = dri_bo_alloc(i965->intel.bufmgr,
1365                           "VC-1 Bitplane",
1366                           bitplane_width * height_in_mbs,
1367                           0x1000);
1368         assert(bo);
1369         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1370
1371         dri_bo_map(bo, True);
1372         assert(bo->virtual);
1373         dst = bo->virtual;
1374
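        /*
         * Repack the VA-API bitplane data for the hardware: the source holds
         * two macroblocks per byte (one 4-bit nibble each, in raster order),
         * and the destination uses the same nibble packing but starts a new
         * row of bitplane_width bytes for every macroblock row.  For skipped
         * pictures bit 0x2 is forced on in every nibble, presumably the
         * per-macroblock skip flag expected by the MFX engine.
         */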
1375         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1376             for (src_w = 0; src_w < width_in_mbs; src_w++) {
1377                 int src_index, dst_index;
1378                 int src_shift;
1379                 uint8_t src_value;
1380
1381                 src_index = (src_h * width_in_mbs + src_w) / 2;
1382                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1383                 src_value = ((src[src_index] >> src_shift) & 0xf);
1384
1385                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1386                     src_value |= 0x2;
1387                 }
1388
1389                 dst_index = src_w / 2;
1390                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1391             }
1392
1393             if (src_w & 1)
1394                 dst[src_w / 2] >>= 4;
1395
1396             dst += bitplane_width;
1397         }
1398
1399         dri_bo_unmap(bo);
1400     } else
1401         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1402 }
1403
1404 static void
1405 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1406                        struct decode_state *decode_state,
1407                        struct gen7_mfd_context *gen7_mfd_context)
1408 {
1409     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1410     VAPictureParameterBufferVC1 *pic_param;
1411     struct object_surface *obj_surface;
1412     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1413     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1414     int unified_mv_mode;
1415     int ref_field_pic_polarity = 0;
1416     int scale_factor = 0;
1417     int trans_ac_y = 0;
1418     int dmv_surface_valid = 0;
1419     int brfd = 0;
1420     int fcm = 0;
1421     int picture_type;
1422     int profile;
1423     int overlap;
1424     int interpolation_mode = 0;
1425
1426     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1427     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1428
1429     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1430     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1431     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1432     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1433     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1434     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1435     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1436     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1437
1438     if (dquant == 0) {
1439         alt_pquant_config = 0;
1440         alt_pquant_edge_mask = 0;
1441     } else if (dquant == 2) {
1442         alt_pquant_config = 1;
1443         alt_pquant_edge_mask = 0xf;
1444     } else {
1445         assert(dquant == 1);
1446         if (dquantfrm == 0) {
1447             alt_pquant_config = 0;
1448             alt_pquant_edge_mask = 0;
1449             alt_pq = 0;
1450         } else {
1451             assert(dquantfrm == 1);
1452             alt_pquant_config = 1;
1453
1454             switch (dqprofile) {
1455             case 3:
1456                 if (dqbilevel == 0) {
1457                     alt_pquant_config = 2;
1458                     alt_pquant_edge_mask = 0;
1459                 } else {
1460                     assert(dqbilevel == 1);
1461                     alt_pquant_config = 3;
1462                     alt_pquant_edge_mask = 0;
1463                 }
1464                 break;
1465                 
1466             case 0:
1467                 alt_pquant_edge_mask = 0xf;
1468                 break;
1469
1470             case 1:
1471                 if (dqdbedge == 3)
1472                     alt_pquant_edge_mask = 0x9;
1473                 else
1474                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1475
1476                 break;
1477
1478             case 2:
1479                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1480                 break;
1481
1482             default:
1483                 assert(0);
1484             }
1485         }
1486     }
1487
1488     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1489         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1490         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1491     } else {
1492         assert(pic_param->mv_fields.bits.mv_mode < 4);
1493         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1494     }
1495
1496     if (pic_param->sequence_fields.bits.interlace == 1 &&
1497         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1498         /* FIXME: calculate reference field picture polarity */
1499         assert(0);
1500         ref_field_pic_polarity = 0;
1501     }
1502
1503     if (pic_param->b_picture_fraction < 21)
1504         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1505
1506     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1507     
1508     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1509         picture_type == GEN7_VC1_I_PICTURE)
1510         picture_type = GEN7_VC1_BI_PICTURE;
1511
1512     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1513         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1514     else {
1515         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1516
1517         /*
1518          * 8.3.6.2.1 Transform Type Selection
1519          * If variable-sized transform coding is not enabled,
1520          * then the 8x8 transform shall be used for all blocks.
1521          * it is also MFX_VC1_PIC_STATE requirement.
1522          * This is also a MFX_VC1_PIC_STATE requirement.
1523         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1524             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1525             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1526         }
1527     }
1528
1529     if (picture_type == GEN7_VC1_B_PICTURE) {
1530         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1531
1532         obj_surface = decode_state->reference_objects[1];
1533
1534         if (obj_surface)
1535             gen7_vc1_surface = obj_surface->private_data;
1536
1537         if (!gen7_vc1_surface || 
1538             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1539              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1540             dmv_surface_valid = 0;
1541         else
1542             dmv_surface_valid = 1;
1543     }
1544
1545     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1546
1547     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1548         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1549     else {
1550         if (pic_param->picture_fields.bits.top_field_first)
1551             fcm = 2;
1552         else
1553             fcm = 3;
1554     }
1555
1556     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1557         brfd = pic_param->reference_fields.bits.reference_distance;
1558         brfd = (scale_factor * brfd) >> 8;
1559         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1560
1561         if (brfd < 0)
1562             brfd = 0;
1563     }
1564
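    /*
     * Derive the overlap-smoothing enable from the sequence-level OVERLAP
     * flag: for simple/main profile it only applies when PQUANT >= 9 and the
     * picture is not a B picture; for advanced profile it applies to P
     * pictures with PQUANT >= 9, and to I/BI pictures with PQUANT >= 9 or
     * with CONDOVER requesting overlap filtering.
     */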
1565     overlap = pic_param->sequence_fields.bits.overlap;
1566
1567     if (overlap) {
1568         overlap = 0;
1569         if (profile != GEN7_VC1_ADVANCED_PROFILE) {
1570             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1571                 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1572                 overlap = 1;
1573             }
1574         } else {
1575             if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1576                 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1577                 overlap = 1;
1578             }
1579             if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1580                 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
1581                 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1582                     overlap = 1;
1583                 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1584                            va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1585                     overlap = 1;
1586                 }
1587             }
1588         }
1589     } 
1590
1591     assert(pic_param->conditional_overlap_flag < 3);
1592     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1593
1594     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1595         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1596          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1597         interpolation_mode = 9; /* Half-pel bilinear */
1598     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1599              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1600               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1601         interpolation_mode = 1; /* Half-pel bicubic */
1602     else
1603         interpolation_mode = 0; /* Quarter-pel bicubic */
1604
1605     BEGIN_BCS_BATCH(batch, 6);
1606     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1607     OUT_BCS_BATCH(batch,
1608                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1609                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1610     OUT_BCS_BATCH(batch,
1611                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1612                   dmv_surface_valid << 15 |
1613                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1614                   pic_param->rounding_control << 13 |
1615                   pic_param->sequence_fields.bits.syncmarker << 12 |
1616                   interpolation_mode << 8 |
1617                   0 << 7 | /* FIXME: scale up or down ??? */
1618                   pic_param->range_reduction_frame << 6 |
1619                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1620                   overlap << 4 |
1621                   !pic_param->picture_fields.bits.is_first_field << 3 |
1622                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1623     OUT_BCS_BATCH(batch,
1624                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1625                   picture_type << 26 |
1626                   fcm << 24 |
1627                   alt_pq << 16 |
1628                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1629                   scale_factor << 0);
1630     OUT_BCS_BATCH(batch,
1631                   unified_mv_mode << 28 |
1632                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1633                   pic_param->fast_uvmc_flag << 26 |
1634                   ref_field_pic_polarity << 25 |
1635                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1636                   pic_param->reference_fields.bits.reference_distance << 20 |
1637                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1638                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1639                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1640                   alt_pquant_edge_mask << 4 |
1641                   alt_pquant_config << 2 |
1642                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1643                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1644     OUT_BCS_BATCH(batch,
1645                   !!pic_param->bitplane_present.value << 31 |
1646                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1647                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1648                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1649                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1650                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1651                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1652                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1653                   pic_param->mv_fields.bits.mv_table << 20 |
1654                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1655                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1656                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1657                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1658                   pic_param->mb_mode_table << 8 |
1659                   trans_ac_y << 6 |
1660                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1661                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1662                   pic_param->cbp_table << 0);
1663     ADVANCE_BCS_BATCH(batch);
1664 }
1665
1666 static void
1667 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1668                              struct decode_state *decode_state,
1669                              struct gen7_mfd_context *gen7_mfd_context)
1670 {
1671     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1672     VAPictureParameterBufferVC1 *pic_param;
1673     int intensitycomp_single;
1674
1675     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1676     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1677
1680     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1681
1682     BEGIN_BCS_BATCH(batch, 6);
1683     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1684     OUT_BCS_BATCH(batch,
1685                   0 << 14 | /* FIXME: double ??? */
1686                   0 << 12 |
1687                   intensitycomp_single << 10 |
1688                   intensitycomp_single << 8 |
1689                   0 << 4 | /* FIXME: interlace mode */
1690                   0);
1691     OUT_BCS_BATCH(batch,
1692                   pic_param->luma_shift << 16 |
1693                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1694     OUT_BCS_BATCH(batch, 0);
1695     OUT_BCS_BATCH(batch, 0);
1696     OUT_BCS_BATCH(batch, 0);
1697     ADVANCE_BCS_BATCH(batch);
1698 }
1699
1700 static void
1701 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1702                               struct decode_state *decode_state,
1703                               struct gen7_mfd_context *gen7_mfd_context)
1704 {
1705     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1706     struct object_surface *obj_surface;
1707     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1708
1709     obj_surface = decode_state->render_object;
1710
1711     if (obj_surface && obj_surface->private_data) {
1712         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1713     }
1714
1715     obj_surface = decode_state->reference_objects[1];
1716
1717     if (obj_surface && obj_surface->private_data) {
1718         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1719     }
1720
1721     BEGIN_BCS_BATCH(batch, 7);
1722     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1723
1724     if (dmv_write_buffer)
1725         OUT_BCS_RELOC(batch, dmv_write_buffer,
1726                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1727                       0);
1728     else
1729         OUT_BCS_BATCH(batch, 0);
1730
1731     OUT_BCS_BATCH(batch, 0);
1732     OUT_BCS_BATCH(batch, 0);
1733
1734     if (dmv_read_buffer)
1735         OUT_BCS_RELOC(batch, dmv_read_buffer,
1736                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1737                       0);
1738     else
1739         OUT_BCS_BATCH(batch, 0);
1740     
1741     OUT_BCS_BATCH(batch, 0);
1742     OUT_BCS_BATCH(batch, 0);
1743                   
1744     ADVANCE_BCS_BATCH(batch);
1745 }
1746
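/*
 * For the advanced profile the slice data handed to the hardware still
 * contains the 0x00 0x00 0x03 emulation-prevention sequences, so the
 * macroblock data bit offset from the slice parameters has to be increased
 * by one byte for every such sequence found in the slice header.
 */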
1747 static int
1748 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1749 {
1750     int out_slice_data_bit_offset;
1751     int slice_header_size = in_slice_data_bit_offset / 8;
1752     int i, j;
1753
1754     if (profile != 3)
1755         out_slice_data_bit_offset = in_slice_data_bit_offset;
1756     else {
1757         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1758             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1759                 i++, j += 2;
1760             }
1761         }
1762
1763         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1764     }
1765
1766     return out_slice_data_bit_offset;
1767 }
1768
1769 static void
1770 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1771                         VAPictureParameterBufferVC1 *pic_param,
1772                         VASliceParameterBufferVC1 *slice_param,
1773                         VASliceParameterBufferVC1 *next_slice_param,
1774                         dri_bo *slice_data_bo,
1775                         struct gen7_mfd_context *gen7_mfd_context)
1776 {
1777     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1778     int next_slice_start_vert_pos;
1779     int macroblock_offset;
1780     uint8_t *slice_data = NULL;
1781
1782     dri_bo_map(slice_data_bo, 0);
1783     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1784     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1785                                                                slice_param->macroblock_offset,
1786                                                                pic_param->sequence_fields.bits.profile);
1787     dri_bo_unmap(slice_data_bo);
1788
1789     if (next_slice_param)
1790         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1791     else
1792         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1793
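    /*
     * Fold the whole bytes of the macroblock bit offset into the slice data
     * offset/length and pass only the remaining bit offset (0-7) in the last
     * DWORD of MFD_VC1_BSD_OBJECT.
     */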
1794     BEGIN_BCS_BATCH(batch, 5);
1795     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1796     OUT_BCS_BATCH(batch, 
1797                   slice_param->slice_data_size - (macroblock_offset >> 3));
1798     OUT_BCS_BATCH(batch, 
1799                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1800     OUT_BCS_BATCH(batch,
1801                   slice_param->slice_vertical_position << 16 |
1802                   next_slice_start_vert_pos << 0);
1803     OUT_BCS_BATCH(batch,
1804                   (macroblock_offset & 0x7));
1805     ADVANCE_BCS_BATCH(batch);
1806 }
1807
1808 static void
1809 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1810                             struct decode_state *decode_state,
1811                             struct gen7_mfd_context *gen7_mfd_context)
1812 {
1813     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1814     VAPictureParameterBufferVC1 *pic_param;
1815     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1816     dri_bo *slice_data_bo;
1817     int i, j;
1818
1819     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1820     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1821
1822     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1823     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1824     intel_batchbuffer_emit_mi_flush(batch);
1825     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1826     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1827     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1828     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1829     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1830     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1831     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1832
1833     for (j = 0; j < decode_state->num_slice_params; j++) {
1834         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1835         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1836         slice_data_bo = decode_state->slice_datas[j]->bo;
1837         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1838
1839         if (j == decode_state->num_slice_params - 1)
1840             next_slice_group_param = NULL;
1841         else
1842             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1843
1844         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1845             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1846
1847             if (i < decode_state->slice_params[j]->num_elements - 1)
1848                 next_slice_param = slice_param + 1;
1849             else
1850                 next_slice_param = next_slice_group_param;
1851
1852             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1853             slice_param++;
1854         }
1855     }
1856
1857     intel_batchbuffer_end_atomic(batch);
1858     intel_batchbuffer_flush(batch);
1859 }
1860
1861 static void
1862 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1863                           struct decode_state *decode_state,
1864                           struct gen7_mfd_context *gen7_mfd_context)
1865 {
1866     struct object_surface *obj_surface;
1867     VAPictureParameterBufferJPEGBaseline *pic_param;
1868     int subsampling = SUBSAMPLE_YUV420;
1869     int fourcc = VA_FOURCC_IMC3;
1870
1871     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1872
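    /*
     * Choose the render-target fourcc and subsampling from the per-component
     * sampling factors; the combinations below mirror the chroma types
     * programmed by gen8_mfd_jpeg_pic_state(), so the decoded surface layout
     * matches what MFX_JPEG_PIC_STATE is told.
     */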
1873     if (pic_param->num_components == 1)
1874         subsampling = SUBSAMPLE_YUV400;
1875     else if (pic_param->num_components == 3) {
1876         int h1 = pic_param->components[0].h_sampling_factor;
1877         int h2 = pic_param->components[1].h_sampling_factor;
1878         int h3 = pic_param->components[2].h_sampling_factor;
1879         int v1 = pic_param->components[0].v_sampling_factor;
1880         int v2 = pic_param->components[1].v_sampling_factor;
1881         int v3 = pic_param->components[2].v_sampling_factor;
1882
1883         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1884             v1 == 2 && v2 == 1 && v3 == 1) {
1885             subsampling = SUBSAMPLE_YUV420;
1886             fourcc = VA_FOURCC_IMC3;
1887         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1888                    v1 == 1 && v2 == 1 && v3 == 1) {
1889             subsampling = SUBSAMPLE_YUV422H;
1890             fourcc = VA_FOURCC_422H;
1891         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1892                    v1 == 1 && v2 == 1 && v3 == 1) {
1893             subsampling = SUBSAMPLE_YUV444;
1894             fourcc = VA_FOURCC_444P;
1895         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1896                    v1 == 1 && v2 == 1 && v3 == 1) {
1897             subsampling = SUBSAMPLE_YUV411;
1898             fourcc = VA_FOURCC_411P;
1899         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1900                    v1 == 2 && v2 == 1 && v3 == 1) {
1901             subsampling = SUBSAMPLE_YUV422V;
1902             fourcc = VA_FOURCC_422V;
1903         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1904                    v1 == 2 && v2 == 2 && v3 == 2) {
1905             subsampling = SUBSAMPLE_YUV422H;
1906             fourcc = VA_FOURCC_422H;
1907         } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1908                    v1 == 2 && v2 == 1 && v3 == 1) {
1909             subsampling = SUBSAMPLE_YUV422V;
1910             fourcc = VA_FOURCC_422V;
1911         } else
1912             assert(0);
1913     } else {
1915         assert(0);
1916     }
1917
1918     /* Current decoded picture */
1919     obj_surface = decode_state->render_object;
1920     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1921
1922     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1923     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1924     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1925     gen7_mfd_context->pre_deblocking_output.valid = 1;
1926
1927     gen7_mfd_context->post_deblocking_output.bo = NULL;
1928     gen7_mfd_context->post_deblocking_output.valid = 0;
1929
1930     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1931     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1932
1933     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1934     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1935
1936     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1937     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1938
1939     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1940     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1941
1942     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1943     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1944 }
1945
1946 static const int va_to_gen7_jpeg_rotation[4] = {
1947     GEN7_JPEG_ROTATION_0,
1948     GEN7_JPEG_ROTATION_90,
1949     GEN7_JPEG_ROTATION_180,
1950     GEN7_JPEG_ROTATION_270
1951 };
1952
1953 static void
1954 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1955                         struct decode_state *decode_state,
1956                         struct gen7_mfd_context *gen7_mfd_context)
1957 {
1958     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1959     VAPictureParameterBufferJPEGBaseline *pic_param;
1960     int chroma_type = GEN7_YUV420;
1961     int frame_width_in_blks;
1962     int frame_height_in_blks;
1963
1964     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1965     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1966
1967     if (pic_param->num_components == 1)
1968         chroma_type = GEN7_YUV400;
1969     else if (pic_param->num_components == 3) {
1970         int h1 = pic_param->components[0].h_sampling_factor;
1971         int h2 = pic_param->components[1].h_sampling_factor;
1972         int h3 = pic_param->components[2].h_sampling_factor;
1973         int v1 = pic_param->components[0].v_sampling_factor;
1974         int v2 = pic_param->components[1].v_sampling_factor;
1975         int v3 = pic_param->components[2].v_sampling_factor;
1976
1977         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1978             v1 == 2 && v2 == 1 && v3 == 1)
1979             chroma_type = GEN7_YUV420;
1980         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1981                  v1 == 1 && v2 == 1 && v3 == 1)
1982             chroma_type = GEN7_YUV422H_2Y;
1983         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1984                  v1 == 1 && v2 == 1 && v3 == 1)
1985             chroma_type = GEN7_YUV444;
1986         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1987                  v1 == 1 && v2 == 1 && v3 == 1)
1988             chroma_type = GEN7_YUV411;
1989         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1990                  v1 == 2 && v2 == 1 && v3 == 1)
1991             chroma_type = GEN7_YUV422V_2Y;
1992         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1993                  v1 == 2 && v2 == 2 && v3 == 2)
1994             chroma_type = GEN7_YUV422H_4Y;
1995         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1996                  v1 == 2 && v2 == 1 && v3 == 1)
1997             chroma_type = GEN7_YUV422V_4Y;
1998         else
1999             assert(0);
2000     }
2001
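    /*
     * MFX_JPEG_PIC_STATE takes the frame size in 8x8 block units
     * (FrameWidthInBlks/FrameHeightInBlks below), rounded up to a whole MCU
     * for the given chroma type; the rounding below corresponds to the MCU
     * size implied by each chroma layout.
     */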
2002     if (chroma_type == GEN7_YUV400 ||
2003         chroma_type == GEN7_YUV444 ||
2004         chroma_type == GEN7_YUV422V_2Y) {
2005         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2006         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2007     } else if (chroma_type == GEN7_YUV411) {
2008         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2009         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2010     } else {
2011         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2012         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2013     }
2014
2015     BEGIN_BCS_BATCH(batch, 3);
2016     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2017     OUT_BCS_BATCH(batch,
2018                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2019                   (chroma_type << 0));
2020     OUT_BCS_BATCH(batch,
2021                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2022                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2023     ADVANCE_BCS_BATCH(batch);
2024 }
2025
2026 static const int va_to_gen7_jpeg_hufftable[2] = {
2027     MFX_HUFFTABLE_ID_Y,
2028     MFX_HUFFTABLE_ID_UV
2029 };
2030
2031 static void
2032 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2033                                struct decode_state *decode_state,
2034                                struct gen7_mfd_context *gen7_mfd_context,
2035                                int num_tables)
2036 {
2037     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2038     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2039     int index;
2040
2041     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2042         return;
2043
2044     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2045
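    /*
     * One MFX_JPEG_HUFF_TABLE_STATE command per DC/AC table pair; the payload
     * below adds up to 204 bytes of table data, which together with the two
     * leading DWORDs gives the 53 DWORDs declared for the command.
     */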
2046     for (index = 0; index < num_tables; index++) {
2047         int id = va_to_gen7_jpeg_hufftable[index];
2048         if (!huffman_table->load_huffman_table[index])
2049             continue;
2050         BEGIN_BCS_BATCH(batch, 53);
2051         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2052         OUT_BCS_BATCH(batch, id);
2053         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2054         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2055         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2056         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2057         ADVANCE_BCS_BATCH(batch);
2058     }
2059 }
2060
2061 static const int va_to_gen7_jpeg_qm[5] = {
2062     -1,
2063     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2064     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2065     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2066     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2067 };
2068
2069 static void
2070 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2071                        struct decode_state *decode_state,
2072                        struct gen7_mfd_context *gen7_mfd_context)
2073 {
2074     VAPictureParameterBufferJPEGBaseline *pic_param;
2075     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2076     int index;
2077
2078     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2079         return;
2080
2081     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2082     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2083
2084     assert(pic_param->num_components <= 3);
2085
2086     for (index = 0; index < pic_param->num_components; index++) {
2087         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2088         int qm_type;
2089         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2090         unsigned char raster_qm[64];
2091         int j;
2092
2093         if (id > 4 || id < 1)
2094             continue;
2095
2096         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2097             continue;
2098
2099         qm_type = va_to_gen7_jpeg_qm[id];
2100
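        /*
         * The JPEG quantiser tables are supplied in zig-zag scan order;
         * convert them back to raster order (using the shared zigzag_direct
         * table) before programming them through gen8_mfd_qm_state().
         */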
2101         for (j = 0; j < 64; j++)
2102             raster_qm[zigzag_direct[j]] = qm[j];
2103
2104         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2105     }
2106 }
2107
2108 static void
2109 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2110                          VAPictureParameterBufferJPEGBaseline *pic_param,
2111                          VASliceParameterBufferJPEGBaseline *slice_param,
2112                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2113                          dri_bo *slice_data_bo,
2114                          struct gen7_mfd_context *gen7_mfd_context)
2115 {
2116     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2117     int scan_component_mask = 0;
2118     int i;
2119
2120     assert(slice_param->num_components > 0);
2121     assert(slice_param->num_components < 4);
2122     assert(slice_param->num_components <= pic_param->num_components);
2123
2124     for (i = 0; i < slice_param->num_components; i++) {
2125         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2126         case 1:
2127             scan_component_mask |= (1 << 0);
2128             break;
2129         case 2:
2130             scan_component_mask |= (1 << 1);
2131             break;
2132         case 3:
2133             scan_component_mask |= (1 << 2);
2134             break;
2135         default:
2136             assert(0);
2137             break;
2138         }
2139     }
2140
2141     BEGIN_BCS_BATCH(batch, 6);
2142     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2143     OUT_BCS_BATCH(batch, 
2144                   slice_param->slice_data_size);
2145     OUT_BCS_BATCH(batch, 
2146                   slice_param->slice_data_offset);
2147     OUT_BCS_BATCH(batch,
2148                   slice_param->slice_horizontal_position << 16 |
2149                   slice_param->slice_vertical_position << 0);
2150     OUT_BCS_BATCH(batch,
2151                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2152                   (scan_component_mask << 27) |                 /* scan components */
2153                   (0 << 26) |   /* disable interrupt allowed */
2154                   (slice_param->num_mcus << 0));                /* MCU count */
2155     OUT_BCS_BATCH(batch,
2156                   (slice_param->restart_interval << 0));    /* RestartInterval */
2157     ADVANCE_BCS_BATCH(batch);
2158 }
2159
2160 /* Workaround for JPEG decoding on Ivybridge */
2161 #ifdef JPEG_WA
2162
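/*
 * A tiny pre-encoded 16x16 AVC clip (14 bytes of slice data, QP 28) that is
 * run through the AVC decoding path before each JPEG picture; decoding it
 * appears to bring the MFX pipeline into a known state before it is switched
 * to JPEG mode.
 */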
2163 static struct {
2164     int width;
2165     int height;
2166     unsigned char data[32];
2167     int data_size;
2168     int data_bit_offset;
2169     int qp;
2170 } gen7_jpeg_wa_clip = {
2171     16,
2172     16,
2173     {
2174         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2175         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2176     },
2177     14,
2178     40,
2179     28,
2180 };
2181
2182 static void
2183 gen8_jpeg_wa_init(VADriverContextP ctx,
2184                   struct gen7_mfd_context *gen7_mfd_context)
2185 {
2186     struct i965_driver_data *i965 = i965_driver_data(ctx);
2187     VAStatus status;
2188     struct object_surface *obj_surface;
2189
2190     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2191         i965_DestroySurfaces(ctx,
2192                              &gen7_mfd_context->jpeg_wa_surface_id,
2193                              1);
2194
2195     status = i965_CreateSurfaces(ctx,
2196                                  gen7_jpeg_wa_clip.width,
2197                                  gen7_jpeg_wa_clip.height,
2198                                  VA_RT_FORMAT_YUV420,
2199                                  1,
2200                                  &gen7_mfd_context->jpeg_wa_surface_id);
2201     assert(status == VA_STATUS_SUCCESS);
2202
2203     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2204     assert(obj_surface);
2205     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2206     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2207
2208     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2209         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2210                                                                "JPEG WA data",
2211                                                                0x1000,
2212                                                                0x1000);
2213         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2214                        0,
2215                        gen7_jpeg_wa_clip.data_size,
2216                        gen7_jpeg_wa_clip.data);
2217     }
2218 }
2219
2220 static void
2221 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2222                               struct gen7_mfd_context *gen7_mfd_context)
2223 {
2224     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2225
2226     BEGIN_BCS_BATCH(batch, 5);
2227     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2228     OUT_BCS_BATCH(batch,
2229                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2230                   (MFD_MODE_VLD << 15) | /* VLD mode */
2231                   (0 << 10) | /* disable Stream-Out */
2232                   (0 << 9)  | /* Post Deblocking Output */
2233                   (1 << 8)  | /* Pre Deblocking Output */
2234                   (0 << 5)  | /* not in stitch mode */
2235                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2236                   (MFX_FORMAT_AVC << 0));
2237     OUT_BCS_BATCH(batch,
2238                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2239                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2240                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2241                   (0 << 1)  |
2242                   (0 << 0));
2243     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2244     OUT_BCS_BATCH(batch, 0); /* reserved */
2245     ADVANCE_BCS_BATCH(batch);
2246 }
2247
2248 static void
2249 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2250                            struct gen7_mfd_context *gen7_mfd_context)
2251 {
2252     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2253     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2254
2255     BEGIN_BCS_BATCH(batch, 6);
2256     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2257     OUT_BCS_BATCH(batch, 0);
2258     OUT_BCS_BATCH(batch,
2259                   ((obj_surface->orig_width - 1) << 18) |
2260                   ((obj_surface->orig_height - 1) << 4));
2261     OUT_BCS_BATCH(batch,
2262                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2263                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2264                   (0 << 22) | /* surface object control state, ignored */
2265                   ((obj_surface->width - 1) << 3) | /* pitch */
2266                   (0 << 2)  | /* must be 0 */
2267                   (1 << 1)  | /* must be tiled */
2268                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2269     OUT_BCS_BATCH(batch,
2270                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2271                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2272     OUT_BCS_BATCH(batch,
2273                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2274                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2275     ADVANCE_BCS_BATCH(batch);
2276 }
2277
2278 static void
2279 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2280                                  struct gen7_mfd_context *gen7_mfd_context)
2281 {
2282     struct i965_driver_data *i965 = i965_driver_data(ctx);
2283     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2284     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2285     dri_bo *intra_bo;
2286     int i;
2287
2288     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2289                             "intra row store",
2290                             128 * 64,
2291                             0x1000);
2292
2293     BEGIN_BCS_BATCH(batch, 61);
2294     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2295     OUT_BCS_RELOC(batch,
2296                   obj_surface->bo,
2297                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2298                   0);
2299     OUT_BCS_BATCH(batch, 0);
2300     OUT_BCS_BATCH(batch, 0);
2301
2302
2303     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2304     OUT_BCS_BATCH(batch, 0);
2305     OUT_BCS_BATCH(batch, 0);
2306
2307     /* uncompressed-video & stream out 7-12 */
2308     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2309     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2310     OUT_BCS_BATCH(batch, 0);
2311     OUT_BCS_BATCH(batch, 0);
2312     OUT_BCS_BATCH(batch, 0);
2313     OUT_BCS_BATCH(batch, 0);
2314
2315     /* DW 13-15 are for the intra row store scratch buffer */
2316     OUT_BCS_RELOC(batch,
2317                   intra_bo,
2318                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2319                   0);
2320     OUT_BCS_BATCH(batch, 0);
2321     OUT_BCS_BATCH(batch, 0);
2322
2323     /* DW 16-18 are for the deblocking filter */
2324     OUT_BCS_BATCH(batch, 0);
2325     OUT_BCS_BATCH(batch, 0);
2326     OUT_BCS_BATCH(batch, 0);
2327
2328     /* DW 19..50 */
2329     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2330         OUT_BCS_BATCH(batch, 0);
2331         OUT_BCS_BATCH(batch, 0);
2332     }
2333     OUT_BCS_BATCH(batch, 0);
2334
2335     /* DW 52-54 are for the mb status address */
2336     OUT_BCS_BATCH(batch, 0);
2337     OUT_BCS_BATCH(batch, 0);
2338     OUT_BCS_BATCH(batch, 0);
2339     /* DW 55-60 are for the ILDB & second ILDB address */
2340     OUT_BCS_BATCH(batch, 0);
2341     OUT_BCS_BATCH(batch, 0);
2342     OUT_BCS_BATCH(batch, 0);
2343     OUT_BCS_BATCH(batch, 0);
2344     OUT_BCS_BATCH(batch, 0);
2345     OUT_BCS_BATCH(batch, 0);
2346
2347     ADVANCE_BCS_BATCH(batch);
2348
2349     dri_bo_unreference(intra_bo);
2350 }
2351
2352 static void
2353 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2354                                      struct gen7_mfd_context *gen7_mfd_context)
2355 {
2356     struct i965_driver_data *i965 = i965_driver_data(ctx);
2357     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2358     dri_bo *bsd_mpc_bo, *mpr_bo;
2359
2360     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2361                               "bsd mpc row store",
2362                               11520, /* 1.5 * 120 * 64 */
2363                               0x1000);
2364
2365     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2366                           "mpr row store",
2367                           7680, /* 1.0 * 120 * 64 */
2368                           0x1000);
2369
2370     BEGIN_BCS_BATCH(batch, 10);
2371     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2372
2373     OUT_BCS_RELOC(batch,
2374                   bsd_mpc_bo,
2375                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2376                   0);
2377
2378     OUT_BCS_BATCH(batch, 0);
2379     OUT_BCS_BATCH(batch, 0);
2380
2381     OUT_BCS_RELOC(batch,
2382                   mpr_bo,
2383                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2384                   0);
2385     OUT_BCS_BATCH(batch, 0);
2386     OUT_BCS_BATCH(batch, 0);
2387
2388     OUT_BCS_BATCH(batch, 0);
2389     OUT_BCS_BATCH(batch, 0);
2390     OUT_BCS_BATCH(batch, 0);
2391
2392     ADVANCE_BCS_BATCH(batch);
2393
2394     dri_bo_unreference(bsd_mpc_bo);
2395     dri_bo_unreference(mpr_bo);
2396 }
2397
2398 static void
2399 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2400                           struct gen7_mfd_context *gen7_mfd_context)
2401 {
2402
2403 }
2404
2405 static void
2406 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2407                            struct gen7_mfd_context *gen7_mfd_context)
2408 {
2409     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2410     int img_struct = 0;
2411     int mbaff_frame_flag = 0;
2412     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2413
2414     BEGIN_BCS_BATCH(batch, 16);
2415     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2416     OUT_BCS_BATCH(batch, 
2417                   width_in_mbs * height_in_mbs);
2418     OUT_BCS_BATCH(batch, 
2419                   ((height_in_mbs - 1) << 16) | 
2420                   ((width_in_mbs - 1) << 0));
2421     OUT_BCS_BATCH(batch, 
2422                   (0 << 24) |
2423                   (0 << 16) |
2424                   (0 << 14) |
2425                   (0 << 13) |
2426                   (0 << 12) | /* differs from GEN6 */
2427                   (0 << 10) |
2428                   (img_struct << 8));
2429     OUT_BCS_BATCH(batch,
2430                   (1 << 10) | /* 4:2:0 */
2431                   (1 << 7) |  /* CABAC */
2432                   (0 << 6) |
2433                   (0 << 5) |
2434                   (0 << 4) |
2435                   (0 << 3) |
2436                   (1 << 2) |
2437                   (mbaff_frame_flag << 1) |
2438                   (0 << 0));
2439     OUT_BCS_BATCH(batch, 0);
2440     OUT_BCS_BATCH(batch, 0);
2441     OUT_BCS_BATCH(batch, 0);
2442     OUT_BCS_BATCH(batch, 0);
2443     OUT_BCS_BATCH(batch, 0);
2444     OUT_BCS_BATCH(batch, 0);
2445     OUT_BCS_BATCH(batch, 0);
2446     OUT_BCS_BATCH(batch, 0);
2447     OUT_BCS_BATCH(batch, 0);
2448     OUT_BCS_BATCH(batch, 0);
2449     OUT_BCS_BATCH(batch, 0);
2450     ADVANCE_BCS_BATCH(batch);
2451 }
2452
2453 static void
2454 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2455                                   struct gen7_mfd_context *gen7_mfd_context)
2456 {
2457     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2458     int i;
2459
2460     BEGIN_BCS_BATCH(batch, 71);
2461     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2462
2463     /* reference surfaces 0..15 */
2464     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2465         OUT_BCS_BATCH(batch, 0); /* top */
2466         OUT_BCS_BATCH(batch, 0); /* bottom */
2467     }
2468
2469     OUT_BCS_BATCH(batch, 0);
2470
2471     /* the current decoding frame/field */
2472     OUT_BCS_BATCH(batch, 0); /* top */
2473     OUT_BCS_BATCH(batch, 0);
2474     OUT_BCS_BATCH(batch, 0);
2475
2476     /* POC List */
2477     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2478         OUT_BCS_BATCH(batch, 0);
2479         OUT_BCS_BATCH(batch, 0);
2480     }
2481
2482     OUT_BCS_BATCH(batch, 0);
2483     OUT_BCS_BATCH(batch, 0);
2484
2485     ADVANCE_BCS_BATCH(batch);
2486 }
2487
2488 static void
2489 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2490                                      struct gen7_mfd_context *gen7_mfd_context)
2491 {
2492     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2493
2494     BEGIN_BCS_BATCH(batch, 11);
2495     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2496     OUT_BCS_RELOC(batch,
2497                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2498                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2499                   0);
2500     OUT_BCS_BATCH(batch, 0);
2501     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2502     OUT_BCS_BATCH(batch, 0);
2503     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2504     OUT_BCS_BATCH(batch, 0);
2505     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2506     OUT_BCS_BATCH(batch, 0);
2507     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2508     OUT_BCS_BATCH(batch, 0);
2509     ADVANCE_BCS_BATCH(batch);
2510 }
2511
2512 static void
2513 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2514                             struct gen7_mfd_context *gen7_mfd_context)
2515 {
2516     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2517
2518     /* the input bitstream format on GEN7 differs from GEN6 */
2519     BEGIN_BCS_BATCH(batch, 6);
2520     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2521     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2522     OUT_BCS_BATCH(batch, 0);
2523     OUT_BCS_BATCH(batch,
2524                   (0 << 31) |
2525                   (0 << 14) |
2526                   (0 << 12) |
2527                   (0 << 10) |
2528                   (0 << 8));
2529     OUT_BCS_BATCH(batch,
2530                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2531                   (0 << 5)  |
2532                   (0 << 4)  |
2533                   (1 << 3) | /* LastSlice Flag */
2534                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2535     OUT_BCS_BATCH(batch, 0);
2536     ADVANCE_BCS_BATCH(batch);
2537 }
2538
2539 static void
2540 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2541                              struct gen7_mfd_context *gen7_mfd_context)
2542 {
2543     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2544     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2545     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2546     int first_mb_in_slice = 0;
2547     int slice_type = SLICE_TYPE_I;
2548
2549     BEGIN_BCS_BATCH(batch, 11);
2550     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2551     OUT_BCS_BATCH(batch, slice_type);
2552     OUT_BCS_BATCH(batch, 
2553                   (num_ref_idx_l1 << 24) |
2554                   (num_ref_idx_l0 << 16) |
2555                   (0 << 8) |
2556                   (0 << 0));
2557     OUT_BCS_BATCH(batch, 
2558                   (0 << 29) |
2559                   (1 << 27) |   /* disable Deblocking */
2560                   (0 << 24) |
2561                   (gen7_jpeg_wa_clip.qp << 16) |
2562                   (0 << 8) |
2563                   (0 << 0));
2564     OUT_BCS_BATCH(batch, 
2565                   (slice_ver_pos << 24) |
2566                   (slice_hor_pos << 16) | 
2567                   (first_mb_in_slice << 0));
2568     OUT_BCS_BATCH(batch,
2569                   (next_slice_ver_pos << 16) |
2570                   (next_slice_hor_pos << 0));
2571     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2572     OUT_BCS_BATCH(batch, 0);
2573     OUT_BCS_BATCH(batch, 0);
2574     OUT_BCS_BATCH(batch, 0);
2575     OUT_BCS_BATCH(batch, 0);
2576     ADVANCE_BCS_BATCH(batch);
2577 }
2578
2579 static void
2580 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2581                  struct gen7_mfd_context *gen7_mfd_context)
2582 {
2583     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2584     gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2585     intel_batchbuffer_emit_mi_flush(batch);
2586     gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2587     gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2588     gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2589     gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2590     gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2591     gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2592     gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2593
2594     gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2595     gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2596     gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2597 }
2598
2599 #endif
2600
2601 void
2602 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2603                              struct decode_state *decode_state,
2604                              struct gen7_mfd_context *gen7_mfd_context)
2605 {
2606     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2607     VAPictureParameterBufferJPEGBaseline *pic_param;
2608     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2609     dri_bo *slice_data_bo;
2610     int i, j, max_selector = 0;
2611
2612     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2613     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2614
2615     /* Currently only Baseline DCT is supported */
2616     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2617     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2618 #ifdef JPEG_WA
2619     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2620 #endif
2621     intel_batchbuffer_emit_mi_flush(batch);
2622     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2623     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2624     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2625     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2626     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2627
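         /* First pass over all slice (scan) parameters: only find the largest
          * DC/AC Huffman table selector in use, so the Huffman table state can
          * be programmed once before any BSD objects are emitted. */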
2628     for (j = 0; j < decode_state->num_slice_params; j++) {
2629         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2630         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2631         slice_data_bo = decode_state->slice_datas[j]->bo;
2632         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2633
2634         if (j == decode_state->num_slice_params - 1)
2635             next_slice_group_param = NULL;
2636         else
2637             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2638
2639         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2640             int component;
2641
2642             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2643
2644             if (i < decode_state->slice_params[j]->num_elements - 1)
2645                 next_slice_param = slice_param + 1;
2646             else
2647                 next_slice_param = next_slice_group_param;
2648
2649             for (component = 0; component < slice_param->num_components; component++) {
2650                 if (max_selector < slice_param->components[component].dc_table_selector)
2651                     max_selector = slice_param->components[component].dc_table_selector;
2652
2653                 if (max_selector < slice_param->components[component].ac_table_selector)
2654                     max_selector = slice_param->components[component].ac_table_selector;
2655             }
2656
2657             slice_param++;
2658         }
2659     }
2660
2661     assert(max_selector < 2);
2662     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2663
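         /* Second pass: program the slice data buffer for each parameter
          * group and emit one BSD object per scan, walking the slice
          * parameters exactly as in the first pass. */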
2664     for (j = 0; j < decode_state->num_slice_params; j++) {
2665         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2666         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2667         slice_data_bo = decode_state->slice_datas[j]->bo;
2668         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2669
2670         if (j == decode_state->num_slice_params - 1)
2671             next_slice_group_param = NULL;
2672         else
2673             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2674
2675         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2676             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2677
2678             if (i < decode_state->slice_params[j]->num_elements - 1)
2679                 next_slice_param = slice_param + 1;
2680             else
2681                 next_slice_param = next_slice_group_param;
2682
2683             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2684             slice_param++;
2685         }
2686     }
2687
2688     intel_batchbuffer_end_atomic(batch);
2689     intel_batchbuffer_flush(batch);
2690 }
2691
2692 static const int vp8_dc_qlookup[128] =
2693 {
2694       4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
2695      18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
2696      29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
2697      44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
2698      59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
2699      75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
2700      91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2701     122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
2702 };
2703
2704 static const int vp8_ac_qlookup[128] =
2705 {
2706       4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
2707      20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
2708      36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
2709      52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
2710      78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
2711     110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2712     155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2713     213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2714 };
2715
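     /* Clamp a VP8 quantization index to the 0..127 range covered by the
      * lookup tables above. */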
2716 static inline unsigned int vp8_clip_quantization_index(int index)
2717 {
2718     if (index > 127)
2719         return 127;
2720     else if (index < 0)
2721         return 0;
2722
2723     return index;
2724 }
2725
2726 static void
2727 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2728                           struct decode_state *decode_state,
2729                           struct gen7_mfd_context *gen7_mfd_context)
2730 {
2731     struct object_surface *obj_surface;
2732     struct i965_driver_data *i965 = i965_driver_data(ctx);
2733     dri_bo *bo;
2734     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2735     int width_in_mbs = (pic_param->frame_width + 15) / 16;
2736     int height_in_mbs = (pic_param->frame_height + 15) / 16;
2737
2738     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2739     assert(height_in_mbs > 0 && height_in_mbs <= 256);
2740
2741     intel_update_vp8_frame_store_index(ctx,
2742                                        decode_state,
2743                                        pic_param,
2744                                        gen7_mfd_context->reference_surface);
2745
2746     /* Current decoded picture */
2747     obj_surface = decode_state->render_object;
2748     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2749
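         /* Both the pre- and post-deblocking outputs point at the same render
          * target bo; only one of them is marked valid, depending on whether
          * the VP8 in-loop filter is enabled for this picture. */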
2750     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2751     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2752     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2753     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2754
2755     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2756     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2757     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2758     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2759
2760     intel_ensure_vp8_segmentation_buffer(ctx,
2761         &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2762
2763     /* The same as AVC */
2764     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2765     bo = dri_bo_alloc(i965->intel.bufmgr,
2766                       "intra row store",
2767                       width_in_mbs * 64,
2768                       0x1000);
2769     assert(bo);
2770     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2771     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2772
2773     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2774     bo = dri_bo_alloc(i965->intel.bufmgr,
2775                       "deblocking filter row store",
2776                       width_in_mbs * 64 * 4,
2777                       0x1000);
2778     assert(bo);
2779     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2780     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2781
2782     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2783     bo = dri_bo_alloc(i965->intel.bufmgr,
2784                       "bsd mpc row store",
2785                       width_in_mbs * 64 * 2,
2786                       0x1000);
2787     assert(bo);
2788     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2789     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2790
2791     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2792     bo = dri_bo_alloc(i965->intel.bufmgr,
2793                       "mpr row store",
2794                       width_in_mbs * 64 * 2,
2795                       0x1000);
2796     assert(bo);
2797     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2798     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2799
2800     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2801 }
2802
2803 static void
2804 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2805                        struct decode_state *decode_state,
2806                        struct gen7_mfd_context *gen7_mfd_context)
2807 {
2808     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2809     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2810     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2811     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2812     dri_bo *probs_bo = decode_state->probability_data->bo;
2813     int i, j, log2num;
2814     unsigned int quantization_value[4][6];
2815
2816     /* There is no safe way to error out if the segmentation buffer
2817        could not be allocated. So, instead of aborting, simply decode
2818        something even if the result may look totally inaccurate */
2819     const unsigned int enable_segmentation =
2820         pic_param->pic_fields.bits.segmentation_enabled &&
2821         gen7_mfd_context->segmentation_buffer.valid;
2822
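         /* num_of_partitions counts the control partition plus 1, 2, 4 or 8
          * token partitions, so the hardware field is log2 of the token
          * partition count (0..3). */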
2823     log2num = (int)log2(slice_param->num_of_partitions - 1);
2824
2825     BEGIN_BCS_BATCH(batch, 38);
2826     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2827     OUT_BCS_BATCH(batch,
2828                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2829                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2830     OUT_BCS_BATCH(batch,
2831                   log2num << 24 |
2832                   pic_param->pic_fields.bits.sharpness_level << 16 |
2833                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2834                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2835                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2836                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2837                   pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2838                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2839                   (enable_segmentation &&
2840                    !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2841                   (enable_segmentation &&
2842                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2843                   (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicates an intra frame in the VP8 stream/spec ($9.1) */
2844                   pic_param->pic_fields.bits.filter_type << 4 |
2845                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2846                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2847
2848     OUT_BCS_BATCH(batch,
2849                   pic_param->loop_filter_level[3] << 24 |
2850                   pic_param->loop_filter_level[2] << 16 |
2851                   pic_param->loop_filter_level[1] <<  8 |
2852                   pic_param->loop_filter_level[0] <<  0);
2853
2854     /* Quantizer values for the 4 segments, DW4-DW15 */
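         /* Translate the per-segment quantization indices into dequantization
          * factors following the VP8 dequantization rules: Y2 DC is doubled,
          * Y2 AC is scaled by ~1.55 with a floor of 8, and chroma DC is
          * capped at 132. */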
2855     for (i = 0; i < 4; i++) {
2856         quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])]; /* yac */
2857         quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])]; /* ydc */
2858         quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /* y2dc */
2859         /* (x * 101581) >> 16 is approximately x * 155 / 100 */
2860         quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /* y2ac */
2861         quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])]; /* uvdc */
2862         quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])]; /* uvac */
2863
2864         quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2865         quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2866
2867         OUT_BCS_BATCH(batch,
2868                       quantization_value[i][0] << 16 | /* Y1AC */
2869                       quantization_value[i][1] <<  0); /* Y1DC */
2870         OUT_BCS_BATCH(batch,
2871                       quantization_value[i][5] << 16 | /* UVAC */
2872                       quantization_value[i][4] <<  0); /* UVDC */
2873         OUT_BCS_BATCH(batch,
2874                       quantization_value[i][3] << 16 | /* Y2AC */
2875                       quantization_value[i][2] <<  0); /* Y2DC */
2876     }
2877
2878     /* CoeffProbability table for non-key frame, DW16-DW18 */
2879     if (probs_bo) {
2880         OUT_BCS_RELOC(batch, probs_bo,
2881                       0, I915_GEM_DOMAIN_INSTRUCTION,
2882                       0);
2883         OUT_BCS_BATCH(batch, 0);
2884         OUT_BCS_BATCH(batch, 0);
2885     } else {
2886         OUT_BCS_BATCH(batch, 0);
2887         OUT_BCS_BATCH(batch, 0);
2888         OUT_BCS_BATCH(batch, 0);
2889     }
2890
2891     OUT_BCS_BATCH(batch,
2892                   pic_param->mb_segment_tree_probs[2] << 16 |
2893                   pic_param->mb_segment_tree_probs[1] <<  8 |
2894                   pic_param->mb_segment_tree_probs[0] <<  0);
2895
2896     OUT_BCS_BATCH(batch,
2897                   pic_param->prob_skip_false << 24 |
2898                   pic_param->prob_intra      << 16 |
2899                   pic_param->prob_last       <<  8 |
2900                   pic_param->prob_gf         <<  0);
2901
2902     OUT_BCS_BATCH(batch,
2903                   pic_param->y_mode_probs[3] << 24 |
2904                   pic_param->y_mode_probs[2] << 16 |
2905                   pic_param->y_mode_probs[1] <<  8 |
2906                   pic_param->y_mode_probs[0] <<  0);
2907
2908     OUT_BCS_BATCH(batch,
2909                   pic_param->uv_mode_probs[2] << 16 |
2910                   pic_param->uv_mode_probs[1] <<  8 |
2911                   pic_param->uv_mode_probs[0] <<  0);
2912
2913     /* MV update value, DW23-DW32 */
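         /* 19 MV probabilities per component, packed four to a dword; the
          * unused last byte of the fifth dword is written as zero. */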
2914     for (i = 0; i < 2; i++) {
2915         for (j = 0; j < 20; j += 4) {
2916             OUT_BCS_BATCH(batch,
2917                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2918                           pic_param->mv_probs[i][j + 2] << 16 |
2919                           pic_param->mv_probs[i][j + 1] <<  8 |
2920                           pic_param->mv_probs[i][j + 0] <<  0);
2921         }
2922     }
2923
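         /* Loop-filter deltas are signed values; only their low 7 bits are
          * written into the packed byte fields. */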
2924     OUT_BCS_BATCH(batch,
2925                   (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2926                   (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2927                   (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
2928                   (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);
2929
2930     OUT_BCS_BATCH(batch,
2931                   (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2932                   (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2933                   (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
2934                   (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);
2935
2936     /* segmentation id stream base address, DW35-DW37 */
2937     if (enable_segmentation) {
2938         OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2939                       0, I915_GEM_DOMAIN_INSTRUCTION,
2940                       0);
2941         OUT_BCS_BATCH(batch, 0);
2942         OUT_BCS_BATCH(batch, 0);
2943     }
2944     else {
2945         OUT_BCS_BATCH(batch, 0);
2946         OUT_BCS_BATCH(batch, 0);
2947         OUT_BCS_BATCH(batch, 0);
2948     }
2949     ADVANCE_BCS_BATCH(batch);
2950 }
2951
2952 static void
2953 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2954                         VAPictureParameterBufferVP8 *pic_param,
2955                         VASliceParameterBufferVP8 *slice_param,
2956                         dri_bo *slice_data_bo,
2957                         struct gen7_mfd_context *gen7_mfd_context)
2958 {
2959     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2960     int i, log2num;
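         /* used_bits is how many bits of the current bitstream byte the
          * boolean decoder has already consumed (bool_coder_ctx.count bits
          * remain).  If the whole byte has been consumed, start the hardware
          * on the next byte and shrink partition 0 accordingly. */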
2961     unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
2962     unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
2963     unsigned int partition_size_0 = slice_param->partition_size[0];
2964
2965     assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
2966     if (used_bits == 8) {
2967         used_bits = 0;
2968         offset += 1;
2969         partition_size_0 -= 1;
2970     }
2971
2972     assert(slice_param->num_of_partitions >= 2);
2973     assert(slice_param->num_of_partitions <= 9);
2974
2975     log2num = (int)log2(slice_param->num_of_partitions - 1);
2976
2977     BEGIN_BCS_BATCH(batch, 22);
2978     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2979     OUT_BCS_BATCH(batch,
2980                   used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2981                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
2982                   log2num << 4 |
2983                   (slice_param->macroblock_offset & 0x7));
2984     OUT_BCS_BATCH(batch,
2985                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
2986                   0);
2987
2988     OUT_BCS_BATCH(batch, partition_size_0 + 1);
2989     OUT_BCS_BATCH(batch, offset);
2990     /* When there is more than one token partition, the sizes of all but the last token partition (3 bytes each) are stored right after the first partition. */
2991     offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
2992     for (i = 1; i < 9; i++) {
2993         if (i < slice_param->num_of_partitions) {
2994             OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
2995             OUT_BCS_BATCH(batch, offset);
2996         } else {
2997             OUT_BCS_BATCH(batch, 0);
2998             OUT_BCS_BATCH(batch, 0);
2999         }
3000
3001         offset += slice_param->partition_size[i];
3002     }
3003
3004     OUT_BCS_BATCH(batch,
3005                   1 << 31 | /* concealment method */
3006                   0);
3007
3008     ADVANCE_BCS_BATCH(batch);
3009 }
3010
3011 void
3012 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3013                             struct decode_state *decode_state,
3014                             struct gen7_mfd_context *gen7_mfd_context)
3015 {
3016     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3017     VAPictureParameterBufferVP8 *pic_param;
3018     VASliceParameterBufferVP8 *slice_param;
3019     dri_bo *slice_data_bo;
3020
3021     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3022     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3023
3024     /* one slice per frame */
3025     if (decode_state->num_slice_params != 1 ||
3026         (!decode_state->slice_params ||
3027          !decode_state->slice_params[0] ||
3028          (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3029         (!decode_state->slice_datas ||
3030          !decode_state->slice_datas[0] ||
3031          !decode_state->slice_datas[0]->bo) ||
3032         !decode_state->probability_data) {
3033         WARN_ONCE("Wrong parameters for VP8 decoding\n");
3034
3035         return;
3036     }
3037
3038     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3039     slice_data_bo = decode_state->slice_datas[0]->bo;
3040
3041     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3042     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3043     intel_batchbuffer_emit_mi_flush(batch);
3044     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3045     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3046     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3047     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3048     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3049     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3050     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3051     intel_batchbuffer_end_atomic(batch);
3052     intel_batchbuffer_flush(batch);
3053 }
3054
3055 static VAStatus
3056 gen8_mfd_decode_picture(VADriverContextP ctx, 
3057                         VAProfile profile, 
3058                         union codec_state *codec_state,
3059                         struct hw_context *hw_context)
3060
3061 {
3062     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3063     struct decode_state *decode_state = &codec_state->decode;
3064     VAStatus vaStatus;
3065
3066     assert(gen7_mfd_context);
3067
3068     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3069
3070     if (vaStatus != VA_STATUS_SUCCESS)
3071         goto out;
3072
3073     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3074
3075     switch (profile) {
3076     case VAProfileMPEG2Simple:
3077     case VAProfileMPEG2Main:
3078         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3079         break;
3080         
3081     case VAProfileH264ConstrainedBaseline:
3082     case VAProfileH264Main:
3083     case VAProfileH264High:
3084     case VAProfileH264StereoHigh:
3085     case VAProfileH264MultiviewHigh:
3086         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3087         break;
3088
3089     case VAProfileVC1Simple:
3090     case VAProfileVC1Main:
3091     case VAProfileVC1Advanced:
3092         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3093         break;
3094
3095     case VAProfileJPEGBaseline:
3096         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3097         break;
3098
3099     case VAProfileVP8Version0_3:
3100         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3101         break;
3102
3103     default:
3104         assert(0);
3105         break;
3106     }
3107
3108     vaStatus = VA_STATUS_SUCCESS;
3109
3110 out:
3111     return vaStatus;
3112 }
3113
3114 static void
3115 gen8_mfd_context_destroy(void *hw_context)
3116 {
3117     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3118
3119     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3120     gen7_mfd_context->post_deblocking_output.bo = NULL;
3121
3122     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3123     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3124
3125     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3126     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3127
3128     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3129     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3130
3131     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3132     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3133
3134     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3135     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3136
3137     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3138     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3139
3140     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3141     gen7_mfd_context->segmentation_buffer.bo = NULL;
3142
3143     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3144
3145     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3146     free(gen7_mfd_context);
3147 }
3148
3149 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3150                                     struct gen7_mfd_context *gen7_mfd_context)
3151 {
3152     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3153     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3154     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3155     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3156 }
3157
3158 struct hw_context *
3159 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3160 {
3161     struct intel_driver_data *intel = intel_driver_data(ctx);
3162     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3163     int i;
3164
3165     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3166     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3167     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3168
3169     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3170         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3171         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3172     }
3173
3174     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3175     gen7_mfd_context->segmentation_buffer.valid = 0;
3176
3177     switch (obj_config->profile) {
3178     case VAProfileMPEG2Simple:
3179     case VAProfileMPEG2Main:
3180         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3181         break;
3182
3183     case VAProfileH264ConstrainedBaseline:
3184     case VAProfileH264Main:
3185     case VAProfileH264High:
3186     case VAProfileH264StereoHigh:
3187     case VAProfileH264MultiviewHigh:
3188         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3189         break;
3190     default:
3191         break;
3192     }
3193     return (struct hw_context *)gen7_mfd_context;
3194 }