[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV             2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
51 static const uint32_t zigzag_direct[64] = {
52     0,   1,  8, 16,  9,  2,  3, 10,
53     17, 24, 32, 25, 18, 11,  4,  5,
54     12, 19, 26, 33, 40, 48, 41, 34,
55     27, 20, 13,  6,  7, 14, 21, 28,
56     35, 42, 49, 56, 57, 50, 43, 36,
57     29, 22, 15, 23, 30, 37, 44, 51,
58     58, 59, 52, 45, 38, 31, 39, 46,
59     53, 60, 61, 54, 47, 55, 62, 63
60 };
61
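/* Allocate the per-surface AVC private data (GenAvcSurface) for the render
 * target if it does not exist yet, and create a direct-MV read/write buffer
 * covering the whole frame (width_in_mbs * height_in_mbs * 128 bytes). */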
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx, 
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77         assert((obj_surface->size & 0x3f) == 0);
78         obj_surface->private_data = gen7_avc_surface;
79     }
80
81     /* DMV buffers now relate to the whole frame, irrespective of
82        field coding modes */
83     if (gen7_avc_surface->dmv_top == NULL) {
84         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
85                                                  "direct mv w/r buffer",
86                                                  width_in_mbs * height_in_mbs * 128,
87                                                  0x1000);
88         assert(gen7_avc_surface->dmv_top);
89     }
90 }
91
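/* Emit MFX_PIPE_MODE_SELECT (5 DWs): VLD long-format decode for the selected
 * codec (MPEG-2/AVC/VC-1/JPEG/VP8), with the pre-/post-deblocking outputs
 * enabled according to the current context. */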
92 static void
93 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
94                           struct decode_state *decode_state,
95                           int standard_select,
96                           struct gen7_mfd_context *gen7_mfd_context)
97 {
98     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
99
100     assert(standard_select == MFX_FORMAT_MPEG2 ||
101            standard_select == MFX_FORMAT_AVC ||
102            standard_select == MFX_FORMAT_VC1 ||
103            standard_select == MFX_FORMAT_JPEG ||
104            standard_select == MFX_FORMAT_VP8);
105
106     BEGIN_BCS_BATCH(batch, 5);
107     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
108     OUT_BCS_BATCH(batch,
109                   (MFX_LONG_MODE << 17) | /* only the long format is currently supported */
110                   (MFD_MODE_VLD << 15) | /* VLD mode */
111                   (0 << 10) | /* disable Stream-Out */
112                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
113                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
114                   (0 << 5)  | /* not in stitch mode */
115                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
116                   (standard_select << 0));
117     OUT_BCS_BATCH(batch,
118                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
119                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
120                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
121                   (0 << 1)  |
122                   (0 << 0));
123     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
124     OUT_BCS_BATCH(batch, 0); /* reserved */
125     ADVANCE_BCS_BATCH(batch);
126 }
127
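/* Emit MFX_SURFACE_STATE for the destination surface: dimensions, pitch and
 * Y-major tiling plus the Cb/Cr plane offsets; monochrome vs. planar 4:2:0
 * is selected from the surface fourcc. */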
128 static void
129 gen8_mfd_surface_state(VADriverContextP ctx,
130                        struct decode_state *decode_state,
131                        int standard_select,
132                        struct gen7_mfd_context *gen7_mfd_context)
133 {
134     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
135     struct object_surface *obj_surface = decode_state->render_object;
136     unsigned int y_cb_offset;
137     unsigned int y_cr_offset;
138     unsigned int surface_format;
139
140     assert(obj_surface);
141
142     y_cb_offset = obj_surface->y_cb_offset;
143     y_cr_offset = obj_surface->y_cr_offset;
144
145     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
146         MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
147
148     BEGIN_BCS_BATCH(batch, 6);
149     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
150     OUT_BCS_BATCH(batch, 0);
151     OUT_BCS_BATCH(batch,
152                   ((obj_surface->orig_height - 1) << 18) |
153                   ((obj_surface->orig_width - 1) << 4));
154     OUT_BCS_BATCH(batch,
155                   (surface_format << 28) | /* 420 planar YUV surface */
156                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
157                   (0 << 22) | /* surface object control state, ignored */
158                   ((obj_surface->width - 1) << 3) | /* pitch */
159                   (0 << 2)  | /* must be 0 */
160                   (1 << 1)  | /* must be tiled */
161                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
162     OUT_BCS_BATCH(batch,
163                   (0 << 16) | /* X offset for U(Cb), must be 0 */
164                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
165     OUT_BCS_BATCH(batch,
166                   (0 << 16) | /* X offset for V(Cr), must be 0 */
167                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
168     ADVANCE_BCS_BATCH(batch);
169 }
170
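/* Emit MFX_PIPE_BUF_ADDR_STATE (61 DWs): pre-/post-deblocking destinations,
 * the row-store scratch buffers and the reference picture addresses
 * (DW 19..50). */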
171 static void
172 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
173                              struct decode_state *decode_state,
174                              int standard_select,
175                              struct gen7_mfd_context *gen7_mfd_context)
176 {
177     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
178     int i;
179
180     BEGIN_BCS_BATCH(batch, 61);
181     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
182         /* Pre-deblock 1-3 */
183     if (gen7_mfd_context->pre_deblocking_output.valid)
184         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
185                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
186                       0);
187     else
188         OUT_BCS_BATCH(batch, 0);
189
190     OUT_BCS_BATCH(batch, 0);
191     OUT_BCS_BATCH(batch, 0);
192         /* Post-deblocking 4-6 */
193     if (gen7_mfd_context->post_deblocking_output.valid)
194         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
195                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
196                       0);
197     else
198         OUT_BCS_BATCH(batch, 0);
199
200     OUT_BCS_BATCH(batch, 0);
201     OUT_BCS_BATCH(batch, 0);
202
203         /* uncompressed-video & stream out 7-12 */
204     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
205     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
206     OUT_BCS_BATCH(batch, 0);
207     OUT_BCS_BATCH(batch, 0);
208     OUT_BCS_BATCH(batch, 0);
209     OUT_BCS_BATCH(batch, 0);
210
211         /* intra row-store scratch 13-15 */
212     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
213         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
214                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
215                       0);
216     else
217         OUT_BCS_BATCH(batch, 0);
218
219     OUT_BCS_BATCH(batch, 0);
220     OUT_BCS_BATCH(batch, 0);
221         /* deblocking-filter-row-store 16-18 */
222     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
223         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
224                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
225                       0);
226     else
227         OUT_BCS_BATCH(batch, 0);
228     OUT_BCS_BATCH(batch, 0);
229     OUT_BCS_BATCH(batch, 0);
230
231     /* DW 19..50 */
232     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
233         struct object_surface *obj_surface;
234
235         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
236             gen7_mfd_context->reference_surface[i].obj_surface &&
237             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
238             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
239
240             OUT_BCS_RELOC(batch, obj_surface->bo,
241                           I915_GEM_DOMAIN_INSTRUCTION, 0,
242                           0);
243         } else {
244             OUT_BCS_BATCH(batch, 0);
245         }
246         
247         OUT_BCS_BATCH(batch, 0);
248     }
249     
250     /* reference property 51 */
251     OUT_BCS_BATCH(batch, 0);  
252         
253     /* Macroblock status & ILDB 52-57 */
254     OUT_BCS_BATCH(batch, 0);
255     OUT_BCS_BATCH(batch, 0);
256     OUT_BCS_BATCH(batch, 0);
257     OUT_BCS_BATCH(batch, 0);
258     OUT_BCS_BATCH(batch, 0);
259     OUT_BCS_BATCH(batch, 0);
260
261     /* the second Macroblock status 58-60 */    
262     OUT_BCS_BATCH(batch, 0);
263     OUT_BCS_BATCH(batch, 0);
264     OUT_BCS_BATCH(batch, 0);
265
266     ADVANCE_BCS_BATCH(batch);
267 }
268
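/* Emit MFX_IND_OBJ_BASE_ADDR_STATE: point the indirect bitstream object at
 * the slice data BO; the MV, IT_COFF, IT_DBLK and PAK_BSE sections are left
 * zero here. */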
269 static void
270 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
271                                  dri_bo *slice_data_bo,
272                                  int standard_select,
273                                  struct gen7_mfd_context *gen7_mfd_context)
274 {
275     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
276
277     BEGIN_BCS_BATCH(batch, 26);
278     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
279         /* MFX In BS 1-5 */
280     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
281     OUT_BCS_BATCH(batch, 0);
282     OUT_BCS_BATCH(batch, 0);
283         /* Upper bound 4-5 */   
284     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
285     OUT_BCS_BATCH(batch, 0);
286
287         /* MFX indirect MV 6-10 */
288     OUT_BCS_BATCH(batch, 0);
289     OUT_BCS_BATCH(batch, 0);
290     OUT_BCS_BATCH(batch, 0);
291     OUT_BCS_BATCH(batch, 0);
292     OUT_BCS_BATCH(batch, 0);
293         
294         /* MFX IT_COFF 11-15 */
295     OUT_BCS_BATCH(batch, 0);
296     OUT_BCS_BATCH(batch, 0);
297     OUT_BCS_BATCH(batch, 0);
298     OUT_BCS_BATCH(batch, 0);
299     OUT_BCS_BATCH(batch, 0);
300
301         /* MFX IT_DBLK 16-20 */
302     OUT_BCS_BATCH(batch, 0);
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305     OUT_BCS_BATCH(batch, 0);
306     OUT_BCS_BATCH(batch, 0);
307
308         /* MFX PAK_BSE object for encoder 21-25 */
309     OUT_BCS_BATCH(batch, 0);
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312     OUT_BCS_BATCH(batch, 0);
313     OUT_BCS_BATCH(batch, 0);
314
315     ADVANCE_BCS_BATCH(batch);
316 }
317
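/* Emit MFX_BSP_BUF_BASE_ADDR_STATE: BSD/MPC and MPR row-store scratch
 * buffers plus the bitplane read buffer, when valid. */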
318 static void
319 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
320                                  struct decode_state *decode_state,
321                                  int standard_select,
322                                  struct gen7_mfd_context *gen7_mfd_context)
323 {
324     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
325
326     BEGIN_BCS_BATCH(batch, 10);
327     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
328
329     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
330         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
331                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
332                       0);
333     else
334         OUT_BCS_BATCH(batch, 0);
335
336     OUT_BCS_BATCH(batch, 0);
337     OUT_BCS_BATCH(batch, 0);
338         /* MPR Row Store Scratch buffer 4-6 */
339     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
340         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
341                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
342                       0);
343     else
344         OUT_BCS_BATCH(batch, 0);
345
346     OUT_BCS_BATCH(batch, 0);
347     OUT_BCS_BATCH(batch, 0);
348
349         /* Bitplane 7-9 */ 
350     if (gen7_mfd_context->bitplane_read_buffer.valid)
351         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
352                       I915_GEM_DOMAIN_INSTRUCTION, 0,
353                       0);
354     else
355         OUT_BCS_BATCH(batch, 0);
356     OUT_BCS_BATCH(batch, 0);
357     OUT_BCS_BATCH(batch, 0);
358     ADVANCE_BCS_BATCH(batch);
359 }
360
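/* Load one quantizer matrix into the MFX unit via MFX_QM_STATE; up to
 * 64 bytes of matrix data are copied and a full 16-DW payload is emitted. */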
361 static void
362 gen8_mfd_qm_state(VADriverContextP ctx,
363                   int qm_type,
364                   unsigned char *qm,
365                   int qm_length,
366                   struct gen7_mfd_context *gen7_mfd_context)
367 {
368     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
369     unsigned int qm_buffer[16];
370
371     assert(qm_length <= 16 * 4);
372     memcpy(qm_buffer, qm, qm_length);
373
374     BEGIN_BCS_BATCH(batch, 18);
375     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
376     OUT_BCS_BATCH(batch, qm_type << 0);
377     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
378     ADVANCE_BCS_BATCH(batch);
379 }
380
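/* Emit MFX_AVC_IMG_STATE from the VA picture parameters: frame size in MBs,
 * chroma QP offsets, frame/field/MBAFF structure and the relevant
 * sequence/picture coding flags. */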
381 static void
382 gen8_mfd_avc_img_state(VADriverContextP ctx,
383                        struct decode_state *decode_state,
384                        struct gen7_mfd_context *gen7_mfd_context)
385 {
386     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
387     int img_struct;
388     int mbaff_frame_flag;
389     unsigned int width_in_mbs, height_in_mbs;
390     VAPictureParameterBufferH264 *pic_param;
391
392     assert(decode_state->pic_param && decode_state->pic_param->buffer);
393     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
394     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
395
396     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
397         img_struct = 1;
398     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
399         img_struct = 3;
400     else
401         img_struct = 0;
402
403     if ((img_struct & 0x1) == 0x1) {
404         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
405     } else {
406         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
407     }
408
409     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
410         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
411         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
412     } else {
413         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
414     }
415
416     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
417                         !pic_param->pic_fields.bits.field_pic_flag);
418
419     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
420     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
421
422     /* MFX unit doesn't support 4:2:2 and 4:4:4 pictures */
423     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
424            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
425     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
426
427     BEGIN_BCS_BATCH(batch, 17);
428     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
429     OUT_BCS_BATCH(batch, 
430                   (width_in_mbs * height_in_mbs - 1));
431     OUT_BCS_BATCH(batch, 
432                   ((height_in_mbs - 1) << 16) | 
433                   ((width_in_mbs - 1) << 0));
434     OUT_BCS_BATCH(batch, 
435                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
436                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
437                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
438                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
439                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differs from GEN6 */
440                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
441                   (img_struct << 8));
442     OUT_BCS_BATCH(batch,
443                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
444                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
445                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
446                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
447                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
448                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
449                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
450                   (mbaff_frame_flag << 1) |
451                   (pic_param->pic_fields.bits.field_pic_flag << 0));
452     OUT_BCS_BATCH(batch, 0);
453     OUT_BCS_BATCH(batch, 0);
454     OUT_BCS_BATCH(batch, 0);
455     OUT_BCS_BATCH(batch, 0);
456     OUT_BCS_BATCH(batch, 0);
457     OUT_BCS_BATCH(batch, 0);
458     OUT_BCS_BATCH(batch, 0);
459     OUT_BCS_BATCH(batch, 0);
460     OUT_BCS_BATCH(batch, 0);
461     OUT_BCS_BATCH(batch, 0);
462     OUT_BCS_BATCH(batch, 0);
463     OUT_BCS_BATCH(batch, 0);
464     ADVANCE_BCS_BATCH(batch);
465 }
466
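/* Load the AVC scaling lists: 4x4 intra/inter always, 8x8 intra/inter only
 * when transform_8x8_mode_flag is set; falls back to the context's default
 * flat matrices if no IQ matrix buffer was supplied. */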
467 static void
468 gen8_mfd_avc_qm_state(VADriverContextP ctx,
469                       struct decode_state *decode_state,
470                       struct gen7_mfd_context *gen7_mfd_context)
471 {
472     VAIQMatrixBufferH264 *iq_matrix;
473     VAPictureParameterBufferH264 *pic_param;
474
475     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
476         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
477     else
478         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
479
480     assert(decode_state->pic_param && decode_state->pic_param->buffer);
481     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
482
483     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
484     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
485
486     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
487         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
488         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
489     }
490 }
491
492 static void
493 gen8_mfd_avc_picid_state(VADriverContextP ctx,
494                       struct decode_state *decode_state,
495                       struct gen7_mfd_context *gen7_mfd_context)
496 {
497     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
498
499     BEGIN_BCS_BATCH(batch, 10);
500     OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
501     OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
502     OUT_BCS_BATCH(batch, 0);
503     OUT_BCS_BATCH(batch, 0);
504     OUT_BCS_BATCH(batch, 0);
505     OUT_BCS_BATCH(batch, 0);
506     OUT_BCS_BATCH(batch, 0);
507     OUT_BCS_BATCH(batch, 0);
508     OUT_BCS_BATCH(batch, 0);
509     OUT_BCS_BATCH(batch, 0);
510     ADVANCE_BCS_BATCH(batch);
511 }
512
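/* Emit MFX_AVC_DIRECTMODE_STATE: direct-MV buffers for the reference frames
 * and the current picture, followed by the POC list for direct-mode
 * prediction. */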
513 static void
514 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
515                               struct decode_state *decode_state,
516                               VAPictureParameterBufferH264 *pic_param,
517                               VASliceParameterBufferH264 *slice_param,
518                               struct gen7_mfd_context *gen7_mfd_context)
519 {
520     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
521     struct object_surface *obj_surface;
522     GenAvcSurface *gen7_avc_surface;
523     VAPictureH264 *va_pic;
524     int i, j;
525
526     BEGIN_BCS_BATCH(batch, 71);
527     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
528
529     /* reference surfaces 0..15 */
530     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
531         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
532             gen7_mfd_context->reference_surface[i].obj_surface &&
533             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
534
535             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
536             gen7_avc_surface = obj_surface->private_data;
537
538             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
539                           I915_GEM_DOMAIN_INSTRUCTION, 0,
540                           0);
541             OUT_BCS_BATCH(batch, 0);
542         } else {
543             OUT_BCS_BATCH(batch, 0);
544             OUT_BCS_BATCH(batch, 0);
545         }
546     }
547     
548     OUT_BCS_BATCH(batch, 0);
549
550     /* the current decoding frame/field */
551     va_pic = &pic_param->CurrPic;
552     obj_surface = decode_state->render_object;
553     assert(obj_surface->bo && obj_surface->private_data);
554     gen7_avc_surface = obj_surface->private_data;
555
556     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
557                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
558                   0);
559
560     OUT_BCS_BATCH(batch, 0);
561     OUT_BCS_BATCH(batch, 0);
562
563     /* POC List */
564     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
565         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
566             int found = 0;
567
568             assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
569
570             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
571                 va_pic = &pic_param->ReferenceFrames[j];
572                 
573                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
574                     continue;
575
576                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
577                     found = 1;
578                     break;
579                 }
580             }
581
582             assert(found == 1);
583             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
584             
585             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
586             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
587         } else {
588             OUT_BCS_BATCH(batch, 0);
589             OUT_BCS_BATCH(batch, 0);
590         }
591     }
592
593     va_pic = &pic_param->CurrPic;
594     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
595     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
596
597     ADVANCE_BCS_BATCH(batch);
598 }
599
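/* Emit MFX_AVC_SLICE_STATE: slice type, active reference counts, QP and
 * deblocking parameters, and the macroblock positions of this slice and the
 * next one (or the bottom of the picture for the last slice). */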
600 static void
601 gen8_mfd_avc_slice_state(VADriverContextP ctx,
602                          VAPictureParameterBufferH264 *pic_param,
603                          VASliceParameterBufferH264 *slice_param,
604                          VASliceParameterBufferH264 *next_slice_param,
605                          struct gen7_mfd_context *gen7_mfd_context)
606 {
607     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
608     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
609     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
610     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
611     int num_ref_idx_l0, num_ref_idx_l1;
612     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
613                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
614     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
615     int slice_type;
616
617     if (slice_param->slice_type == SLICE_TYPE_I ||
618         slice_param->slice_type == SLICE_TYPE_SI) {
619         slice_type = SLICE_TYPE_I;
620     } else if (slice_param->slice_type == SLICE_TYPE_P ||
621                slice_param->slice_type == SLICE_TYPE_SP) {
622         slice_type = SLICE_TYPE_P;
623     } else { 
624         assert(slice_param->slice_type == SLICE_TYPE_B);
625         slice_type = SLICE_TYPE_B;
626     }
627
628     if (slice_type == SLICE_TYPE_I) {
629         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
630         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
631         num_ref_idx_l0 = 0;
632         num_ref_idx_l1 = 0;
633     } else if (slice_type == SLICE_TYPE_P) {
634         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
635         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
636         num_ref_idx_l1 = 0;
637     } else {
638         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
639         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
640     }
641
642     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
643     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
644     slice_ver_pos = first_mb_in_slice / width_in_mbs;
645
646     if (next_slice_param) {
647         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
648         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
649         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
650     } else {
651         next_slice_hor_pos = 0;
652         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
653     }
654
655     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
656     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
657     OUT_BCS_BATCH(batch, slice_type);
658     OUT_BCS_BATCH(batch, 
659                   (num_ref_idx_l1 << 24) |
660                   (num_ref_idx_l0 << 16) |
661                   (slice_param->chroma_log2_weight_denom << 8) |
662                   (slice_param->luma_log2_weight_denom << 0));
663     OUT_BCS_BATCH(batch, 
664                   (slice_param->direct_spatial_mv_pred_flag << 29) |
665                   (slice_param->disable_deblocking_filter_idc << 27) |
666                   (slice_param->cabac_init_idc << 24) |
667                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
668                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
669                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
670     OUT_BCS_BATCH(batch, 
671                   (slice_ver_pos << 24) |
672                   (slice_hor_pos << 16) | 
673                   (first_mb_in_slice << 0));
674     OUT_BCS_BATCH(batch,
675                   (next_slice_ver_pos << 16) |
676                   (next_slice_hor_pos << 0));
677     OUT_BCS_BATCH(batch, 
678                   (next_slice_param == NULL) << 19); /* last slice flag */
679     OUT_BCS_BATCH(batch, 0);
680     OUT_BCS_BATCH(batch, 0);
681     OUT_BCS_BATCH(batch, 0);
682     OUT_BCS_BATCH(batch, 0);
683     ADVANCE_BCS_BATCH(batch);
684 }
685
686 static inline void
687 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
688                            VAPictureParameterBufferH264 *pic_param,
689                            VASliceParameterBufferH264 *slice_param,
690                            struct gen7_mfd_context *gen7_mfd_context)
691 {
692     gen6_send_avc_ref_idx_state(
693         gen7_mfd_context->base.batch,
694         slice_param,
695         gen7_mfd_context->reference_surface
696     );
697 }
698
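/* Emit MFX_AVC_WEIGHTOFFSET_STATE for explicit weighted prediction: one
 * table (L0) for weighted P/SP slices, two tables (L0/L1) for B slices with
 * weighted_bipred_idc == 1. */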
699 static void
700 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
701                                 VAPictureParameterBufferH264 *pic_param,
702                                 VASliceParameterBufferH264 *slice_param,
703                                 struct gen7_mfd_context *gen7_mfd_context)
704 {
705     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
706     int i, j, num_weight_offset_table = 0;
707     short weightoffsets[32 * 6];
708
709     if ((slice_param->slice_type == SLICE_TYPE_P ||
710          slice_param->slice_type == SLICE_TYPE_SP) &&
711         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
712         num_weight_offset_table = 1;
713     }
714     
715     if ((slice_param->slice_type == SLICE_TYPE_B) &&
716         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
717         num_weight_offset_table = 2;
718     }
719
720     for (i = 0; i < num_weight_offset_table; i++) {
721         BEGIN_BCS_BATCH(batch, 98);
722         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
723         OUT_BCS_BATCH(batch, i);
724
725         if (i == 0) {
726             for (j = 0; j < 32; j++) {
727                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
728                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
729                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
730                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
731                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
732                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
733             }
734         } else {
735             for (j = 0; j < 32; j++) {
736                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
737                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
738                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
739                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
740                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
741                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
742             }
743         }
744
745         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
746         ADVANCE_BCS_BATCH(batch);
747     }
748 }
749
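/* Emit MFD_AVC_BSD_OBJECT for one slice: slice data size and offset in the
 * indirect bitstream buffer plus the bit offset of the first macroblock. */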
750 static void
751 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
752                         VAPictureParameterBufferH264 *pic_param,
753                         VASliceParameterBufferH264 *slice_param,
754                         dri_bo *slice_data_bo,
755                         VASliceParameterBufferH264 *next_slice_param,
756                         struct gen7_mfd_context *gen7_mfd_context)
757 {
758     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
759     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
760                                                             slice_param,
761                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
762
763     /* the input bitstream format on GEN7 differs from GEN6 */
764     BEGIN_BCS_BATCH(batch, 6);
765     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
766     OUT_BCS_BATCH(batch, 
767                   (slice_param->slice_data_size));
768     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
769     OUT_BCS_BATCH(batch,
770                   (0 << 31) |
771                   (0 << 14) |
772                   (0 << 12) |
773                   (0 << 10) |
774                   (0 << 8));
775     OUT_BCS_BATCH(batch,
776                   ((slice_data_bit_offset >> 3) << 16) |
777                   (1 << 7)  |
778                   (0 << 5)  |
779                   (0 << 4)  |
780                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
781                   (slice_data_bit_offset & 0x7));
782     OUT_BCS_BATCH(batch, 0);
783     ADVANCE_BCS_BATCH(batch);
784 }
785
786 static inline void
787 gen8_mfd_avc_context_init(
788     VADriverContextP         ctx,
789     struct gen7_mfd_context *gen7_mfd_context
790 )
791 {
792     /* Initialize flat scaling lists */
793     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
794 }
795
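/* Per-picture AVC setup: detect whether in-loop deblocking is needed, update
 * the reference frame store, make sure the render surface and its private
 * data exist, and (re)allocate the row-store scratch buffers. */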
796 static void
797 gen8_mfd_avc_decode_init(VADriverContextP ctx,
798                          struct decode_state *decode_state,
799                          struct gen7_mfd_context *gen7_mfd_context)
800 {
801     VAPictureParameterBufferH264 *pic_param;
802     VASliceParameterBufferH264 *slice_param;
803     struct i965_driver_data *i965 = i965_driver_data(ctx);
804     struct object_surface *obj_surface;
805     dri_bo *bo;
806     int i, j, enable_avc_ildb = 0;
807     unsigned int width_in_mbs, height_in_mbs;
808
809     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
810         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
811         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
812
813         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
814             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
815             assert((slice_param->slice_type == SLICE_TYPE_I) ||
816                    (slice_param->slice_type == SLICE_TYPE_SI) ||
817                    (slice_param->slice_type == SLICE_TYPE_P) ||
818                    (slice_param->slice_type == SLICE_TYPE_SP) ||
819                    (slice_param->slice_type == SLICE_TYPE_B));
820
821             if (slice_param->disable_deblocking_filter_idc != 1) {
822                 enable_avc_ildb = 1;
823                 break;
824             }
825
826             slice_param++;
827         }
828     }
829
830     assert(decode_state->pic_param && decode_state->pic_param->buffer);
831     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
832     intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
833     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
834     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
835     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
836     assert(height_in_mbs > 0 && height_in_mbs <= 256);
837
838     /* Current decoded picture */
839     obj_surface = decode_state->render_object;
840     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
841     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
842
843     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
844     gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
845
846     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
847     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
848     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
849     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
850
851     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
852     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
853     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
854     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
855
856     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
857     bo = dri_bo_alloc(i965->intel.bufmgr,
858                       "intra row store",
859                       width_in_mbs * 64,
860                       0x1000);
861     assert(bo);
862     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
863     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
864
865     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
866     bo = dri_bo_alloc(i965->intel.bufmgr,
867                       "deblocking filter row store",
868                       width_in_mbs * 64 * 4,
869                       0x1000);
870     assert(bo);
871     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
872     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
873
874     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
875     bo = dri_bo_alloc(i965->intel.bufmgr,
876                       "bsd mpc row store",
877                       width_in_mbs * 64 * 2,
878                       0x1000);
879     assert(bo);
880     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
881     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
882
883     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
884     bo = dri_bo_alloc(i965->intel.bufmgr,
885                       "mpr row store",
886                       width_in_mbs * 64 * 2,
887                       0x1000);
888     assert(bo);
889     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
890     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
891
892     gen7_mfd_context->bitplane_read_buffer.valid = 0;
893 }
894
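/* Top-level AVC decode: emit the per-picture MFX states once, then per-slice
 * direct-mode, ref-idx, weight/offset, slice-state and BSD objects for every
 * slice in every slice parameter buffer. */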
895 static void
896 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
897                             struct decode_state *decode_state,
898                             struct gen7_mfd_context *gen7_mfd_context)
899 {
900     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
901     VAPictureParameterBufferH264 *pic_param;
902     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
903     dri_bo *slice_data_bo;
904     int i, j;
905
906     assert(decode_state->pic_param && decode_state->pic_param->buffer);
907     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
908     gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
909
910     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
911     intel_batchbuffer_emit_mi_flush(batch);
912     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
913     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
914     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
915     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
916     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
917     gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
918     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
919
920     for (j = 0; j < decode_state->num_slice_params; j++) {
921         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
922         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
923         slice_data_bo = decode_state->slice_datas[j]->bo;
924         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
925
926         if (j == decode_state->num_slice_params - 1)
927             next_slice_group_param = NULL;
928         else
929             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
930
931         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
932             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
933             assert((slice_param->slice_type == SLICE_TYPE_I) ||
934                    (slice_param->slice_type == SLICE_TYPE_SI) ||
935                    (slice_param->slice_type == SLICE_TYPE_P) ||
936                    (slice_param->slice_type == SLICE_TYPE_SP) ||
937                    (slice_param->slice_type == SLICE_TYPE_B));
938
939             if (i < decode_state->slice_params[j]->num_elements - 1)
940                 next_slice_param = slice_param + 1;
941             else
942                 next_slice_param = next_slice_group_param;
943
944             gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
945             gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
946             gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
947             gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
948             gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
949             slice_param++;
950         }
951     }
952
953     intel_batchbuffer_end_atomic(batch);
954     intel_batchbuffer_flush(batch);
955 }
956
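/* Per-picture MPEG-2 setup: update the reference surfaces, allocate the NV12
 * render target, point the pre-deblocking output at it and allocate the
 * BSD/MPC row-store scratch buffer. */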
957 static void
958 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
959                            struct decode_state *decode_state,
960                            struct gen7_mfd_context *gen7_mfd_context)
961 {
962     VAPictureParameterBufferMPEG2 *pic_param;
963     struct i965_driver_data *i965 = i965_driver_data(ctx);
964     struct object_surface *obj_surface;
965     dri_bo *bo;
966     unsigned int width_in_mbs;
967
968     assert(decode_state->pic_param && decode_state->pic_param->buffer);
969     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
970     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
971
972     mpeg2_set_reference_surfaces(
973         ctx,
974         gen7_mfd_context->reference_surface,
975         decode_state,
976         pic_param
977     );
978
979     /* Current decoded picture */
980     obj_surface = decode_state->render_object;
981     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
982
983     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
984     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
985     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
986     gen7_mfd_context->pre_deblocking_output.valid = 1;
987
988     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
989     bo = dri_bo_alloc(i965->intel.bufmgr,
990                       "bsd mpc row store",
991                       width_in_mbs * 96,
992                       0x1000);
993     assert(bo);
994     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
995     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
996
997     gen7_mfd_context->post_deblocking_output.valid = 0;
998     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
999     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1000     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1001     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1002 }
1003
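/* Emit MFX_MPEG2_PIC_STATE from the VA picture parameters: f_codes, picture
 * coding extension flags and the coded size in macroblocks. */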
1004 static void
1005 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1006                          struct decode_state *decode_state,
1007                          struct gen7_mfd_context *gen7_mfd_context)
1008 {
1009     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1010     VAPictureParameterBufferMPEG2 *pic_param;
1011     unsigned int slice_concealment_disable_bit = 0;
1012
1013     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1014     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1015
1016     slice_concealment_disable_bit = 1;
1017
1018     BEGIN_BCS_BATCH(batch, 13);
1019     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1020     OUT_BCS_BATCH(batch,
1021                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1022                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1023                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1024                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1025                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1026                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1027                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1028                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1029                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1030                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1031                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1032                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1033     OUT_BCS_BATCH(batch,
1034                   pic_param->picture_coding_type << 9);
1035     OUT_BCS_BATCH(batch,
1036                   (slice_concealment_disable_bit << 31) |
1037                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1038                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1039     OUT_BCS_BATCH(batch, 0);
1040     OUT_BCS_BATCH(batch, 0);
1041     OUT_BCS_BATCH(batch, 0);
1042     OUT_BCS_BATCH(batch, 0);
1043     OUT_BCS_BATCH(batch, 0);
1044     OUT_BCS_BATCH(batch, 0);
1045     OUT_BCS_BATCH(batch, 0);
1046     OUT_BCS_BATCH(batch, 0);
1047     OUT_BCS_BATCH(batch, 0);
1048     ADVANCE_BCS_BATCH(batch);
1049 }
1050
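/* Track the MPEG-2 intra/non-intra quantizer matrices (converting them from
 * zig-zag to raster order) and load the ones that are present into the MFX
 * unit. */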
1051 static void
1052 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1053                         struct decode_state *decode_state,
1054                         struct gen7_mfd_context *gen7_mfd_context)
1055 {
1056     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1057     int i, j;
1058
1059     /* Update internal QM state */
1060     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1061         VAIQMatrixBufferMPEG2 * const iq_matrix =
1062             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1063
1064         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1065             iq_matrix->load_intra_quantiser_matrix) {
1066             gen_iq_matrix->load_intra_quantiser_matrix =
1067                 iq_matrix->load_intra_quantiser_matrix;
1068             if (iq_matrix->load_intra_quantiser_matrix) {
1069                 for (j = 0; j < 64; j++)
1070                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1071                         iq_matrix->intra_quantiser_matrix[j];
1072             }
1073         }
1074
1075         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1076             iq_matrix->load_non_intra_quantiser_matrix) {
1077             gen_iq_matrix->load_non_intra_quantiser_matrix =
1078                 iq_matrix->load_non_intra_quantiser_matrix;
1079             if (iq_matrix->load_non_intra_quantiser_matrix) {
1080                 for (j = 0; j < 64; j++)
1081                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1082                         iq_matrix->non_intra_quantiser_matrix[j];
1083             }
1084         }
1085     }
1086
1087     /* Commit QM state to HW */
1088     for (i = 0; i < 2; i++) {
1089         unsigned char *qm = NULL;
1090         int qm_type;
1091
1092         if (i == 0) {
1093             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1094                 qm = gen_iq_matrix->intra_quantiser_matrix;
1095                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1096             }
1097         } else {
1098             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1099                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1100                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1101             }
1102         }
1103
1104         if (!qm)
1105             continue;
1106
1107         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1108     }
1109 }
1110
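/* Emit MFD_MPEG2_BSD_OBJECT for one slice: data size/offset adjusted by the
 * whole bytes of macroblock_offset, the slice start position, and the
 * macroblock count derived from this slice and the next one. */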
1111 static void
1112 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1113                           VAPictureParameterBufferMPEG2 *pic_param,
1114                           VASliceParameterBufferMPEG2 *slice_param,
1115                           VASliceParameterBufferMPEG2 *next_slice_param,
1116                           struct gen7_mfd_context *gen7_mfd_context)
1117 {
1118     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1119     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1120     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1121
1122     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1123         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1124         is_field_pic = 1;
1125     is_field_pic_wa = is_field_pic &&
1126         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1127
1128     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1129     hpos0 = slice_param->slice_horizontal_position;
1130
1131     if (next_slice_param == NULL) {
1132         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1133         hpos1 = 0;
1134     } else {
1135         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1136         hpos1 = next_slice_param->slice_horizontal_position;
1137     }
1138
1139     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1140
1141     BEGIN_BCS_BATCH(batch, 5);
1142     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1143     OUT_BCS_BATCH(batch, 
1144                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1145     OUT_BCS_BATCH(batch, 
1146                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1147     OUT_BCS_BATCH(batch,
1148                   hpos0 << 24 |
1149                   vpos0 << 16 |
1150                   mb_count << 8 |
1151                   (next_slice_param == NULL) << 5 |
1152                   (next_slice_param == NULL) << 3 |
1153                   (slice_param->macroblock_offset & 0x7));
1154     OUT_BCS_BATCH(batch,
1155                   (slice_param->quantiser_scale_code << 24) |
1156                   (vpos1 << 8 | hpos1));
1157     ADVANCE_BCS_BATCH(batch);
1158 }
1159
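/* Top-level MPEG-2 decode: per-picture MFX states, then one BSD object per
 * slice, applying the field-picture slice_vertical_position workaround when
 * needed. */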
1160 static void
1161 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1162                               struct decode_state *decode_state,
1163                               struct gen7_mfd_context *gen7_mfd_context)
1164 {
1165     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1166     VAPictureParameterBufferMPEG2 *pic_param;
1167     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1168     dri_bo *slice_data_bo;
1169     int i, j;
1170
1171     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1172     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1173
1174     gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1175     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1176     intel_batchbuffer_emit_mi_flush(batch);
1177     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1178     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1179     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1180     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1181     gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1182     gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1183
1184     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1185         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1186             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1187
1188     for (j = 0; j < decode_state->num_slice_params; j++) {
1189         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1190         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1191         slice_data_bo = decode_state->slice_datas[j]->bo;
1192         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1193
1194         if (j == decode_state->num_slice_params - 1)
1195             next_slice_group_param = NULL;
1196         else
1197             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1198
1199         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1200             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1201
1202             if (i < decode_state->slice_params[j]->num_elements - 1)
1203                 next_slice_param = slice_param + 1;
1204             else
1205                 next_slice_param = next_slice_group_param;
1206
1207             gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1208             slice_param++;
1209         }
1210     }
1211
1212     intel_batchbuffer_end_atomic(batch);
1213     intel_batchbuffer_flush(batch);
1214 }
1215
1216 static const int va_to_gen7_vc1_pic_type[5] = {
1217     GEN7_VC1_I_PICTURE,
1218     GEN7_VC1_P_PICTURE,
1219     GEN7_VC1_B_PICTURE,
1220     GEN7_VC1_BI_PICTURE,
1221     GEN7_VC1_P_PICTURE,
1222 };
1223
1224 static const int va_to_gen7_vc1_mv[4] = {
1225     1, /* 1-MV */
1226     2, /* 1-MV half-pel */
1227     3, /* 1-MV half-pel bilinear */
1228     0, /* Mixed MV */
1229 };
1230
1231 static const int b_picture_scale_factor[21] = {
1232     128, 85,  170, 64,  192,
1233     51,  102, 153, 204, 43,
1234     215, 37,  74,  111, 148,
1235     185, 222, 32,  96,  160, 
1236     224,
1237 };
1238
1239 static const int va_to_gen7_vc1_condover[3] = {
1240     0,
1241     2,
1242     3
1243 };
1244
1245 static const int va_to_gen7_vc1_profile[4] = {
1246     GEN7_VC1_SIMPLE_PROFILE,
1247     GEN7_VC1_MAIN_PROFILE,
1248     GEN7_VC1_RESERVED_PROFILE,
1249     GEN7_VC1_ADVANCED_PROFILE
1250 };
1251
1252 static void 
1253 gen8_mfd_free_vc1_surface(void **data)
1254 {
1255     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1256
1257     if (!gen7_vc1_surface)
1258         return;
1259
1260     dri_bo_unreference(gen7_vc1_surface->dmv);
1261     free(gen7_vc1_surface);
1262     *data = NULL;
1263 }
1264
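/* Allocate the per-surface VC-1 private data (struct gen7_vc1_surface) and
 * its direct-MV buffer, and record the picture type. */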
1265 static void
1266 gen8_mfd_init_vc1_surface(VADriverContextP ctx, 
1267                           VAPictureParameterBufferVC1 *pic_param,
1268                           struct object_surface *obj_surface)
1269 {
1270     struct i965_driver_data *i965 = i965_driver_data(ctx);
1271     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1272     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1273     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1274
1275     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1276
1277     if (!gen7_vc1_surface) {
1278         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1279         assert((obj_surface->size & 0x3f) == 0);
1280         obj_surface->private_data = gen7_vc1_surface;
1281     }
1282
1283     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1284
1285     if (gen7_vc1_surface->dmv == NULL) {
1286         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1287                                              "direct mv w/r buffer",
1288                                              width_in_mbs * height_in_mbs * 64,
1289                                              0x1000);
1290     }
1291 }
1292
1293 static void
1294 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1295                          struct decode_state *decode_state,
1296                          struct gen7_mfd_context *gen7_mfd_context)
1297 {
1298     VAPictureParameterBufferVC1 *pic_param;
1299     struct i965_driver_data *i965 = i965_driver_data(ctx);
1300     struct object_surface *obj_surface;
1301     dri_bo *bo;
1302     int width_in_mbs;
1303     int picture_type;
1304
1305     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1306     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1307     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1308     picture_type = pic_param->picture_fields.bits.picture_type;
1309  
1310     intel_update_vc1_frame_store_index(ctx,
1311                                        decode_state,
1312                                        pic_param,
1313                                        gen7_mfd_context->reference_surface);
1314
1315     /* Current decoded picture */
1316     obj_surface = decode_state->render_object;
1317     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1318     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1319
1320     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1321     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1322     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1323     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1324
1325     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1326     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1327     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1328     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1329
1330     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1331     bo = dri_bo_alloc(i965->intel.bufmgr,
1332                       "intra row store",
1333                       width_in_mbs * 64,
1334                       0x1000);
1335     assert(bo);
1336     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1337     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1338
1339     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1340     bo = dri_bo_alloc(i965->intel.bufmgr,
1341                       "deblocking filter row store",
1342                       width_in_mbs * 7 * 64,
1343                       0x1000);
1344     assert(bo);
1345     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1346     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1347
1348     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1349     bo = dri_bo_alloc(i965->intel.bufmgr,
1350                       "bsd mpc row store",
1351                       width_in_mbs * 96,
1352                       0x1000);
1353     assert(bo);
1354     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1355     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1356
1357     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1358
1359     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1360     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1361     
1362     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1363         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1364         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1365         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1366         int src_w, src_h;
1367         uint8_t *src = NULL, *dst = NULL;
1368
1369         assert(decode_state->bit_plane->buffer);
1370         src = decode_state->bit_plane->buffer;
1371
1372         bo = dri_bo_alloc(i965->intel.bufmgr,
1373                           "VC-1 Bitplane",
1374                           bitplane_width * height_in_mbs,
1375                           0x1000);
1376         assert(bo);
1377         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1378
1379         dri_bo_map(bo, True);
1380         assert(bo->virtual);
1381         dst = bo->virtual;
1382
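             /* Repack the VA-API bitplane into the layout the hardware reads:
              * libva packs two macroblocks per byte with the even-indexed
              * macroblock in the high nibble, while the buffer built here keeps
              * the even macroblock in the low nibble and pads every macroblock
              * row to bitplane_width bytes.  For skipped pictures, bit 1 of each
              * 4-bit entry is forced on. */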
1383         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1384             for (src_w = 0; src_w < width_in_mbs; src_w++) {
1385                 int src_index, dst_index;
1386                 int src_shift;
1387                 uint8_t src_value;
1388
1389                 src_index = (src_h * width_in_mbs + src_w) / 2;
1390                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1391                 src_value = ((src[src_index] >> src_shift) & 0xf);
1392
1393                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1394                     src_value |= 0x2;
1395                 }
1396
1397                 dst_index = src_w / 2;
1398                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1399             }
1400
1401             if (src_w & 1)
1402                 dst[src_w / 2] >>= 4;
1403
1404             dst += bitplane_width;
1405         }
1406
1407         dri_bo_unmap(bo);
1408     } else
1409         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1410 }
1411
1412 static void
1413 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1414                        struct decode_state *decode_state,
1415                        struct gen7_mfd_context *gen7_mfd_context)
1416 {
1417     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1418     VAPictureParameterBufferVC1 *pic_param;
1419     struct object_surface *obj_surface;
1420     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1421     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1422     int unified_mv_mode;
1423     int ref_field_pic_polarity = 0;
1424     int scale_factor = 0;
1425     int trans_ac_y = 0;
1426     int dmv_surface_valid = 0;
1427     int brfd = 0;
1428     int fcm = 0;
1429     int picture_type;
1430     int profile;
1431     int overlap;
1432     int interpolation_mode = 0;
1433
1434     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1435     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1436
1437     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1438     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1439     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1440     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1441     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1442     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1443     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1444     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1445
1446     if (dquant == 0) {
1447         alt_pquant_config = 0;
1448         alt_pquant_edge_mask = 0;
1449     } else if (dquant == 2) {
1450         alt_pquant_config = 1;
1451         alt_pquant_edge_mask = 0xf;
1452     } else {
1453         assert(dquant == 1);
1454         if (dquantfrm == 0) {
1455             alt_pquant_config = 0;
1456             alt_pquant_edge_mask = 0;
1457             alt_pq = 0;
1458         } else {
1459             assert(dquantfrm == 1);
1460             alt_pquant_config = 1;
1461
1462             switch (dqprofile) {
1463             case 3:
1464                 if (dqbilevel == 0) {
1465                     alt_pquant_config = 2;
1466                     alt_pquant_edge_mask = 0;
1467                 } else {
1468                     assert(dqbilevel == 1);
1469                     alt_pquant_config = 3;
1470                     alt_pquant_edge_mask = 0;
1471                 }
1472                 break;
1473                 
1474             case 0:
1475                 alt_pquant_edge_mask = 0xf;
1476                 break;
1477
1478             case 1:
1479                 if (dqdbedge == 3)
1480                     alt_pquant_edge_mask = 0x9;
1481                 else
1482                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1483
1484                 break;
1485
1486             case 2:
1487                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1488                 break;
1489
1490             default:
1491                 assert(0);
1492             }
1493         }
1494     }
1495
1496     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1497         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1498         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1499     } else {
1500         assert(pic_param->mv_fields.bits.mv_mode < 4);
1501         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1502     }
1503
1504     if (pic_param->sequence_fields.bits.interlace == 1 &&
1505         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1506         /* FIXME: calculate reference field picture polarity */
1507         assert(0);
1508         ref_field_pic_polarity = 0;
1509     }
1510
1511     if (pic_param->b_picture_fraction < 21)
1512         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1513
1514     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1515     
1516     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1517         picture_type == GEN7_VC1_I_PICTURE)
1518         picture_type = GEN7_VC1_BI_PICTURE;
1519
1520     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1521         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1522     else {
1523         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1524
1525         /*
1526          * 8.3.6.2.1 Transform Type Selection
1527          * If variable-sized transform coding is not enabled,
1528          * then the 8x8 transform shall be used for all blocks.
1529          * It is also an MFX_VC1_PIC_STATE requirement.
1530          */
1531         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1532             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1533             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1534         }
1535     }
1536
1537     if (picture_type == GEN7_VC1_B_PICTURE) {
1538         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1539
1540         obj_surface = decode_state->reference_objects[1];
1541
1542         if (obj_surface)
1543             gen7_vc1_surface = obj_surface->private_data;
1544
1545         if (!gen7_vc1_surface || 
1546             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1547              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1548             dmv_surface_valid = 0;
1549         else
1550             dmv_surface_valid = 1;
1551     }
1552
1553     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1554
1555     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1556         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1557     else {
1558         if (pic_param->picture_fields.bits.top_field_first)
1559             fcm = 2;
1560         else
1561             fcm = 3;
1562     }
1563
1564     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1565         brfd = pic_param->reference_fields.bits.reference_distance;
1566         brfd = (scale_factor * brfd) >> 8;
1567         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1568
1569         if (brfd < 0)
1570             brfd = 0;
1571     }
1572
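         /* Derive the overlap-smoothing flag programmed into MFD_VC1_LONG_PIC_STATE:
          * it follows PQUANT (pic_quantizer_scale >= 9) and, for advanced-profile
          * I/BI pictures, the CONDOVER (conditional_overlap_flag) value as well. */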
1573     overlap = 0;
1574     if (profile != GEN7_VC1_ADVANCED_PROFILE) {
1575         if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1576             pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1577             overlap = 1;
1578         }
1579     } else {
1580         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1581             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1582             overlap = 1;
1583         }
1584         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1585             pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
1586             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1587                 overlap = 1;
1588             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1589                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1590                 overlap = 1;
1591             }
1592         }
1593     }
1594
1595     assert(pic_param->conditional_overlap_flag < 3);
1596     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1597
1598     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1599         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1600          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1601         interpolation_mode = 9; /* Half-pel bilinear */
1602     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1603              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1604               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1605         interpolation_mode = 1; /* Half-pel bicubic */
1606     else
1607         interpolation_mode = 0; /* Quarter-pel bicubic */
1608
1609     BEGIN_BCS_BATCH(batch, 6);
1610     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1611     OUT_BCS_BATCH(batch,
1612                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1613                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1614     OUT_BCS_BATCH(batch,
1615                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1616                   dmv_surface_valid << 15 |
1617                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1618                   pic_param->rounding_control << 13 |
1619                   pic_param->sequence_fields.bits.syncmarker << 12 |
1620                   interpolation_mode << 8 |
1621                   0 << 7 | /* FIXME: scale up or down ??? */
1622                   pic_param->range_reduction_frame << 6 |
1623                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1624                   overlap << 4 |
1625                   !pic_param->picture_fields.bits.is_first_field << 3 |
1626                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1627     OUT_BCS_BATCH(batch,
1628                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1629                   picture_type << 26 |
1630                   fcm << 24 |
1631                   alt_pq << 16 |
1632                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1633                   scale_factor << 0);
1634     OUT_BCS_BATCH(batch,
1635                   unified_mv_mode << 28 |
1636                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1637                   pic_param->fast_uvmc_flag << 26 |
1638                   ref_field_pic_polarity << 25 |
1639                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1640                   pic_param->reference_fields.bits.reference_distance << 20 |
1641                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1642                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1643                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1644                   alt_pquant_edge_mask << 4 |
1645                   alt_pquant_config << 2 |
1646                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1647                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1648     OUT_BCS_BATCH(batch,
1649                   !!pic_param->bitplane_present.value << 31 |
1650                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1651                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1652                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1653                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1654                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1655                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1656                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1657                   pic_param->mv_fields.bits.mv_table << 20 |
1658                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1659                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1660                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1661                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1662                   pic_param->mb_mode_table << 8 |
1663                   trans_ac_y << 6 |
1664                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1665                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1666                   pic_param->cbp_table << 0);
1667     ADVANCE_BCS_BATCH(batch);
1668 }
1669
1670 static void
1671 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1672                              struct decode_state *decode_state,
1673                              struct gen7_mfd_context *gen7_mfd_context)
1674 {
1675     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1676     VAPictureParameterBufferVC1 *pic_param;
1677     int intensitycomp_single;
1678
1679     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1680     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1681
1684     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1685
1686     BEGIN_BCS_BATCH(batch, 6);
1687     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1688     OUT_BCS_BATCH(batch,
1689                   0 << 14 | /* FIXME: double ??? */
1690                   0 << 12 |
1691                   intensitycomp_single << 10 |
1692                   intensitycomp_single << 8 |
1693                   0 << 4 | /* FIXME: interlace mode */
1694                   0);
1695     OUT_BCS_BATCH(batch,
1696                   pic_param->luma_shift << 16 |
1697                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1698     OUT_BCS_BATCH(batch, 0);
1699     OUT_BCS_BATCH(batch, 0);
1700     OUT_BCS_BATCH(batch, 0);
1701     ADVANCE_BCS_BATCH(batch);
1702 }
1703
1704 static void
1705 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1706                               struct decode_state *decode_state,
1707                               struct gen7_mfd_context *gen7_mfd_context)
1708 {
1709     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1710     struct object_surface *obj_surface;
1711     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1712
1713     obj_surface = decode_state->render_object;
1714
1715     if (obj_surface && obj_surface->private_data) {
1716         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1717     }
1718
1719     obj_surface = decode_state->reference_objects[1];
1720
1721     if (obj_surface && obj_surface->private_data) {
1722         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1723     }
1724
1725     BEGIN_BCS_BATCH(batch, 7);
1726     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1727
1728     if (dmv_write_buffer)
1729         OUT_BCS_RELOC(batch, dmv_write_buffer,
1730                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1731                       0);
1732     else
1733         OUT_BCS_BATCH(batch, 0);
1734
1735     OUT_BCS_BATCH(batch, 0);
1736     OUT_BCS_BATCH(batch, 0);
1737
1738     if (dmv_read_buffer)
1739         OUT_BCS_RELOC(batch, dmv_read_buffer,
1740                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1741                       0);
1742     else
1743         OUT_BCS_BATCH(batch, 0);
1744     
1745     OUT_BCS_BATCH(batch, 0);
1746     OUT_BCS_BATCH(batch, 0);
1747                   
1748     ADVANCE_BCS_BATCH(batch);
1749 }
1750
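     /* For the advanced profile (profile == 3) the slice data still contains the
      * start-code emulation-prevention bytes (a 0x03 following two zero bytes when
      * the next byte is below 4).  Count them so the returned macroblock data bit
      * offset indexes the raw buffer that is handed to the hardware. */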
1751 static int
1752 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1753 {
1754     int out_slice_data_bit_offset;
1755     int slice_header_size = in_slice_data_bit_offset / 8;
1756     int i, j;
1757
1758     if (profile != 3)
1759         out_slice_data_bit_offset = in_slice_data_bit_offset;
1760     else {
1761         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1762             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1763                 i++, j += 2;
1764             }
1765         }
1766
1767         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1768     }
1769
1770     return out_slice_data_bit_offset;
1771 }
1772
1773 static void
1774 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1775                         VAPictureParameterBufferVC1 *pic_param,
1776                         VASliceParameterBufferVC1 *slice_param,
1777                         VASliceParameterBufferVC1 *next_slice_param,
1778                         dri_bo *slice_data_bo,
1779                         struct gen7_mfd_context *gen7_mfd_context)
1780 {
1781     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1782     int next_slice_start_vert_pos;
1783     int macroblock_offset;
1784     uint8_t *slice_data = NULL;
1785
1786     dri_bo_map(slice_data_bo, 0);
1787     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1788     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1789                                                                slice_param->macroblock_offset,
1790                                                                pic_param->sequence_fields.bits.profile);
1791     dri_bo_unmap(slice_data_bo);
1792
1793     if (next_slice_param)
1794         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1795     else
1796         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1797
1798     BEGIN_BCS_BATCH(batch, 5);
1799     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1800     OUT_BCS_BATCH(batch, 
1801                   slice_param->slice_data_size - (macroblock_offset >> 3));
1802     OUT_BCS_BATCH(batch, 
1803                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1804     OUT_BCS_BATCH(batch,
1805                   slice_param->slice_vertical_position << 16 |
1806                   next_slice_start_vert_pos << 0);
1807     OUT_BCS_BATCH(batch,
1808                   (macroblock_offset & 0x7));
1809     ADVANCE_BCS_BATCH(batch);
1810 }
1811
1812 static void
1813 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1814                             struct decode_state *decode_state,
1815                             struct gen7_mfd_context *gen7_mfd_context)
1816 {
1817     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1818     VAPictureParameterBufferVC1 *pic_param;
1819     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1820     dri_bo *slice_data_bo;
1821     int i, j;
1822
1823     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1824     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1825
1826     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1827     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1828     intel_batchbuffer_emit_mi_flush(batch);
1829     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1830     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1831     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1832     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1833     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1834     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1835     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1836
1837     for (j = 0; j < decode_state->num_slice_params; j++) {
1838         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1839         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1840         slice_data_bo = decode_state->slice_datas[j]->bo;
1841         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1842
1843         if (j == decode_state->num_slice_params - 1)
1844             next_slice_group_param = NULL;
1845         else
1846             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1847
1848         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1849             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1850
1851             if (i < decode_state->slice_params[j]->num_elements - 1)
1852                 next_slice_param = slice_param + 1;
1853             else
1854                 next_slice_param = next_slice_group_param;
1855
1856             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1857             slice_param++;
1858         }
1859     }
1860
1861     intel_batchbuffer_end_atomic(batch);
1862     intel_batchbuffer_flush(batch);
1863 }
1864
1865 static void
1866 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1867                           struct decode_state *decode_state,
1868                           struct gen7_mfd_context *gen7_mfd_context)
1869 {
1870     struct object_surface *obj_surface;
1871     VAPictureParameterBufferJPEGBaseline *pic_param;
1872     int subsampling = SUBSAMPLE_YUV420;
1873     int fourcc = VA_FOURCC_IMC3;
1874
1875     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1876
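         /* Pick the render-target fourcc and subsampling that match the JPEG
          * component sampling factors; only the layouts checked below are
          * supported, anything else trips the assert. */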
1877     if (pic_param->num_components == 1)
1878         subsampling = SUBSAMPLE_YUV400;
1879     else if (pic_param->num_components == 3) {
1880         int h1 = pic_param->components[0].h_sampling_factor;
1881         int h2 = pic_param->components[1].h_sampling_factor;
1882         int h3 = pic_param->components[2].h_sampling_factor;
1883         int v1 = pic_param->components[0].v_sampling_factor;
1884         int v2 = pic_param->components[1].v_sampling_factor;
1885         int v3 = pic_param->components[2].v_sampling_factor;
1886
1887         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1888             v1 == 2 && v2 == 1 && v3 == 1) {
1889             subsampling = SUBSAMPLE_YUV420;
1890             fourcc = VA_FOURCC_IMC3;
1891         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1892                    v1 == 1 && v2 == 1 && v3 == 1) {
1893             subsampling = SUBSAMPLE_YUV422H;
1894             fourcc = VA_FOURCC_422H;
1895         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1896                    v1 == 1 && v2 == 1 && v3 == 1) {
1897             subsampling = SUBSAMPLE_YUV444;
1898             fourcc = VA_FOURCC_444P;
1899         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1900                    v1 == 1 && v2 == 1 && v3 == 1) {
1901             subsampling = SUBSAMPLE_YUV411;
1902             fourcc = VA_FOURCC_411P;
1903         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1904                    v1 == 2 && v2 == 1 && v3 == 1) {
1905             subsampling = SUBSAMPLE_YUV422V;
1906             fourcc = VA_FOURCC_422V;
1907         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1908                    v1 == 2 && v2 == 2 && v3 == 2) {
1909             subsampling = SUBSAMPLE_YUV422H;
1910             fourcc = VA_FOURCC_422H;
1911         } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1912                    v1 == 2 && v2 == 1 && v3 == 1) {
1913             subsampling = SUBSAMPLE_YUV422V;
1914             fourcc = VA_FOURCC_422V;
1915         } else
1916             assert(0);
1917     }
1918     else {
1919         assert(0);
1920     }
1921
1922     /* Current decoded picture */
1923     obj_surface = decode_state->render_object;
1924     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1925
1926     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1927     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1928     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1929     gen7_mfd_context->pre_deblocking_output.valid = 1;
1930
1931     gen7_mfd_context->post_deblocking_output.bo = NULL;
1932     gen7_mfd_context->post_deblocking_output.valid = 0;
1933
1934     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1935     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1936
1937     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1938     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1939
1940     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1941     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1942
1943     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1944     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1945
1946     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1947     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1948 }
1949
1950 static const int va_to_gen7_jpeg_rotation[4] = {
1951     GEN7_JPEG_ROTATION_0,
1952     GEN7_JPEG_ROTATION_90,
1953     GEN7_JPEG_ROTATION_180,
1954     GEN7_JPEG_ROTATION_270
1955 };
1956
1957 static void
1958 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1959                         struct decode_state *decode_state,
1960                         struct gen7_mfd_context *gen7_mfd_context)
1961 {
1962     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1963     VAPictureParameterBufferJPEGBaseline *pic_param;
1964     int chroma_type = GEN7_YUV420;
1965     int frame_width_in_blks;
1966     int frame_height_in_blks;
1967
1968     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1969     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1970
1971     if (pic_param->num_components == 1)
1972         chroma_type = GEN7_YUV400;
1973     else if (pic_param->num_components == 3) {
1974         int h1 = pic_param->components[0].h_sampling_factor;
1975         int h2 = pic_param->components[1].h_sampling_factor;
1976         int h3 = pic_param->components[2].h_sampling_factor;
1977         int v1 = pic_param->components[0].v_sampling_factor;
1978         int v2 = pic_param->components[1].v_sampling_factor;
1979         int v3 = pic_param->components[2].v_sampling_factor;
1980
1981         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1982             v1 == 2 && v2 == 1 && v3 == 1)
1983             chroma_type = GEN7_YUV420;
1984         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1985                  v1 == 1 && v2 == 1 && v3 == 1)
1986             chroma_type = GEN7_YUV422H_2Y;
1987         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1988                  v1 == 1 && v2 == 1 && v3 == 1)
1989             chroma_type = GEN7_YUV444;
1990         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1991                  v1 == 1 && v2 == 1 && v3 == 1)
1992             chroma_type = GEN7_YUV411;
1993         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1994                  v1 == 2 && v2 == 1 && v3 == 1)
1995             chroma_type = GEN7_YUV422V_2Y;
1996         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1997                  v1 == 2 && v2 == 2 && v3 == 2)
1998             chroma_type = GEN7_YUV422H_4Y;
1999         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2000                  v1 == 2 && v2 == 1 && v3 == 1)
2001             chroma_type = GEN7_YUV422V_4Y;
2002         else
2003             assert(0);
2004     }
2005
2006     if (chroma_type == GEN7_YUV400 ||
2007         chroma_type == GEN7_YUV444 ||
2008         chroma_type == GEN7_YUV422V_2Y) {
2009         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2010         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2011     } else if (chroma_type == GEN7_YUV411) {
2012         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2013         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2014     } else {
2015         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2016         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2017     }
2018
2019     BEGIN_BCS_BATCH(batch, 3);
2020     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2021     OUT_BCS_BATCH(batch,
2022                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2023                   (chroma_type << 0));
2024     OUT_BCS_BATCH(batch,
2025                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2026                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2027     ADVANCE_BCS_BATCH(batch);
2028 }
2029
2030 static const int va_to_gen7_jpeg_hufftable[2] = {
2031     MFX_HUFFTABLE_ID_Y,
2032     MFX_HUFFTABLE_ID_UV
2033 };
2034
2035 static void
2036 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2037                                struct decode_state *decode_state,
2038                                struct gen7_mfd_context *gen7_mfd_context,
2039                                int num_tables)
2040 {
2041     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2042     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2043     int index;
2044
2045     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2046         return;
2047
2048     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2049
2050     for (index = 0; index < num_tables; index++) {
2051         int id = va_to_gen7_jpeg_hufftable[index];
2052         if (!huffman_table->load_huffman_table[index])
2053             continue;
2054         BEGIN_BCS_BATCH(batch, 53);
2055         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2056         OUT_BCS_BATCH(batch, id);
2057         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2058         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2059         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2060         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2061         ADVANCE_BCS_BATCH(batch);
2062     }
2063 }
2064
2065 static const int va_to_gen7_jpeg_qm[5] = {
2066     -1,
2067     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2068     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2069     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2070     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2071 };
2072
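     /* Load the JPEG quantiser tables.  libva passes them in zig-zag scan order,
      * so each table is converted to raster order through zigzag_direct[] before
      * gen8_mfd_qm_state() emits it. */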
2073 static void
2074 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2075                        struct decode_state *decode_state,
2076                        struct gen7_mfd_context *gen7_mfd_context)
2077 {
2078     VAPictureParameterBufferJPEGBaseline *pic_param;
2079     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2080     int index;
2081
2082     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2083         return;
2084
2085     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2086     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2087
2088     assert(pic_param->num_components <= 3);
2089
2090     for (index = 0; index < pic_param->num_components; index++) {
2091         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2092         int qm_type;
2093         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2094         unsigned char raster_qm[64];
2095         int j;
2096
2097         if (id > 4 || id < 1)
2098             continue;
2099
2100         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2101             continue;
2102
2103         qm_type = va_to_gen7_jpeg_qm[id];
2104
2105         for (j = 0; j < 64; j++)
2106             raster_qm[zigzag_direct[j]] = qm[j];
2107
2108         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2109     }
2110 }
2111
2112 static void
2113 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2114                          VAPictureParameterBufferJPEGBaseline *pic_param,
2115                          VASliceParameterBufferJPEGBaseline *slice_param,
2116                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2117                          dri_bo *slice_data_bo,
2118                          struct gen7_mfd_context *gen7_mfd_context)
2119 {
2120     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2121     int scan_component_mask = 0;
2122     int i;
2123
2124     assert(slice_param->num_components > 0);
2125     assert(slice_param->num_components < 4);
2126     assert(slice_param->num_components <= pic_param->num_components);
2127
2128     for (i = 0; i < slice_param->num_components; i++) {
2129         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2130         case 1:
2131             scan_component_mask |= (1 << 0);
2132             break;
2133         case 2:
2134             scan_component_mask |= (1 << 1);
2135             break;
2136         case 3:
2137             scan_component_mask |= (1 << 2);
2138             break;
2139         default:
2140             assert(0);
2141             break;
2142         }
2143     }
2144
2145     BEGIN_BCS_BATCH(batch, 6);
2146     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2147     OUT_BCS_BATCH(batch, 
2148                   slice_param->slice_data_size);
2149     OUT_BCS_BATCH(batch, 
2150                   slice_param->slice_data_offset);
2151     OUT_BCS_BATCH(batch,
2152                   slice_param->slice_horizontal_position << 16 |
2153                   slice_param->slice_vertical_position << 0);
2154     OUT_BCS_BATCH(batch,
2155                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2156                   (scan_component_mask << 27) |                 /* scan components */
2157                   (0 << 26) |   /* disable interrupt allowed */
2158                   (slice_param->num_mcus << 0));                /* MCU count */
2159     OUT_BCS_BATCH(batch,
2160                   (slice_param->restart_interval << 0));    /* RestartInterval */
2161     ADVANCE_BCS_BATCH(batch);
2162 }
2163
2164 /* Workaround for JPEG decoding on Ivybridge */
2165 #ifdef JPEG_WA
2166
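     /* A tiny pre-encoded 16x16 AVC intra clip.  gen8_mfd_jpeg_wa() decodes it
      * through the AVC path before every JPEG picture as part of the Ivybridge
      * JPEG workaround (see gen8_mfd_jpeg_decode_picture). */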
2167 static struct {
2168     int width;
2169     int height;
2170     unsigned char data[32];
2171     int data_size;
2172     int data_bit_offset;
2173     int qp;
2174 } gen7_jpeg_wa_clip = {
2175     16,
2176     16,
2177     {
2178         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2179         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2180     },
2181     14,
2182     40,
2183     28,
2184 };
2185
2186 static void
2187 gen8_jpeg_wa_init(VADriverContextP ctx,
2188                   struct gen7_mfd_context *gen7_mfd_context)
2189 {
2190     struct i965_driver_data *i965 = i965_driver_data(ctx);
2191     VAStatus status;
2192     struct object_surface *obj_surface;
2193
2194     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2195         i965_DestroySurfaces(ctx,
2196                              &gen7_mfd_context->jpeg_wa_surface_id,
2197                              1);
2198
2199     status = i965_CreateSurfaces(ctx,
2200                                  gen7_jpeg_wa_clip.width,
2201                                  gen7_jpeg_wa_clip.height,
2202                                  VA_RT_FORMAT_YUV420,
2203                                  1,
2204                                  &gen7_mfd_context->jpeg_wa_surface_id);
2205     assert(status == VA_STATUS_SUCCESS);
2206
2207     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2208     assert(obj_surface);
2209     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2210     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2211
2212     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2213         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2214                                                                "JPEG WA data",
2215                                                                0x1000,
2216                                                                0x1000);
2217         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2218                        0,
2219                        gen7_jpeg_wa_clip.data_size,
2220                        gen7_jpeg_wa_clip.data);
2221     }
2222 }
2223
2224 static void
2225 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2226                               struct gen7_mfd_context *gen7_mfd_context)
2227 {
2228     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2229
2230     BEGIN_BCS_BATCH(batch, 5);
2231     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2232     OUT_BCS_BATCH(batch,
2233                   (MFX_LONG_MODE << 17) | /* Currently only the long format is supported */
2234                   (MFD_MODE_VLD << 15) | /* VLD mode */
2235                   (0 << 10) | /* disable Stream-Out */
2236                   (0 << 9)  | /* Post Deblocking Output */
2237                   (1 << 8)  | /* Pre Deblocking Output */
2238                   (0 << 5)  | /* not in stitch mode */
2239                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2240                   (MFX_FORMAT_AVC << 0));
2241     OUT_BCS_BATCH(batch,
2242                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2243                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2244                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2245                   (0 << 1)  |
2246                   (0 << 0));
2247     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2248     OUT_BCS_BATCH(batch, 0); /* reserved */
2249     ADVANCE_BCS_BATCH(batch);
2250 }
2251
2252 static void
2253 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2254                            struct gen7_mfd_context *gen7_mfd_context)
2255 {
2256     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2257     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2258
2259     BEGIN_BCS_BATCH(batch, 6);
2260     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2261     OUT_BCS_BATCH(batch, 0);
2262     OUT_BCS_BATCH(batch,
2263                   ((obj_surface->orig_width - 1) << 18) |
2264                   ((obj_surface->orig_height - 1) << 4));
2265     OUT_BCS_BATCH(batch,
2266                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2267                   (1 << 27) | /* interleave chroma (NV12 WA surface); set to 0 for a real JPEG surface */
2268                   (0 << 22) | /* surface object control state, ignored */
2269                   ((obj_surface->width - 1) << 3) | /* pitch */
2270                   (0 << 2)  | /* must be 0 */
2271                   (1 << 1)  | /* must be tiled */
2272                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2273     OUT_BCS_BATCH(batch,
2274                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2275                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2276     OUT_BCS_BATCH(batch,
2277                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2278                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codecs, non-zero for JPEG */
2279     ADVANCE_BCS_BATCH(batch);
2280 }
2281
2282 static void
2283 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2284                                  struct gen7_mfd_context *gen7_mfd_context)
2285 {
2286     struct i965_driver_data *i965 = i965_driver_data(ctx);
2287     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2288     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2289     dri_bo *intra_bo;
2290     int i;
2291
2292     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2293                             "intra row store",
2294                             128 * 64,
2295                             0x1000);
2296
2297     BEGIN_BCS_BATCH(batch, 61);
2298     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2299     OUT_BCS_RELOC(batch,
2300                   obj_surface->bo,
2301                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2302                   0);
2303     OUT_BCS_BATCH(batch, 0);
2304     OUT_BCS_BATCH(batch, 0);
2305
2306
2307     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2308     OUT_BCS_BATCH(batch, 0);
2309     OUT_BCS_BATCH(batch, 0);
2310
2311     /* DW 7-12: uncompressed video & stream out */
2312     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2313     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2314     OUT_BCS_BATCH(batch, 0);
2315     OUT_BCS_BATCH(batch, 0);
2316     OUT_BCS_BATCH(batch, 0);
2317     OUT_BCS_BATCH(batch, 0);
2318
2319     /* DW 13-15: intra row store scratch */
2320     OUT_BCS_RELOC(batch,
2321                   intra_bo,
2322                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2323                   0);
2324     OUT_BCS_BATCH(batch, 0);
2325     OUT_BCS_BATCH(batch, 0);
2326
2327     /* DW 16-18: deblocking filter */
2328     OUT_BCS_BATCH(batch, 0);
2329     OUT_BCS_BATCH(batch, 0);
2330     OUT_BCS_BATCH(batch, 0);
2331
2332     /* DW 19..50 */
2333     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2334         OUT_BCS_BATCH(batch, 0);
2335         OUT_BCS_BATCH(batch, 0);
2336     }
2337     OUT_BCS_BATCH(batch, 0);
2338
2339     /* DW 52-54: mb status address */
2340     OUT_BCS_BATCH(batch, 0);
2341     OUT_BCS_BATCH(batch, 0);
2342     OUT_BCS_BATCH(batch, 0);
2343     /* DW 56-60: ILDB & second ILDB address */
2344     OUT_BCS_BATCH(batch, 0);
2345     OUT_BCS_BATCH(batch, 0);
2346     OUT_BCS_BATCH(batch, 0);
2347     OUT_BCS_BATCH(batch, 0);
2348     OUT_BCS_BATCH(batch, 0);
2349     OUT_BCS_BATCH(batch, 0);
2350
2351     ADVANCE_BCS_BATCH(batch);
2352
2353     dri_bo_unreference(intra_bo);
2354 }
2355
2356 static void
2357 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2358                                      struct gen7_mfd_context *gen7_mfd_context)
2359 {
2360     struct i965_driver_data *i965 = i965_driver_data(ctx);
2361     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2362     dri_bo *bsd_mpc_bo, *mpr_bo;
2363
2364     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2365                               "bsd mpc row store",
2366                               11520, /* 1.5 * 120 * 64 */
2367                               0x1000);
2368
2369     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2370                           "mpr row store",
2371                           7680, /* 1.0 * 120 * 64 */
2372                           0x1000);
2373
2374     BEGIN_BCS_BATCH(batch, 10);
2375     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2376
2377     OUT_BCS_RELOC(batch,
2378                   bsd_mpc_bo,
2379                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2380                   0);
2381
2382     OUT_BCS_BATCH(batch, 0);
2383     OUT_BCS_BATCH(batch, 0);
2384
2385     OUT_BCS_RELOC(batch,
2386                   mpr_bo,
2387                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2388                   0);
2389     OUT_BCS_BATCH(batch, 0);
2390     OUT_BCS_BATCH(batch, 0);
2391
2392     OUT_BCS_BATCH(batch, 0);
2393     OUT_BCS_BATCH(batch, 0);
2394     OUT_BCS_BATCH(batch, 0);
2395
2396     ADVANCE_BCS_BATCH(batch);
2397
2398     dri_bo_unreference(bsd_mpc_bo);
2399     dri_bo_unreference(mpr_bo);
2400 }
2401
2402 static void
2403 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2404                           struct gen7_mfd_context *gen7_mfd_context)
2405 {
2406
2407 }
2408
2409 static void
2410 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2411                            struct gen7_mfd_context *gen7_mfd_context)
2412 {
2413     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2414     int img_struct = 0;
2415     int mbaff_frame_flag = 0;
2416     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2417
2418     BEGIN_BCS_BATCH(batch, 16);
2419     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2420     OUT_BCS_BATCH(batch, 
2421                   width_in_mbs * height_in_mbs);
2422     OUT_BCS_BATCH(batch, 
2423                   ((height_in_mbs - 1) << 16) | 
2424                   ((width_in_mbs - 1) << 0));
2425     OUT_BCS_BATCH(batch, 
2426                   (0 << 24) |
2427                   (0 << 16) |
2428                   (0 << 14) |
2429                   (0 << 13) |
2430                   (0 << 12) | /* differs from GEN6 */
2431                   (0 << 10) |
2432                   (img_struct << 8));
2433     OUT_BCS_BATCH(batch,
2434                   (1 << 10) | /* 4:2:0 */
2435                   (1 << 7) |  /* CABAC */
2436                   (0 << 6) |
2437                   (0 << 5) |
2438                   (0 << 4) |
2439                   (0 << 3) |
2440                   (1 << 2) |
2441                   (mbaff_frame_flag << 1) |
2442                   (0 << 0));
2443     OUT_BCS_BATCH(batch, 0);
2444     OUT_BCS_BATCH(batch, 0);
2445     OUT_BCS_BATCH(batch, 0);
2446     OUT_BCS_BATCH(batch, 0);
2447     OUT_BCS_BATCH(batch, 0);
2448     OUT_BCS_BATCH(batch, 0);
2449     OUT_BCS_BATCH(batch, 0);
2450     OUT_BCS_BATCH(batch, 0);
2451     OUT_BCS_BATCH(batch, 0);
2452     OUT_BCS_BATCH(batch, 0);
2453     OUT_BCS_BATCH(batch, 0);
2454     ADVANCE_BCS_BATCH(batch);
2455 }
2456
2457 static void
2458 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2459                                   struct gen7_mfd_context *gen7_mfd_context)
2460 {
2461     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2462     int i;
2463
2464     BEGIN_BCS_BATCH(batch, 71);
2465     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2466
2467     /* reference surfaces 0..15 */
2468     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2469         OUT_BCS_BATCH(batch, 0); /* top */
2470         OUT_BCS_BATCH(batch, 0); /* bottom */
2471     }
2472
2473     OUT_BCS_BATCH(batch, 0);
2474
2475     /* the current decoding frame/field */
2476     OUT_BCS_BATCH(batch, 0); /* top */
2477     OUT_BCS_BATCH(batch, 0);
2478     OUT_BCS_BATCH(batch, 0);
2479
2480     /* POC List */
2481     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2482         OUT_BCS_BATCH(batch, 0);
2483         OUT_BCS_BATCH(batch, 0);
2484     }
2485
2486     OUT_BCS_BATCH(batch, 0);
2487     OUT_BCS_BATCH(batch, 0);
2488
2489     ADVANCE_BCS_BATCH(batch);
2490 }
2491
2492 static void
2493 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2494                                      struct gen7_mfd_context *gen7_mfd_context)
2495 {
2496     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2497
2498     BEGIN_BCS_BATCH(batch, 11);
2499     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2500     OUT_BCS_RELOC(batch,
2501                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2502                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2503                   0);
2504     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2505     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2506     OUT_BCS_BATCH(batch, 0);
2507     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2508     OUT_BCS_BATCH(batch, 0);
2509     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2510     OUT_BCS_BATCH(batch, 0);
2511     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2512     OUT_BCS_BATCH(batch, 0);
2513     ADVANCE_BCS_BATCH(batch);
2514 }
2515
2516 static void
2517 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2518                             struct gen7_mfd_context *gen7_mfd_context)
2519 {
2520     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2521
2522     /* the input bitstream format on GEN7 differs from GEN6 */
2523     BEGIN_BCS_BATCH(batch, 6);
2524     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2525     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2526     OUT_BCS_BATCH(batch, 0);
2527     OUT_BCS_BATCH(batch,
2528                   (0 << 31) |
2529                   (0 << 14) |
2530                   (0 << 12) |
2531                   (0 << 10) |
2532                   (0 << 8));
2533     OUT_BCS_BATCH(batch,
2534                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2535                   (0 << 5)  |
2536                   (0 << 4)  |
2537                   (1 << 3) | /* LastSlice Flag */
2538                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2539     OUT_BCS_BATCH(batch, 0);
2540     ADVANCE_BCS_BATCH(batch);
2541 }
2542
2543 static void
2544 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2545                              struct gen7_mfd_context *gen7_mfd_context)
2546 {
2547     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2548     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2549     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2550     int first_mb_in_slice = 0;
2551     int slice_type = SLICE_TYPE_I;
2552
2553     BEGIN_BCS_BATCH(batch, 11);
2554     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2555     OUT_BCS_BATCH(batch, slice_type);
2556     OUT_BCS_BATCH(batch, 
2557                   (num_ref_idx_l1 << 24) |
2558                   (num_ref_idx_l0 << 16) |
2559                   (0 << 8) |
2560                   (0 << 0));
2561     OUT_BCS_BATCH(batch, 
2562                   (0 << 29) |
2563                   (1 << 27) |   /* disable Deblocking */
2564                   (0 << 24) |
2565                   (gen7_jpeg_wa_clip.qp << 16) |
2566                   (0 << 8) |
2567                   (0 << 0));
2568     OUT_BCS_BATCH(batch, 
2569                   (slice_ver_pos << 24) |
2570                   (slice_hor_pos << 16) | 
2571                   (first_mb_in_slice << 0));
2572     OUT_BCS_BATCH(batch,
2573                   (next_slice_ver_pos << 16) |
2574                   (next_slice_hor_pos << 0));
2575     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2576     OUT_BCS_BATCH(batch, 0);
2577     OUT_BCS_BATCH(batch, 0);
2578     OUT_BCS_BATCH(batch, 0);
2579     OUT_BCS_BATCH(batch, 0);
2580     ADVANCE_BCS_BATCH(batch);
2581 }
2582
2583 static void
2584 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2585                  struct gen7_mfd_context *gen7_mfd_context)
2586 {
2587     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2588     gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2589     intel_batchbuffer_emit_mi_flush(batch);
2590     gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2591     gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2592     gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2593     gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2594     gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2595     gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2596     gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2597
2598     gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2599     gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2600     gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2601 }
2602
2603 #endif
2604
2605 void
2606 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2607                              struct decode_state *decode_state,
2608                              struct gen7_mfd_context *gen7_mfd_context)
2609 {
2610     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2611     VAPictureParameterBufferJPEGBaseline *pic_param;
2612     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2613     dri_bo *slice_data_bo;
2614     int i, j, max_selector = 0;
2615
2616     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2617     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2618
2619     /* Currently only Baseline DCT is supported */
2620     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2621     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2622 #ifdef JPEG_WA
2623     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2624 #endif
2625     intel_batchbuffer_emit_mi_flush(batch);
2626     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2627     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2628     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2629     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2630     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2631
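    /* First pass over the scan (slice) parameters: find the largest DC/AC
     * Huffman table selector that is actually referenced, so that
     * gen8_mfd_jpeg_huff_table_state() below can program max_selector + 1
     * Huffman table pairs before any BSD object is issued.
     */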
2632     for (j = 0; j < decode_state->num_slice_params; j++) {
2633         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2634         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2635         slice_data_bo = decode_state->slice_datas[j]->bo;
2636         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2637
2638         if (j == decode_state->num_slice_params - 1)
2639             next_slice_group_param = NULL;
2640         else
2641             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2642
2643         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2644             int component;
2645
2646             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2647
2648             if (i < decode_state->slice_params[j]->num_elements - 1)
2649                 next_slice_param = slice_param + 1;
2650             else
2651                 next_slice_param = next_slice_group_param;
2652
2653             for (component = 0; component < slice_param->num_components; component++) {
2654                 if (max_selector < slice_param->components[component].dc_table_selector)
2655                     max_selector = slice_param->components[component].dc_table_selector;
2656
2657                 if (max_selector < slice_param->components[component].ac_table_selector)
2658                     max_selector = slice_param->components[component].ac_table_selector;
2659             }
2660
2661             slice_param++;
2662         }
2663     }
2664
2665     assert(max_selector < 2);
2666     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2667
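    /* Second pass: walk the scan parameters again and emit one JPEG BSD
     * object per scan, re-pointing the indirect object base address at the
     * slice data buffer that belongs to each slice parameter group.
     */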
2668     for (j = 0; j < decode_state->num_slice_params; j++) {
2669         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2670         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2671         slice_data_bo = decode_state->slice_datas[j]->bo;
2672         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2673
2674         if (j == decode_state->num_slice_params - 1)
2675             next_slice_group_param = NULL;
2676         else
2677             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2678
2679         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2680             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2681
2682             if (i < decode_state->slice_params[j]->num_elements - 1)
2683                 next_slice_param = slice_param + 1;
2684             else
2685                 next_slice_param = next_slice_group_param;
2686
2687             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2688             slice_param++;
2689         }
2690     }
2691
2692     intel_batchbuffer_end_atomic(batch);
2693     intel_batchbuffer_flush(batch);
2694 }
2695
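/* VP8 dequantization lookup tables: the clipped 7-bit quantization indices
 * from VAIQMatrixBufferVP8 are mapped through these tables to the actual
 * DC/AC quantizer step sizes, matching the dc_qlookup[]/ac_qlookup[] tables
 * of the VP8 specification (RFC 6386).
 */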
2696 static const int vp8_dc_qlookup[128] =
2697 {
2698       4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
2699      18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
2700      29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
2701      44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
2702      59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
2703      75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
2704      91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2705     122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
2706 };
2707
2708 static const int vp8_ac_qlookup[128] =
2709 {
2710       4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
2711      20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
2712      36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
2713      52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
2714      78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
2715     110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2716     155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2717     213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2718 };
2719
2720 static inline unsigned int vp8_clip_quantization_index(int index)
2721 {
2722     if (index > 127)
2723         return 127;
2724     else if (index < 0)
2725         return 0;
2726
2727     return index;
2728 }
2729
2730 static void
2731 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2732                           struct decode_state *decode_state,
2733                           struct gen7_mfd_context *gen7_mfd_context)
2734 {
2735     struct object_surface *obj_surface;
2736     struct i965_driver_data *i965 = i965_driver_data(ctx);
2737     dri_bo *bo;
2738     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2739     int width_in_mbs = (pic_param->frame_width + 15) / 16;
2740     int height_in_mbs = (pic_param->frame_height + 15) / 16;
2741
2742     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2743     assert(height_in_mbs > 0 && height_in_mbs <= 256);
2744
2745     intel_update_vp8_frame_store_index(ctx,
2746                                        decode_state,
2747                                        pic_param,
2748                                        gen7_mfd_context->reference_surface);
2749
2750     /* Current decoded picture */
2751     obj_surface = decode_state->render_object;
2752     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2753
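    /* The render surface is handed to the hardware either as the
     * post-deblocking or as the pre-deblocking output; exactly one of the
     * two is marked valid below, depending on whether the in-loop filter is
     * enabled (loop_filter_disable == 0) for this frame.
     */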
2754     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2755     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2756     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2757     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2758
2759     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2760     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2761     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2762     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2763
2764     intel_ensure_vp8_segmentation_buffer(ctx,
2765         &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2766
2767     /* The same as AVC */
2768     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2769     bo = dri_bo_alloc(i965->intel.bufmgr,
2770                       "intra row store",
2771                       width_in_mbs * 64,
2772                       0x1000);
2773     assert(bo);
2774     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2775     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2776
2777     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2778     bo = dri_bo_alloc(i965->intel.bufmgr,
2779                       "deblocking filter row store",
2780                       width_in_mbs * 64 * 4,
2781                       0x1000);
2782     assert(bo);
2783     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2784     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2785
2786     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2787     bo = dri_bo_alloc(i965->intel.bufmgr,
2788                       "bsd mpc row store",
2789                       width_in_mbs * 64 * 2,
2790                       0x1000);
2791     assert(bo);
2792     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2793     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2794
2795     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2796     bo = dri_bo_alloc(i965->intel.bufmgr,
2797                       "mpr row store",
2798                       width_in_mbs * 64 * 2,
2799                       0x1000);
2800     assert(bo);
2801     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2802     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2803
2804     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2805 }
2806
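/* Emit MFX_VP8_PIC_STATE (38 dwords): frame size in macroblocks, frame
 * header flags, per-segment quantizer values, the coefficient probability
 * buffer, mode/MV probabilities, loop filter deltas and the segmentation id
 * stream base address.
 */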
2807 static void
2808 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2809                        struct decode_state *decode_state,
2810                        struct gen7_mfd_context *gen7_mfd_context)
2811 {
2812     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2813     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2814     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2815     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2816     dri_bo *probs_bo = decode_state->probability_data->bo;
2817     int i, j, log2num;
2818     unsigned int quantization_value[4][6];
2819
2820     /* There is no safe way to error out if the segmentation buffer
2821        could not be allocated. So, instead of aborting, simply decode
2822        something even if the result may look totally inaccurate */
2823     const unsigned int enable_segmentation =
2824         pic_param->pic_fields.bits.segmentation_enabled &&
2825         gen7_mfd_context->segmentation_buffer.valid;
2826         
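    /* num_of_partitions counts the first (mode/MV) partition plus the DCT
     * token partitions, so the number of token partitions is
     * num_of_partitions - 1 and the hardware takes its base-2 logarithm.
     */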
2827     log2num = (int)log2(slice_param->num_of_partitions - 1);
2828
2829     BEGIN_BCS_BATCH(batch, 38);
2830     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2831     OUT_BCS_BATCH(batch,
2832                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2833                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2834     OUT_BCS_BATCH(batch,
2835                   log2num << 24 |
2836                   pic_param->pic_fields.bits.sharpness_level << 16 |
2837                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2838                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2839                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2840                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2841                   pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2842                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2843                   (enable_segmentation &&
2844                    !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2845                   (enable_segmentation &&
2846                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2847                   (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicates an intra frame in the VP8 stream/spec (section 9.1) */
2848                   pic_param->pic_fields.bits.filter_type << 4 |
2849                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2850                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2851
2852     OUT_BCS_BATCH(batch,
2853                   pic_param->loop_filter_level[3] << 24 |
2854                   pic_param->loop_filter_level[2] << 16 |
2855                   pic_param->loop_filter_level[1] <<  8 |
2856                   pic_param->loop_filter_level[0] <<  0);
2857
2858     /* Quantizer values for the 4 segments, DW4-DW15 */
2859     for (i = 0; i < 4; i++) {
2860         quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])]; /* yac */
2861         quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])]; /* ydc */
2862         quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /* y2dc */
2863         /* (101581 * q) >> 16 is equivalent to q * 155 / 100 */
2864         quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /* y2ac */
2865         quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])]; /* uvdc */
2866         quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])]; /* uvac */
2867
2868         quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2869         quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2870
2871         OUT_BCS_BATCH(batch,
2872                       quantization_value[i][0] << 16 | /* Y1AC */
2873                       quantization_value[i][1] <<  0); /* Y1DC */
2874         OUT_BCS_BATCH(batch,
2875                       quantization_value[i][5] << 16 | /* UVAC */
2876                       quantization_value[i][4] <<  0); /* UVDC */
2877         OUT_BCS_BATCH(batch,
2878                       quantization_value[i][3] << 16 | /* Y2AC */
2879                       quantization_value[i][2] <<  0); /* Y2DC */
2880     }
2881
2882     /* CoeffProbability table for non-key frame, DW16-DW18 */
2883     if (probs_bo) {
2884         OUT_BCS_RELOC(batch, probs_bo,
2885                       0, I915_GEM_DOMAIN_INSTRUCTION,
2886                       0);
2887         OUT_BCS_BATCH(batch, 0);
2888         OUT_BCS_BATCH(batch, 0);
2889     } else {
2890         OUT_BCS_BATCH(batch, 0);
2891         OUT_BCS_BATCH(batch, 0);
2892         OUT_BCS_BATCH(batch, 0);
2893     }
2894
2895     OUT_BCS_BATCH(batch,
2896                   pic_param->mb_segment_tree_probs[2] << 16 |
2897                   pic_param->mb_segment_tree_probs[1] <<  8 |
2898                   pic_param->mb_segment_tree_probs[0] <<  0);
2899
2900     OUT_BCS_BATCH(batch,
2901                   pic_param->prob_skip_false << 24 |
2902                   pic_param->prob_intra      << 16 |
2903                   pic_param->prob_last       <<  8 |
2904                   pic_param->prob_gf         <<  0);
2905
2906     OUT_BCS_BATCH(batch,
2907                   pic_param->y_mode_probs[3] << 24 |
2908                   pic_param->y_mode_probs[2] << 16 |
2909                   pic_param->y_mode_probs[1] <<  8 |
2910                   pic_param->y_mode_probs[0] <<  0);
2911
2912     OUT_BCS_BATCH(batch,
2913                   pic_param->uv_mode_probs[2] << 16 |
2914                   pic_param->uv_mode_probs[1] <<  8 |
2915                   pic_param->uv_mode_probs[0] <<  0);
2916     
2917     /* MV update value, DW23-DW32 */
2918     for (i = 0; i < 2; i++) {
2919         for (j = 0; j < 20; j += 4) {
2920             OUT_BCS_BATCH(batch,
2921                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2922                           pic_param->mv_probs[i][j + 2] << 16 |
2923                           pic_param->mv_probs[i][j + 1] <<  8 |
2924                           pic_param->mv_probs[i][j + 0] <<  0);
2925         }
2926     }
2927
2928     OUT_BCS_BATCH(batch,
2929                   (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2930                   (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2931                   (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
2932                   (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);
2933
2934     OUT_BCS_BATCH(batch,
2935                   (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2936                   (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2937                   (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
2938                   (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);
2939
2940     /* segmentation id stream base address, DW35-DW37 */
2941     if (enable_segmentation) {
2942         OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2943                       0, I915_GEM_DOMAIN_INSTRUCTION,
2944                       0);
2945         OUT_BCS_BATCH(batch, 0);
2946         OUT_BCS_BATCH(batch, 0);
2947     }
2948     else {
2949         OUT_BCS_BATCH(batch, 0);
2950         OUT_BCS_BATCH(batch, 0);
2951         OUT_BCS_BATCH(batch, 0);
2952     }
2953     ADVANCE_BCS_BATCH(batch);
2954 }
2955
2956 static void
2957 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2958                         VAPictureParameterBufferVP8 *pic_param,
2959                         VASliceParameterBufferVP8 *slice_param,
2960                         dri_bo *slice_data_bo,
2961                         struct gen7_mfd_context *gen7_mfd_context)
2962 {
2963     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2964     int i, log2num;
2965     unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
2966     unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
2967     unsigned int partition_size_0 = slice_param->partition_size[0];
2968
2969     assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
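    /* used_bits = 8 - bool_coder_ctx.count is taken as the number of bits of
     * the current bool-decoder byte that have already been consumed (the
     * "Partition 0 CPBAC Entropy Count" programmed below).  Once a full byte
     * has been consumed, start the hardware one byte further into the
     * bitstream and shrink partition 0 accordingly.
     */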
2970     if (used_bits == 8) {
2971         used_bits = 0;
2972         offset += 1;
2973         partition_size_0 -= 1;
2974     }
2975
2976     assert(slice_param->num_of_partitions >= 2);
2977     assert(slice_param->num_of_partitions <= 9);
2978
2979     log2num = (int)log2(slice_param->num_of_partitions - 1);
2980
2981     BEGIN_BCS_BATCH(batch, 22);
2982     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2983     OUT_BCS_BATCH(batch,
2984                   used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2985                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
2986                   log2num << 4 |
2987                   (slice_param->macroblock_offset & 0x7));
2988     OUT_BCS_BATCH(batch,
2989                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
2990                   0);
2991
2992     OUT_BCS_BATCH(batch, partition_size_0);
2993     OUT_BCS_BATCH(batch, offset);
2994     // partition sizes in bytes are present after the above first partition when there is more than one token partition
2995     offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
2996     for (i = 1; i < 9; i++) {
2997         if (i < slice_param->num_of_partitions) {
2998             OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
2999             OUT_BCS_BATCH(batch, offset);
3000         } else {
3001             OUT_BCS_BATCH(batch, 0);
3002             OUT_BCS_BATCH(batch, 0);
3003         }
3004
3005         offset += slice_param->partition_size[i];
3006     }
3007
3008     OUT_BCS_BATCH(batch,
3009                   1 << 31 | /* concealment method */
3010                   0);
3011
3012     ADVANCE_BCS_BATCH(batch);
3013 }
3014
3015 void
3016 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3017                             struct decode_state *decode_state,
3018                             struct gen7_mfd_context *gen7_mfd_context)
3019 {
3020     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3021     VAPictureParameterBufferVP8 *pic_param;
3022     VASliceParameterBufferVP8 *slice_param;
3023     dri_bo *slice_data_bo;
3024
3025     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3026     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3027
3028     /* one slice per frame */
3029     if (decode_state->num_slice_params != 1 ||
3030         (!decode_state->slice_params ||
3031          !decode_state->slice_params[0] ||
3032          (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3033         (!decode_state->slice_datas ||
3034          !decode_state->slice_datas[0] ||
3035          !decode_state->slice_datas[0]->bo) ||
3036         !decode_state->probability_data) {
3037         WARN_ONCE("Wrong parameters for VP8 decoding\n");
3038
3039         return;
3040     }
3041
3042     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3043     slice_data_bo = decode_state->slice_datas[0]->bo;
3044
3045     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3046     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3047     intel_batchbuffer_emit_mi_flush(batch);
3048     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3049     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3050     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3051     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3052     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3053     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3054     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3055     intel_batchbuffer_end_atomic(batch);
3056     intel_batchbuffer_flush(batch);
3057 }
3058
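/* Top-level decode entry point, installed as the hw_context run hook by
 * gen8_dec_hw_context_init().  It sanity-checks the input buffers and then
 * dispatches to the per-codec decode routine for the requested VAProfile.
 */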
3059 static VAStatus
3060 gen8_mfd_decode_picture(VADriverContextP ctx, 
3061                         VAProfile profile, 
3062                         union codec_state *codec_state,
3063                         struct hw_context *hw_context)
3064
3065 {
3066     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3067     struct decode_state *decode_state = &codec_state->decode;
3068     VAStatus vaStatus;
3069
3070     assert(gen7_mfd_context);
3071
3072     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3073
3074     if (vaStatus != VA_STATUS_SUCCESS)
3075         goto out;
3076
3077     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3078
3079     switch (profile) {
3080     case VAProfileMPEG2Simple:
3081     case VAProfileMPEG2Main:
3082         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3083         break;
3084         
3085     case VAProfileH264ConstrainedBaseline:
3086     case VAProfileH264Main:
3087     case VAProfileH264High:
3088         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3089         break;
3090
3091     case VAProfileVC1Simple:
3092     case VAProfileVC1Main:
3093     case VAProfileVC1Advanced:
3094         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3095         break;
3096
3097     case VAProfileJPEGBaseline:
3098         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3099         break;
3100
3101     case VAProfileVP8Version0_3:
3102         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3103         break;
3104
3105     default:
3106         assert(0);
3107         break;
3108     }
3109
3110     vaStatus = VA_STATUS_SUCCESS;
3111
3112 out:
3113     return vaStatus;
3114 }
3115
3116 static void
3117 gen8_mfd_context_destroy(void *hw_context)
3118 {
3119     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3120
3121     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3122     gen7_mfd_context->post_deblocking_output.bo = NULL;
3123
3124     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3125     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3126
3127     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3128     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3129
3130     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3131     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3132
3133     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3134     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3135
3136     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3137     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3138
3139     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3140     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3141
3142     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3143     gen7_mfd_context->segmentation_buffer.bo = NULL;
3144
3145     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3146
3147     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3148     free(gen7_mfd_context);
3149 }
3150
3151 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3152                                         struct gen7_mfd_context *gen7_mfd_context)
3153 {
3154     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3155     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3156     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3157     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3158 }
3159
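/* Create the gen8 MFD decoder context: install the destroy/run hooks,
 * allocate the batch buffer, invalidate every reference frame-store entry,
 * and run codec-specific initialization for the MPEG-2 and H.264 profiles.
 */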
3160 struct hw_context *
3161 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3162 {
3163     struct intel_driver_data *intel = intel_driver_data(ctx);
3164     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3165     int i;
3166
3167     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3168     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3169     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3170
3171     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3172         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3173         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3174     }
3175
3176     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3177     gen7_mfd_context->segmentation_buffer.valid = 0;
3178
3179     switch (obj_config->profile) {
3180     case VAProfileMPEG2Simple:
3181     case VAProfileMPEG2Main:
3182         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3183         break;
3184
3185     case VAProfileH264ConstrainedBaseline:
3186     case VAProfileH264Main:
3187     case VAProfileH264High:
3188         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3189         break;
3190     default:
3191         break;
3192     }
3193     return (struct hw_context *)gen7_mfd_context;
3194 }