/*
 * android-x86/hardware-intel-common-vaapi — src/gen8_mfd.c
 * Change under review: check the pointer returned from calloc().
 */
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV             2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
/* Zig-zag scan order for an 8x8 block: entry i is the raster-order index
 * of the i-th coefficient in zig-zag scan order.  Used to reorder
 * quantization matrices/coefficients between scan and raster layouts. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
61
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx, 
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77
78         if (!gen7_avc_surface)
79             return;
80
81         gen7_avc_surface->base.frame_store_id = -1;
82         assert((obj_surface->size & 0x3f) == 0);
83         obj_surface->private_data = gen7_avc_surface;
84     }
85
86     /* DMV buffers now relate to the whole frame, irrespective of
87        field coding modes */
88     if (gen7_avc_surface->dmv_top == NULL) {
89         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
90                                                  "direct mv w/r buffer",
91                                                  width_in_mbs * height_in_mbs * 128,
92                                                  0x1000);
93         assert(gen7_avc_surface->dmv_top);
94     }
95 }
96
/*
 * Emit MFX_PIPE_MODE_SELECT (5 dwords): configure the MFX engine for
 * VLD decoding of the selected codec and enable the pre-/post-
 * deblocking output paths that were marked valid for this frame.
 */
static void
gen8_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* Only the codecs supported by this decoder backend. */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1 ||
           standard_select == MFX_FORMAT_JPEG ||
           standard_select == MFX_FORMAT_VP8);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
132
/*
 * Emit MFX_SURFACE_STATE (6 dwords) describing the destination render
 * surface: dimensions, pitch, tiling, and the Y offsets of the Cb/Cr
 * planes.  Y800 surfaces are programmed as monochrome, everything else
 * as planar 4:2:0 8-bit.
 */
static void
gen8_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface = decode_state->render_object;
    unsigned int y_cb_offset;
    unsigned int y_cr_offset;
    unsigned int surface_format;

    assert(obj_surface);

    y_cb_offset = obj_surface->y_cb_offset;
    y_cr_offset = obj_surface->y_cr_offset;

    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 18) |
                  ((obj_surface->orig_width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (surface_format << 28) | /* 420 planar YUV surface */
                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
175
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): all surface and scratch
 * buffer addresses the MFX pipeline reads/writes.  Each address slot is
 * three dwords wide (address, upper address, attributes/MOCS); invalid
 * slots are emitted as zeros.  Dword positions are fixed by the
 * hardware command layout, so the emission order below must not change.
 */
static void
gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* Pre-deblock 1-3 */
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* Post-deblocking 4-6 */
    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* uncompressed-video & stream out 7-12 (unused when decoding) */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* intra row-store scratch 13-15 */
    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* deblocking-filter-row-store 16-18 */
    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 19..50: the 16 reference picture surfaces, two dwords each
     * (address + upper address); unused slots are zero. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->bo) {
            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }

        OUT_BCS_BATCH(batch, 0);
    }

    /* reference property 51 */
    OUT_BCS_BATCH(batch, 0);

    /* Macroblock status & ILDB 52-57 (unused) */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the second Macroblock status 58-60 (unused) */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
273
274 static void
275 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
276                                  dri_bo *slice_data_bo,
277                                  int standard_select,
278                                  struct gen7_mfd_context *gen7_mfd_context)
279 {
280     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
281
282     BEGIN_BCS_BATCH(batch, 26);
283     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
284         /* MFX In BS 1-5 */
285     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
286     OUT_BCS_BATCH(batch, 0);
287     OUT_BCS_BATCH(batch, 0);
288         /* Upper bound 4-5 */   
289     OUT_BCS_BATCH(batch, 0);
290     OUT_BCS_BATCH(batch, 0);
291
292         /* MFX indirect MV 6-10 */
293     OUT_BCS_BATCH(batch, 0);
294     OUT_BCS_BATCH(batch, 0);
295     OUT_BCS_BATCH(batch, 0);
296     OUT_BCS_BATCH(batch, 0);
297     OUT_BCS_BATCH(batch, 0);
298         
299         /* MFX IT_COFF 11-15 */
300     OUT_BCS_BATCH(batch, 0);
301     OUT_BCS_BATCH(batch, 0);
302     OUT_BCS_BATCH(batch, 0);
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305
306         /* MFX IT_DBLK 16-20 */
307     OUT_BCS_BATCH(batch, 0);
308     OUT_BCS_BATCH(batch, 0);
309     OUT_BCS_BATCH(batch, 0);
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312
313         /* MFX PAK_BSE object for encoder 21-25 */
314     OUT_BCS_BATCH(batch, 0);
315     OUT_BCS_BATCH(batch, 0);
316     OUT_BCS_BATCH(batch, 0);
317     OUT_BCS_BATCH(batch, 0);
318     OUT_BCS_BATCH(batch, 0);
319
320     ADVANCE_BCS_BATCH(batch);
321 }
322
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): the BSD/MPC and MPR
 * row-store scratch buffers plus the VC-1 bitplane read buffer.  Each
 * slot is three dwords (address, upper address, attributes); invalid
 * slots are emitted as zeros.
 */
static void
gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* BSD/MPC row store scratch buffer 1-3 */
    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* MPR Row Store Scratch buffer 4-6 */
    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* Bitplane 7-9 (read-only for the hardware) */
    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
365
366 static void
367 gen8_mfd_qm_state(VADriverContextP ctx,
368                   int qm_type,
369                   unsigned char *qm,
370                   int qm_length,
371                   struct gen7_mfd_context *gen7_mfd_context)
372 {
373     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
374     unsigned int qm_buffer[16];
375
376     assert(qm_length <= 16 * 4);
377     memcpy(qm_buffer, qm, qm_length);
378
379     BEGIN_BCS_BATCH(batch, 18);
380     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
381     OUT_BCS_BATCH(batch, qm_type << 0);
382     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
383     ADVANCE_BCS_BATCH(batch);
384 }
385
/*
 * Emit MFX_AVC_IMG_STATE (17 dwords): per-picture AVC decode
 * parameters derived from the VA picture parameter buffer -- frame
 * dimensions in macroblocks, QP offsets, prediction/entropy flags and
 * the picture structure (frame / top field / bottom field).
 */
static void
gen8_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    /* Picture structure: 0 = frame, 1 = top field, 3 = bottom field. */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* Field pictures must carry field_pic_flag and vice versa. */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF: adaptive frame/field coding in a frame picture. */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 17);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
    OUT_BCS_BATCH(batch,
                  (width_in_mbs * height_in_mbs - 1));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
471
/*
 * Upload the AVC quantization matrices.  Uses the application-supplied
 * IQ matrix buffer when present, otherwise falls back to the flat
 * default matrices prepared by gen8_mfd_avc_context_init().  The 8x8
 * lists are only uploaded when 8x8 transforms are enabled.
 */
static void
gen8_mfd_avc_qm_state(VADriverContextP ctx,
                      struct decode_state *decode_state,
                      struct gen7_mfd_context *gen7_mfd_context)
{
    VAIQMatrixBufferH264 *iq_matrix;
    VAPictureParameterBufferH264 *pic_param;

    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
        iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
    else
        iq_matrix = &gen7_mfd_context->iq_matrix.h264;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    /* Six 4x4 lists: three intra (Y, Cb, Cr) and three inter. */
    gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
    gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
        gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
        gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
    }
}
496
/* Thin wrapper: emit the AVC picture-ID state for the current
 * reference surface list via the shared Gen7.5+ helper. */
static inline void
gen8_mfd_avc_picid_state(VADriverContextP ctx,
    struct decode_state *decode_state,
    struct gen7_mfd_context *gen7_mfd_context)
{
    gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
        gen7_mfd_context->reference_surface);
}
505
506 static void
507 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
508                               struct decode_state *decode_state,
509                               VAPictureParameterBufferH264 *pic_param,
510                               VASliceParameterBufferH264 *slice_param,
511                               struct gen7_mfd_context *gen7_mfd_context)
512 {
513     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
514     struct object_surface *obj_surface;
515     GenAvcSurface *gen7_avc_surface;
516     VAPictureH264 *va_pic;
517     int i;
518
519     BEGIN_BCS_BATCH(batch, 71);
520     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
521
522     /* reference surfaces 0..15 */
523     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
524         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
525             gen7_mfd_context->reference_surface[i].obj_surface &&
526             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
527
528             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
529             gen7_avc_surface = obj_surface->private_data;
530
531             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
532                           I915_GEM_DOMAIN_INSTRUCTION, 0,
533                           0);
534             OUT_BCS_BATCH(batch, 0);
535         } else {
536             OUT_BCS_BATCH(batch, 0);
537             OUT_BCS_BATCH(batch, 0);
538         }
539     }
540     
541     OUT_BCS_BATCH(batch, 0);
542
543     /* the current decoding frame/field */
544     va_pic = &pic_param->CurrPic;
545     obj_surface = decode_state->render_object;
546     assert(obj_surface->bo && obj_surface->private_data);
547     gen7_avc_surface = obj_surface->private_data;
548
549     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
550                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
551                   0);
552
553     OUT_BCS_BATCH(batch, 0);
554     OUT_BCS_BATCH(batch, 0);
555
556     /* POC List */
557     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
558         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
559
560         if (obj_surface) {
561             const VAPictureH264 * const va_pic = avc_find_picture(
562                 obj_surface->base.id, pic_param->ReferenceFrames,
563                 ARRAY_ELEMS(pic_param->ReferenceFrames));
564
565             assert(va_pic != NULL);
566             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
567             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
568         } else {
569             OUT_BCS_BATCH(batch, 0);
570             OUT_BCS_BATCH(batch, 0);
571         }
572     }
573
574     va_pic = &pic_param->CurrPic;
575     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
576     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
577
578     ADVANCE_BCS_BATCH(batch);
579 }
580
/* Emit a phantom slice covering macroblocks before the first real
 * slice (error concealment for streams whose first slice does not
 * start at MB 0), delegating to the shared Gen6 helper. */
static void
gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
                                 VAPictureParameterBufferH264 *pic_param,
                                 VASliceParameterBufferH264 *next_slice_param,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
}
589
/*
 * Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice: slice type,
 * active reference counts, QP/deblocking parameters and the macroblock
 * start positions of this slice and the next one (or the end of the
 * picture when this is the last slice).
 */
static void
gen8_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* Fold SI/SP into their I/P hardware slice types. */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    /* Active reference counts: I uses none, P only list 0, B both. */
    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    first_mb_in_slice = slice_param->first_mb_in_slice;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    /* In MBAFF pictures addresses count MB pairs; double the row. */
    if (mbaff_picture)
        slice_ver_pos = slice_ver_pos << 1;
    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;

        if (mbaff_picture)
            next_slice_ver_pos = next_slice_ver_pos << 1;
    } else {
        /* Last slice: "next" position is the bottom of the picture
         * (half height for a field picture). */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
680
/* Thin wrapper: emit the MFX_AVC_REF_IDX_STATE commands mapping slice
 * reference indices to frame-store slots via the shared Gen6 helper. */
static inline void
gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           VASliceParameterBufferH264 *slice_param,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    gen6_send_avc_ref_idx_state(
        gen7_mfd_context->base.batch,
        slice_param,
        gen7_mfd_context->reference_surface
    );
}
693
694 static void
695 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
696                                 VAPictureParameterBufferH264 *pic_param,
697                                 VASliceParameterBufferH264 *slice_param,
698                                 struct gen7_mfd_context *gen7_mfd_context)
699 {
700     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
701     int i, j, num_weight_offset_table = 0;
702     short weightoffsets[32 * 6];
703
704     if ((slice_param->slice_type == SLICE_TYPE_P ||
705          slice_param->slice_type == SLICE_TYPE_SP) &&
706         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
707         num_weight_offset_table = 1;
708     }
709     
710     if ((slice_param->slice_type == SLICE_TYPE_B) &&
711         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
712         num_weight_offset_table = 2;
713     }
714
715     for (i = 0; i < num_weight_offset_table; i++) {
716         BEGIN_BCS_BATCH(batch, 98);
717         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
718         OUT_BCS_BATCH(batch, i);
719
720         if (i == 0) {
721             for (j = 0; j < 32; j++) {
722                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
723                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
724                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
725                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
726                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
727                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
728             }
729         } else {
730             for (j = 0; j < 32; j++) {
731                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
732                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
733                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
734                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
735                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
736                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
737             }
738         }
739
740         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
741         ADVANCE_BCS_BATCH(batch);
742     }
743 }
744
/*
 * Emit an MFD_AVC_BSD_OBJECT command that points the bitstream decoder at
 * one AVC slice inside the indirect object buffer programmed earlier via
 * gen8_mfd_ind_obj_base_addr_state().
 *
 * @param pic_param        picture parameters (entropy coding mode is read)
 * @param slice_param      the slice being decoded (size/offset are read)
 * @param slice_data_bo    BO holding the raw slice data
 * @param next_slice_param NULL iff this is the last slice of the picture
 */
static void
gen8_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* Bit position of the first macroblock in the slice data; the helper
     * computes it differently for CAVLC vs. CABAC streams. */
    int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
                                                            slice_param,
                                                            pic_param->pic_fields.bits.entropy_coding_mode_flag);

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    /* DW1: slice data size in bytes */
    OUT_BCS_BATCH(batch, 
                  (slice_param->slice_data_size));
    /* DW2: byte offset of the slice within the indirect object buffer */
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    /* DW3: control flags, all left at their zero defaults here */
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    /* DW4: byte offset of the first MB plus the residual bit offset, and
     * the last-slice flag used by the hardware for error handling */
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) |
                  (1 << 7)  |
                  (0 << 5)  |
                  (0 << 4)  |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
780
/*
 * One-time AVC context setup: seed the cached inverse-quantization matrix
 * with the spec-default (flat) scaling lists so decoding works even before
 * the application supplies a VAIQMatrixBufferH264.
 */
static inline void
gen8_mfd_avc_context_init(
    VADriverContextP         ctx,
    struct gen7_mfd_context *gen7_mfd_context
)
{
    /* Initialize flat scaling lists */
    avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
}
790
/*
 * Per-picture AVC decode setup: decide whether the in-loop deblocking
 * filter is active, refresh the reference frame-store index, mark and
 * prepare the render target surface, and (re)allocate the row-store
 * scratch buffers sized from the picture width.
 */
static void
gen8_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    unsigned int width_in_mbs, height_in_mbs;

    /* Scan all slices until one is found with deblocking not fully
     * disabled (idc != 1); that enables the in-loop deblocking path. */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
        gen7_mfd_context->reference_surface);
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    if (pic_param->pic_fields.bits.reference_pic_flag)
        obj_surface->flags |= SURFACE_REFERENCED;
    else
        obj_surface->flags &= ~SURFACE_REFERENCED;

    avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
    gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* The decoder writes either the post- or pre-deblocking output,
     * selected by the 'valid' flags; both point at the render surface. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Row-store scratch buffers are reallocated every picture; sizes are
     * per-MB-column multiples required by the MFX engine. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* AVC has no bitplane data (VC-1 only) */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
892
/*
 * Decode one complete AVC picture: perform per-picture init, then build
 * and flush a single atomic BCS batch containing the MFX pipeline state
 * followed by per-slice state + BSD objects for every slice element.
 */
static void
gen8_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);

    /* Frame-level MFX state, emitted once per picture */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);

    /* Outer loop: one VASliceParameterBuffer (slice group) per iteration */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);

        /* First slice of the *next* group, used to detect the last slice */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        /* If the stream starts mid-frame, emit a phantom slice to cover
         * the macroblocks before first_mb_in_slice */
        if (j == 0 && slice_param->first_mb_in_slice)
            gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context); 

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            /* Per-slice state must precede the BSD object that consumes it */
            gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
957
/*
 * Per-picture MPEG-2 decode setup: bind reference surfaces, make sure the
 * render target has an NV12 BO, and allocate the single row-store scratch
 * buffer MPEG-2 decoding needs.  All other scratch buffers are marked
 * invalid for this codec.
 */
static void
gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferMPEG2 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    unsigned int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;

    mpeg2_set_reference_surfaces(
        ctx,
        gen7_mfd_context->reference_surface,
        decode_state,
        pic_param
    );

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* MPEG-2 has no in-loop deblocking: output goes to the
     * pre-deblocking buffer only. */
    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* Unused for MPEG-2 */
    gen7_mfd_context->post_deblocking_output.valid = 0;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
1004
1005 static void
1006 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1007                          struct decode_state *decode_state,
1008                          struct gen7_mfd_context *gen7_mfd_context)
1009 {
1010     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1011     VAPictureParameterBufferMPEG2 *pic_param;
1012     unsigned int slice_concealment_disable_bit = 0;
1013
1014     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1015     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1016
1017     slice_concealment_disable_bit = 1;
1018
1019     BEGIN_BCS_BATCH(batch, 13);
1020     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1021     OUT_BCS_BATCH(batch,
1022                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1023                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1024                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1025                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1026                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1027                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1028                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1029                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1030                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1031                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1032                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1033                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1034     OUT_BCS_BATCH(batch,
1035                   pic_param->picture_coding_type << 9);
1036     OUT_BCS_BATCH(batch,
1037                   (slice_concealment_disable_bit << 31) |
1038                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1039                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1040     OUT_BCS_BATCH(batch, 0);
1041     OUT_BCS_BATCH(batch, 0);
1042     OUT_BCS_BATCH(batch, 0);
1043     OUT_BCS_BATCH(batch, 0);
1044     OUT_BCS_BATCH(batch, 0);
1045     OUT_BCS_BATCH(batch, 0);
1046     OUT_BCS_BATCH(batch, 0);
1047     OUT_BCS_BATCH(batch, 0);
1048     OUT_BCS_BATCH(batch, 0);
1049     ADVANCE_BCS_BATCH(batch);
1050 }
1051
1052 static void
1053 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1054                         struct decode_state *decode_state,
1055                         struct gen7_mfd_context *gen7_mfd_context)
1056 {
1057     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1058     int i, j;
1059
1060     /* Update internal QM state */
1061     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1062         VAIQMatrixBufferMPEG2 * const iq_matrix =
1063             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1064
1065         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1066             iq_matrix->load_intra_quantiser_matrix) {
1067             gen_iq_matrix->load_intra_quantiser_matrix =
1068                 iq_matrix->load_intra_quantiser_matrix;
1069             if (iq_matrix->load_intra_quantiser_matrix) {
1070                 for (j = 0; j < 64; j++)
1071                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1072                         iq_matrix->intra_quantiser_matrix[j];
1073             }
1074         }
1075
1076         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1077             iq_matrix->load_non_intra_quantiser_matrix) {
1078             gen_iq_matrix->load_non_intra_quantiser_matrix =
1079                 iq_matrix->load_non_intra_quantiser_matrix;
1080             if (iq_matrix->load_non_intra_quantiser_matrix) {
1081                 for (j = 0; j < 64; j++)
1082                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1083                         iq_matrix->non_intra_quantiser_matrix[j];
1084             }
1085         }
1086     }
1087
1088     /* Commit QM state to HW */
1089     for (i = 0; i < 2; i++) {
1090         unsigned char *qm = NULL;
1091         int qm_type;
1092
1093         if (i == 0) {
1094             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1095                 qm = gen_iq_matrix->intra_quantiser_matrix;
1096                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1097             }
1098         } else {
1099             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1100                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1101                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1102             }
1103         }
1104
1105         if (!qm)
1106             continue;
1107
1108         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1109     }
1110 }
1111
/*
 * Emit an MFD_MPEG2_BSD_OBJECT command for one MPEG-2 slice.  The
 * macroblock count is derived from the distance between this slice's
 * start position and the next slice's (or the picture end for the last
 * slice).  For field pictures affected by the slice-vertical-position
 * workaround, the reported vertical positions are halved.
 */
static void
gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    /* Workaround applies only when the stream reports frame-based slice
     * positions for a field picture (detected elsewhere per picture). */
    is_field_pic_wa = is_field_pic &&
        gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    /* (vpos0, hpos0): start of this slice; (vpos1, hpos1): start of the
     * next slice, or the end of the (field) picture for the last one. */
    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    if (next_slice_param == NULL) {
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    /* DW1/DW2: slice data size and offset, skipping whole bytes of the
     * macroblock offset (the residual bits go into DW3). */
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    /* DW3: start position, MB count, last-slice flags, first-MB bit offset */
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    /* DW4: quantiser scale code and the next slice's start position */
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (vpos1 << 8 | hpos1));
    ADVANCE_BCS_BATCH(batch);
}
1160
/*
 * Decode one complete MPEG-2 picture: per-picture init, frame-level MFX
 * state, then one BSD object per slice, all inside one atomic BCS batch.
 */
static void
gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Lazily determine (once, < 0 means undetermined) whether the stream
     * needs the field-picture slice-vertical-position workaround. */
    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);

        /* Needed so the last slice of a group can see the next group's
         * first slice (for MB-count / last-slice computation). */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1216
/* Map the VA-API VC-1 picture_type value to the hardware picture type.
 * NOTE(review): the last entry maps to P rather than a dedicated type —
 * presumably the "skipped" picture is decoded as a P picture; confirm
 * against the GEN7_VC1_SKIPPED_PICTURE handling below. */
static const int va_to_gen7_vc1_pic_type[5] = {
    GEN7_VC1_I_PICTURE,
    GEN7_VC1_P_PICTURE,
    GEN7_VC1_B_PICTURE,
    GEN7_VC1_BI_PICTURE,
    GEN7_VC1_P_PICTURE,
};
1224
/* Map the VA-API VC-1 motion-vector mode to the hardware MV mode field. */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};
1231
/* B-picture motion-vector scale factors.
 * NOTE(review): the indexing scheme (21 entries) is not visible in this
 * file chunk — presumably derived from BFRACTION per the VC-1 spec;
 * verify against the lookup site before changing. */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160, 
    224,
};
1239
/* Map the VA-API VC-1 conditional-overlap value to the hardware field. */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};
1245
/* Map the VA-API VC-1 sequence profile (simple/main/reserved/advanced)
 * to the hardware profile encoding. */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
};
1252
1253 static void 
1254 gen8_mfd_free_vc1_surface(void **data)
1255 {
1256     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1257
1258     if (!gen7_vc1_surface)
1259         return;
1260
1261     dri_bo_unreference(gen7_vc1_surface->dmv);
1262     free(gen7_vc1_surface);
1263     *data = NULL;
1264 }
1265
1266 static void
1267 gen8_mfd_init_vc1_surface(VADriverContextP ctx, 
1268                           VAPictureParameterBufferVC1 *pic_param,
1269                           struct object_surface *obj_surface)
1270 {
1271     struct i965_driver_data *i965 = i965_driver_data(ctx);
1272     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1273     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1274     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1275
1276     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1277
1278     if (!gen7_vc1_surface) {
1279         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1280
1281         if (!gen7_vc1_surface)
1282             return;
1283
1284         assert((obj_surface->size & 0x3f) == 0);
1285         obj_surface->private_data = gen7_vc1_surface;
1286     }
1287
1288     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1289
1290     if (gen7_vc1_surface->dmv == NULL) {
1291         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1292                                              "direct mv w/r buffer",
1293                                              width_in_mbs * height_in_mbs * 64,
1294                                              0x1000);
1295     }
1296 }
1297
/*
 * Per-picture VC-1 decode setup: refresh the reference frame-store index,
 * prepare the render surface and its private data, allocate the row-store
 * scratch buffers, and — if the picture carries bitplane data — repack
 * the driver-supplied bitplane buffer into the hardware layout.
 */
static void
gen8_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int width_in_mbs;
    int picture_type;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    picture_type = pic_param->picture_fields.bits.picture_type;
 
    intel_update_vc1_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* Output goes through the post-deblocking buffer when the stream's
     * loop filter is on, the pre-deblocking buffer otherwise. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    /* Row-store scratch buffers, reallocated every picture */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 7 * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
    
    if (gen7_mfd_context->bitplane_read_buffer.valid) {
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        /* Destination stores two macroblock nibbles per byte per row */
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
        int src_w, src_h;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "VC-1 Bitplane",
                          bitplane_width * height_in_mbs,
                          0x1000);
        assert(bo);
        gen7_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);
        dst = bo->virtual;

        /* Repack the bitplane: the source holds one 4-bit value per MB in
         * raster order, two per byte with the high nibble first; the
         * destination accumulates two nibbles per byte along each MB row. */
        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for(src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;
                int src_shift;
                uint8_t src_value;

                src_index = (src_h * width_in_mbs + src_w) / 2;
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                /* NOTE(review): for skipped pictures bit 1 is forced on in
                 * every nibble — presumably marking all MBs as skipped;
                 * confirm against the VC-1 bitplane encoding. */
                if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
                    src_value |= 0x2;
                }

                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
            }

            /* Odd row width: the last byte received only one nibble, so
             * shift it down into place (src_w == width_in_mbs here). */
            if (src_w & 1)
                dst[src_w / 2] >>= 4;

            dst += bitplane_width;
        }

        dri_bo_unmap(bo);
    } else
        gen7_mfd_context->bitplane_read_buffer.bo = NULL;
}
1416
/*
 * Emit the MFD_VC1_LONG_PIC_STATE command (6 dwords) that programs the
 * fixed-function VC-1 decoder with per-picture parameters derived from
 * the VA-API picture parameter buffer: quantizer deltas, MV mode,
 * overlap smoothing, bitplane flags, transform selection, etc.
 */
static void
gen8_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;
    int interpolation_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /*
     * Translate the DQUANT/DQUANTFRM/DQPROFILE syntax elements into the
     * hardware's alt_pquant_config / alt_pquant_edge_mask encoding.
     */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        /* dquant == 2: all four edges use the alternate quantizer */
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3: /* all macroblocks */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;

            case 0: /* all four edges */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1: /* double edge */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2: /* single edge */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /* With intensity compensation, the effective MV mode comes from mv_mode2. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    /* B-fraction scale factor lookup; table has 21 entries. */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];

    /* Advanced profile has no true I pictures at this level; treat as BI. */
    if (profile == GEN7_VC1_ADVANCED_PROFILE && 
        picture_type == GEN7_VC1_I_PICTURE)
        picture_type = GEN7_VC1_BI_PICTURE;

    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;

        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /*
     * B pictures need the forward reference's direct-mode MV buffer; it is
     * only valid if that reference exists and is itself a P picture.
     */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        struct gen7_vc1_surface *gen7_vc1_surface = NULL;

        obj_surface = decode_state->reference_objects[1];

        if (obj_surface)
            gen7_vc1_surface = obj_surface->private_data;

        if (!gen7_vc1_surface || 
            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* Field pictures (fcm == 2) additionally encode field order in fcm. */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /*
     * Overlap smoothing: for simple/main profile it depends on PQUANT and
     * picture type; for advanced profile, I/BI pictures may also enable it
     * via CONDOVER.
     */
    overlap = pic_param->sequence_fields.bits.overlap;

    if (overlap) {
        overlap = 0;
        if (profile != GEN7_VC1_ADVANCED_PROFILE){
            if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
                pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
                overlap = 1;
            }
        }else {
            if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
                pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
                overlap = 1;
            }
            if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
                pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
                if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
                    overlap = 1;
                } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                           va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
                    overlap = 1;
                }
            }
        }
    }

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 9; /* Half-pel bilinear */
    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
        interpolation_mode = 1; /* Half-pel bicubic */
    else
        interpolation_mode = 0; /* Quarter-pel bicubic */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
                  overlap << 4 |
                  !pic_param->picture_fields.bits.is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  picture_type << 26 |
                  fcm << 24 |
                  alt_pq << 16 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
                  scale_factor << 0);
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  pic_param->reference_fields.bits.reference_distance << 20 |
                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 31 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
}
1678
1679 static void
1680 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1681                              struct decode_state *decode_state,
1682                              struct gen7_mfd_context *gen7_mfd_context)
1683 {
1684     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1685     VAPictureParameterBufferVC1 *pic_param;
1686     int intensitycomp_single;
1687
1688     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1689     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1690     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1691
1692     BEGIN_BCS_BATCH(batch, 6);
1693     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1694     OUT_BCS_BATCH(batch,
1695                   0 << 14 | /* FIXME: double ??? */
1696                   0 << 12 |
1697                   intensitycomp_single << 10 |
1698                   intensitycomp_single << 8 |
1699                   0 << 4 | /* FIXME: interlace mode */
1700                   0);
1701     OUT_BCS_BATCH(batch,
1702                   pic_param->luma_shift << 16 |
1703                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1704     OUT_BCS_BATCH(batch, 0);
1705     OUT_BCS_BATCH(batch, 0);
1706     OUT_BCS_BATCH(batch, 0);
1707     ADVANCE_BCS_BATCH(batch);
1708 }
1709
1710 static void
1711 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1712                               struct decode_state *decode_state,
1713                               struct gen7_mfd_context *gen7_mfd_context)
1714 {
1715     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1716     struct object_surface *obj_surface;
1717     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1718
1719     obj_surface = decode_state->render_object;
1720
1721     if (obj_surface && obj_surface->private_data) {
1722         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1723     }
1724
1725     obj_surface = decode_state->reference_objects[1];
1726
1727     if (obj_surface && obj_surface->private_data) {
1728         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1729     }
1730
1731     BEGIN_BCS_BATCH(batch, 7);
1732     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1733
1734     if (dmv_write_buffer)
1735         OUT_BCS_RELOC(batch, dmv_write_buffer,
1736                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1737                       0);
1738     else
1739         OUT_BCS_BATCH(batch, 0);
1740
1741     OUT_BCS_BATCH(batch, 0);
1742     OUT_BCS_BATCH(batch, 0);
1743
1744     if (dmv_read_buffer)
1745         OUT_BCS_RELOC(batch, dmv_read_buffer,
1746                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1747                       0);
1748     else
1749         OUT_BCS_BATCH(batch, 0);
1750     
1751     OUT_BCS_BATCH(batch, 0);
1752     OUT_BCS_BATCH(batch, 0);
1753                   
1754     ADVANCE_BCS_BATCH(batch);
1755 }
1756
/*
 * Convert the parser-supplied macroblock bit offset into a bit offset
 * within the raw slice data.  For the advanced profile (3) the raw
 * stream still contains 0x00 0x00 0x03 emulation-prevention sequences
 * inside the slice header, so each one encountered shifts the offset
 * forward by a byte.
 */
static int
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_bytes = in_slice_data_bit_offset / 8;
    int adjusted_bit_offset;

    if (profile != 3) {
        /* Simple/main profile: no emulation prevention, offset is exact. */
        adjusted_bit_offset = in_slice_data_bit_offset;
    } else {
        int scanned, pos;

        /* Walk the header bytes; every 00 00 03 xx (xx < 4) sequence
         * consumes one extra raw byte. */
        for (scanned = 0, pos = 0; scanned < header_bytes; scanned++, pos++) {
            if (buf[pos] == 0 && buf[pos + 1] == 0 &&
                buf[pos + 2] == 3 && buf[pos + 3] < 4) {
                scanned++;
                pos += 2;
            }
        }

        adjusted_bit_offset = 8 * pos + in_slice_data_bit_offset % 8;
    }

    return adjusted_bit_offset;
}
1778
1779 static void
1780 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1781                         VAPictureParameterBufferVC1 *pic_param,
1782                         VASliceParameterBufferVC1 *slice_param,
1783                         VASliceParameterBufferVC1 *next_slice_param,
1784                         dri_bo *slice_data_bo,
1785                         struct gen7_mfd_context *gen7_mfd_context)
1786 {
1787     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1788     int next_slice_start_vert_pos;
1789     int macroblock_offset;
1790     uint8_t *slice_data = NULL;
1791
1792     dri_bo_map(slice_data_bo, 0);
1793     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1794     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1795                                                                slice_param->macroblock_offset,
1796                                                                pic_param->sequence_fields.bits.profile);
1797     dri_bo_unmap(slice_data_bo);
1798
1799     if (next_slice_param)
1800         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1801     else
1802         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1803
1804     BEGIN_BCS_BATCH(batch, 5);
1805     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1806     OUT_BCS_BATCH(batch, 
1807                   slice_param->slice_data_size - (macroblock_offset >> 3));
1808     OUT_BCS_BATCH(batch, 
1809                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1810     OUT_BCS_BATCH(batch,
1811                   slice_param->slice_vertical_position << 16 |
1812                   next_slice_start_vert_pos << 0);
1813     OUT_BCS_BATCH(batch,
1814                   (macroblock_offset & 0x7));
1815     ADVANCE_BCS_BATCH(batch);
1816 }
1817
/*
 * Top-level VC-1 picture decode: set up the pipeline state once, then
 * emit one BSD object per slice.  The state commands below must be
 * emitted in this order within a single atomic batch.
 */
static void
gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /* Allocate/refresh per-picture buffers before touching the batch. */
    gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);

    /* Outer loop: slice parameter buffers; inner loop: slices within one
     * buffer.  Each BSD object needs the next slice's start row, so peek
     * ahead across buffer boundaries via next_slice_group_param. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1870
1871 static void
1872 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1873                           struct decode_state *decode_state,
1874                           struct gen7_mfd_context *gen7_mfd_context)
1875 {
1876     struct object_surface *obj_surface;
1877     VAPictureParameterBufferJPEGBaseline *pic_param;
1878     int subsampling = SUBSAMPLE_YUV420;
1879     int fourcc = VA_FOURCC_IMC3;
1880
1881     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1882
1883     if (pic_param->num_components == 1)
1884         subsampling = SUBSAMPLE_YUV400;
1885     else if (pic_param->num_components == 3) {
1886         int h1 = pic_param->components[0].h_sampling_factor;
1887         int h2 = pic_param->components[1].h_sampling_factor;
1888         int h3 = pic_param->components[2].h_sampling_factor;
1889         int v1 = pic_param->components[0].v_sampling_factor;
1890         int v2 = pic_param->components[1].v_sampling_factor;
1891         int v3 = pic_param->components[2].v_sampling_factor;
1892
1893         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1894             v1 == 2 && v2 == 1 && v3 == 1) {
1895             subsampling = SUBSAMPLE_YUV420;
1896             fourcc = VA_FOURCC_IMC3;
1897         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1898                    v1 == 1 && v2 == 1 && v3 == 1) {
1899             subsampling = SUBSAMPLE_YUV422H;
1900             fourcc = VA_FOURCC_422H;
1901         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1902                    v1 == 1 && v2 == 1 && v3 == 1) {
1903             subsampling = SUBSAMPLE_YUV444;
1904             fourcc = VA_FOURCC_444P;
1905         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1906                    v1 == 1 && v2 == 1 && v3 == 1) {
1907             subsampling = SUBSAMPLE_YUV411;
1908             fourcc = VA_FOURCC_411P;
1909         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1910                    v1 == 2 && v2 == 1 && v3 == 1) {
1911             subsampling = SUBSAMPLE_YUV422V;
1912             fourcc = VA_FOURCC_422V;
1913         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1914                    v1 == 2 && v2 == 2 && v3 == 2) {
1915             subsampling = SUBSAMPLE_YUV422H;
1916             fourcc = VA_FOURCC_422H;
1917         } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1918                    v1 == 2 && v2 == 1 && v3 == 1) {
1919             subsampling = SUBSAMPLE_YUV422V;
1920             fourcc = VA_FOURCC_422V;
1921         } else
1922             assert(0);
1923     }
1924     else {
1925         assert(0);
1926     }
1927
1928     /* Current decoded picture */
1929     obj_surface = decode_state->render_object;
1930     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1931
1932     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1933     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1934     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1935     gen7_mfd_context->pre_deblocking_output.valid = 1;
1936
1937     gen7_mfd_context->post_deblocking_output.bo = NULL;
1938     gen7_mfd_context->post_deblocking_output.valid = 0;
1939
1940     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1941     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1942
1943     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1944     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1945
1946     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1947     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1948
1949     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1950     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1951
1952     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1953     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1954 }
1955
/* Map a VA rotation index to the MFX rotation encoding; the pic state
 * below always uses index 0 (no rotation). */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
1962
1963 static void
1964 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1965                         struct decode_state *decode_state,
1966                         struct gen7_mfd_context *gen7_mfd_context)
1967 {
1968     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1969     VAPictureParameterBufferJPEGBaseline *pic_param;
1970     int chroma_type = GEN7_YUV420;
1971     int frame_width_in_blks;
1972     int frame_height_in_blks;
1973
1974     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1975     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1976
1977     if (pic_param->num_components == 1)
1978         chroma_type = GEN7_YUV400;
1979     else if (pic_param->num_components == 3) {
1980         int h1 = pic_param->components[0].h_sampling_factor;
1981         int h2 = pic_param->components[1].h_sampling_factor;
1982         int h3 = pic_param->components[2].h_sampling_factor;
1983         int v1 = pic_param->components[0].v_sampling_factor;
1984         int v2 = pic_param->components[1].v_sampling_factor;
1985         int v3 = pic_param->components[2].v_sampling_factor;
1986
1987         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1988             v1 == 2 && v2 == 1 && v3 == 1)
1989             chroma_type = GEN7_YUV420;
1990         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1991                  v1 == 1 && v2 == 1 && v3 == 1)
1992             chroma_type = GEN7_YUV422H_2Y;
1993         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1994                  v1 == 1 && v2 == 1 && v3 == 1)
1995             chroma_type = GEN7_YUV444;
1996         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1997                  v1 == 1 && v2 == 1 && v3 == 1)
1998             chroma_type = GEN7_YUV411;
1999         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2000                  v1 == 2 && v2 == 1 && v3 == 1)
2001             chroma_type = GEN7_YUV422V_2Y;
2002         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2003                  v1 == 2 && v2 == 2 && v3 == 2)
2004             chroma_type = GEN7_YUV422H_4Y;
2005         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2006                  v1 == 2 && v2 == 1 && v3 == 1)
2007             chroma_type = GEN7_YUV422V_4Y;
2008         else
2009             assert(0);
2010     }
2011
2012     if (chroma_type == GEN7_YUV400 ||
2013         chroma_type == GEN7_YUV444 ||
2014         chroma_type == GEN7_YUV422V_2Y) {
2015         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2016         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2017     } else if (chroma_type == GEN7_YUV411) {
2018         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2019         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2020     } else {
2021         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2022         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2023     }
2024
2025     BEGIN_BCS_BATCH(batch, 3);
2026     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2027     OUT_BCS_BATCH(batch,
2028                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2029                   (chroma_type << 0));
2030     OUT_BCS_BATCH(batch,
2031                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2032                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2033     ADVANCE_BCS_BATCH(batch);
2034 }
2035
/* MFX Huffman table ids indexed by VA table slot: 0 -> Y, 1 -> UV. */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
2040
/*
 * Emit one MFX_JPEG_HUFF_TABLE_STATE (53 dwords) per loaded Huffman
 * table.  Payload layout: 12 DC code-length counts, 12 DC values,
 * 16 AC code-length counts, then 164 bytes of AC values
 * (12 + 12 + 16 + 164 = 204 bytes = 51 dwords, plus 2 header dwords).
 * NOTE(review): the 164-byte AC read assumes the VA header pads
 * ac_values[162] with 2 trailing bytes — confirm against va_dec_jpeg.h.
 */
static void
gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct gen7_mfd_context *gen7_mfd_context,
                               int num_tables)
{
    VAHuffmanTableBufferJPEGBaseline *huffman_table;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int index;

    /* No Huffman table buffer supplied: nothing to program. */
    if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
        return;

    huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;

    for (index = 0; index < num_tables; index++) {
        int id = va_to_gen7_jpeg_hufftable[index];
        /* Skip tables the application did not mark as loaded. */
        if (!huffman_table->load_huffman_table[index])
            continue;
        BEGIN_BCS_BATCH(batch, 53);
        OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
        OUT_BCS_BATCH(batch, id);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
        ADVANCE_BCS_BATCH(batch);
    }
}
2070
/* MFX quantizer-matrix ids indexed by the 1-based normalized component
 * id; index 0 is unused (-1 sentinel). */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
2078
2079 static void
2080 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2081                        struct decode_state *decode_state,
2082                        struct gen7_mfd_context *gen7_mfd_context)
2083 {
2084     VAPictureParameterBufferJPEGBaseline *pic_param;
2085     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2086     int index;
2087
2088     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2089         return;
2090
2091     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2092     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2093
2094     assert(pic_param->num_components <= 3);
2095
2096     for (index = 0; index < pic_param->num_components; index++) {
2097         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2098         int qm_type;
2099         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2100         unsigned char raster_qm[64];
2101         int j;
2102
2103         if (id > 4 || id < 1)
2104             continue;
2105
2106         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2107             continue;
2108
2109         qm_type = va_to_gen7_jpeg_qm[id];
2110
2111         for (j = 0; j < 64; j++)
2112             raster_qm[zigzag_direct[j]] = qm[j];
2113
2114         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2115     }
2116 }
2117
/*
 * Emit one MFD_JPEG_BSD_OBJECT command that starts hardware decoding of a
 * single JPEG baseline scan described by slice_param.
 *
 * next_slice_param is unused here; it is part of the common per-slice call
 * signature used by the JPEG decode loop.
 */
static void
gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
                         VAPictureParameterBufferJPEGBaseline *pic_param,
                         VASliceParameterBufferJPEGBaseline *slice_param,
                         VASliceParameterBufferJPEGBaseline *next_slice_param,
                         dri_bo *slice_data_bo,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int scan_component_mask = 0;
    int i;

    /* The scan component mask below has only 3 bits, so at most 3
     * components per scan are supported. */
    assert(slice_param->num_components > 0);
    assert(slice_param->num_components < 4);
    assert(slice_param->num_components <= pic_param->num_components);

    /* Map each scan component selector to a 1-based index relative to the
     * first frame component, then set the corresponding mask bit. */
    for (i = 0; i < slice_param->num_components; i++) {
        switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
        case 1:
            scan_component_mask |= (1 << 0);
            break;
        case 2:
            scan_component_mask |= (1 << 1);
            break;
        case 3:
            scan_component_mask |= (1 << 2);
            break;
        default:
            assert(0);
            break;
        }
    }

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_size);
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  slice_param->slice_horizontal_position << 16 |
                  slice_param->slice_vertical_position << 0);
    OUT_BCS_BATCH(batch,
                  ((slice_param->num_components != 1) << 30) |  /* interleaved */
                  (scan_component_mask << 27) |                 /* scan components */
                  (0 << 26) |   /* disable interrupt allowed */
                  (slice_param->num_mcus << 0));                /* MCU count */
    OUT_BCS_BATCH(batch,
                  (slice_param->restart_interval << 0));    /* RestartInterval */
    ADVANCE_BCS_BATCH(batch);
}
2169
2170 /* Workaround for JPEG decoding on Ivybridge */
2171 #ifdef JPEG_WA
2172
/* Pre-encoded 16x16 AVC intra clip that is decoded before each real JPEG
 * picture as a hardware workaround (see gen8_mfd_jpeg_wa()). */
static struct {
    int width;               /* clip width in pixels */
    int height;              /* clip height in pixels */
    unsigned char data[32];  /* canned AVC slice bitstream bytes */
    int data_size;           /* number of valid bytes in data[] */
    int data_bit_offset;     /* bit offset of slice data within the bitstream */
    int qp;                  /* slice quantization parameter programmed in slice state */
} gen7_jpeg_wa_clip = {
    16,
    16,
    {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    14,
    40,
    28,
};
2191
2192 static void
2193 gen8_jpeg_wa_init(VADriverContextP ctx,
2194                   struct gen7_mfd_context *gen7_mfd_context)
2195 {
2196     struct i965_driver_data *i965 = i965_driver_data(ctx);
2197     VAStatus status;
2198     struct object_surface *obj_surface;
2199
2200     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2201         i965_DestroySurfaces(ctx,
2202                              &gen7_mfd_context->jpeg_wa_surface_id,
2203                              1);
2204
2205     status = i965_CreateSurfaces(ctx,
2206                                  gen7_jpeg_wa_clip.width,
2207                                  gen7_jpeg_wa_clip.height,
2208                                  VA_RT_FORMAT_YUV420,
2209                                  1,
2210                                  &gen7_mfd_context->jpeg_wa_surface_id);
2211     assert(status == VA_STATUS_SUCCESS);
2212
2213     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2214     assert(obj_surface);
2215     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2216     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2217
2218     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2219         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2220                                                                "JPEG WA data",
2221                                                                0x1000,
2222                                                                0x1000);
2223         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2224                        0,
2225                        gen7_jpeg_wa_clip.data_size,
2226                        gen7_jpeg_wa_clip.data);
2227     }
2228 }
2229
/*
 * Program MFX_PIPE_MODE_SELECT for the workaround decode: long-format AVC
 * VLD decoding with stream-out disabled and pre-deblocking output enabled.
 */
static void
gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9)  | /* Post Deblocking Output */
                  (1 << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
2257
/*
 * Program MFX_SURFACE_STATE for the workaround scratch surface: a tiled
 * NV12 (planar 4:2:0, interleaved chroma) destination.
 */
static void
gen8_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
2287
2288 static void
2289 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2290                                  struct gen7_mfd_context *gen7_mfd_context)
2291 {
2292     struct i965_driver_data *i965 = i965_driver_data(ctx);
2293     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2294     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2295     dri_bo *intra_bo;
2296     int i;
2297
2298     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2299                             "intra row store",
2300                             128 * 64,
2301                             0x1000);
2302
2303     BEGIN_BCS_BATCH(batch, 61);
2304     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2305     OUT_BCS_RELOC(batch,
2306                   obj_surface->bo,
2307                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2308                   0);
2309         OUT_BCS_BATCH(batch, 0);
2310         OUT_BCS_BATCH(batch, 0);
2311     
2312
2313     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2314         OUT_BCS_BATCH(batch, 0);
2315         OUT_BCS_BATCH(batch, 0);
2316
2317         /* uncompressed-video & stream out 7-12 */
2318     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2319     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2320         OUT_BCS_BATCH(batch, 0);
2321         OUT_BCS_BATCH(batch, 0);
2322         OUT_BCS_BATCH(batch, 0);
2323         OUT_BCS_BATCH(batch, 0);
2324
2325         /* the DW 13-15 is for intra row store scratch */
2326     OUT_BCS_RELOC(batch,
2327                   intra_bo,
2328                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2329                   0);
2330         OUT_BCS_BATCH(batch, 0);
2331         OUT_BCS_BATCH(batch, 0);
2332
2333         /* the DW 16-18 is for deblocking filter */ 
2334     OUT_BCS_BATCH(batch, 0);
2335         OUT_BCS_BATCH(batch, 0);
2336         OUT_BCS_BATCH(batch, 0);
2337
2338     /* DW 19..50 */
2339     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2340         OUT_BCS_BATCH(batch, 0);
2341         OUT_BCS_BATCH(batch, 0);
2342     }
2343     OUT_BCS_BATCH(batch, 0);
2344
2345         /* the DW52-54 is for mb status address */
2346     OUT_BCS_BATCH(batch, 0);
2347         OUT_BCS_BATCH(batch, 0);
2348         OUT_BCS_BATCH(batch, 0);
2349         /* the DW56-60 is for ILDB & second ILDB address */
2350     OUT_BCS_BATCH(batch, 0);
2351         OUT_BCS_BATCH(batch, 0);
2352         OUT_BCS_BATCH(batch, 0);
2353     OUT_BCS_BATCH(batch, 0);
2354         OUT_BCS_BATCH(batch, 0);
2355         OUT_BCS_BATCH(batch, 0);
2356
2357     ADVANCE_BCS_BATCH(batch);
2358
2359     dri_bo_unreference(intra_bo);
2360 }
2361
2362 static void
2363 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2364                                      struct gen7_mfd_context *gen7_mfd_context)
2365 {
2366     struct i965_driver_data *i965 = i965_driver_data(ctx);
2367     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2368     dri_bo *bsd_mpc_bo, *mpr_bo;
2369
2370     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2371                               "bsd mpc row store",
2372                               11520, /* 1.5 * 120 * 64 */
2373                               0x1000);
2374
2375     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2376                           "mpr row store",
2377                           7680, /* 1. 0 * 120 * 64 */
2378                           0x1000);
2379
2380     BEGIN_BCS_BATCH(batch, 10);
2381     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2382
2383     OUT_BCS_RELOC(batch,
2384                   bsd_mpc_bo,
2385                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2386                   0);
2387
2388     OUT_BCS_BATCH(batch, 0);
2389     OUT_BCS_BATCH(batch, 0);
2390
2391     OUT_BCS_RELOC(batch,
2392                   mpr_bo,
2393                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2394                   0);
2395     OUT_BCS_BATCH(batch, 0);
2396     OUT_BCS_BATCH(batch, 0);
2397
2398     OUT_BCS_BATCH(batch, 0);
2399     OUT_BCS_BATCH(batch, 0);
2400     OUT_BCS_BATCH(batch, 0);
2401
2402     ADVANCE_BCS_BATCH(batch);
2403
2404     dri_bo_unreference(bsd_mpc_bo);
2405     dri_bo_unreference(mpr_bo);
2406 }
2407
/*
 * Intentionally empty: no scaling-matrix state is emitted for the
 * workaround clip.  Kept as a placeholder so the call sequence in
 * gen8_mfd_jpeg_wa() mirrors a full AVC decode setup.
 */
static void
gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{

}
2414
/*
 * Program MFX_AVC_IMG_STATE for the workaround clip: a single-macroblock
 * (1x1 MB) progressive 4:2:0 CABAC picture with all optional features off.
 */
static void
gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct = 0;          /* frame picture */
    int mbaff_frame_flag = 0;    /* no MBAFF */
    unsigned int width_in_mbs = 1, height_in_mbs = 1;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch, 
                  width_in_mbs * height_in_mbs);
    OUT_BCS_BATCH(batch, 
                  ((height_in_mbs - 1) << 16) | 
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch, 
                  (0 << 24) |
                  (0 << 16) |
                  (0 << 14) |
                  (0 << 13) |
                  (0 << 12) | /* differ from GEN6 */
                  (0 << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (0 << 6) |
                  (0 << 5) |
                  (0 << 4) |
                  (0 << 3) |
                  (1 << 2) |
                  (mbaff_frame_flag << 1) |
                  (0 << 0));
    /* Remaining dwords are unused for this minimal decode. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2462
/*
 * Program MFX_AVC_DIRECTMODE_STATE for the workaround clip.  The clip is a
 * single intra picture, so every reference surface address and POC entry
 * is zero; only the dword layout (71 dwords) matters.
 */
static void
gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
                                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0); /* top */
        OUT_BCS_BATCH(batch, 0); /* bottom */
    }
        
        OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field */
    OUT_BCS_BATCH(batch, 0); /* top */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC List */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
2497
/*
 * Program MFX_IND_OBJ_BASE_ADDR_STATE so the hardware reads the canned
 * workaround bitstream from jpeg_wa_slice_data_bo.
 */
static void
gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch,
                  gen7_mfd_context->jpeg_wa_slice_data_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2521
/*
 * Emit MFD_AVC_BSD_OBJECT for the workaround clip's single slice, using the
 * byte/bit offsets recorded in gen7_jpeg_wa_clip.
 */
static void
gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) | /* byte offset of slice data */
                  (0 << 5)  |
                  (0 << 4)  |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7)); /* remaining bit offset */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2548
/*
 * Program MFX_AVC_SLICE_STATE for the workaround clip: one I-slice covering
 * the whole 1x1-MB picture, deblocking disabled, QP taken from the clip.
 */
static void
gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch, 
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) | 
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2588
/*
 * Run the JPEG decode workaround: decode the canned 16x16 AVC intra clip
 * before the real JPEG picture.  The calls below follow the required MFX
 * command ordering for an AVC VLD decode; do not reorder them.
 */
static void
gen8_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen8_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
2608
2609 #endif
2610
2611 void
2612 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2613                              struct decode_state *decode_state,
2614                              struct gen7_mfd_context *gen7_mfd_context)
2615 {
2616     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2617     VAPictureParameterBufferJPEGBaseline *pic_param;
2618     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2619     dri_bo *slice_data_bo;
2620     int i, j, max_selector = 0;
2621
2622     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2623     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2624
2625     /* Currently only support Baseline DCT */
2626     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2627     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2628 #ifdef JPEG_WA
2629     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2630 #endif
2631     intel_batchbuffer_emit_mi_flush(batch);
2632     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2633     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2634     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2635     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2636     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2637
2638     for (j = 0; j < decode_state->num_slice_params; j++) {
2639         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2640         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2641         slice_data_bo = decode_state->slice_datas[j]->bo;
2642         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2643
2644         if (j == decode_state->num_slice_params - 1)
2645             next_slice_group_param = NULL;
2646         else
2647             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2648
2649         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2650             int component;
2651
2652             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2653
2654             if (i < decode_state->slice_params[j]->num_elements - 1)
2655                 next_slice_param = slice_param + 1;
2656             else
2657                 next_slice_param = next_slice_group_param;
2658
2659             for (component = 0; component < slice_param->num_components; component++) {
2660                 if (max_selector < slice_param->components[component].dc_table_selector)
2661                     max_selector = slice_param->components[component].dc_table_selector;
2662
2663                 if (max_selector < slice_param->components[component].ac_table_selector)
2664                     max_selector = slice_param->components[component].ac_table_selector;
2665             }
2666
2667             slice_param++;
2668         }
2669     }
2670
2671     assert(max_selector < 2);
2672     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2673
2674     for (j = 0; j < decode_state->num_slice_params; j++) {
2675         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2676         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2677         slice_data_bo = decode_state->slice_datas[j]->bo;
2678         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2679
2680         if (j == decode_state->num_slice_params - 1)
2681             next_slice_group_param = NULL;
2682         else
2683             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2684
2685         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2686             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2687
2688             if (i < decode_state->slice_params[j]->num_elements - 1)
2689                 next_slice_param = slice_param + 1;
2690             else
2691                 next_slice_param = next_slice_group_param;
2692
2693             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2694             slice_param++;
2695         }
2696     }
2697
2698     intel_batchbuffer_end_atomic(batch);
2699     intel_batchbuffer_flush(batch);
2700 }
2701
/* VP8 DC dequantization lookup table: maps a clipped 7-bit quantization
 * index (see vp8_clip_quantization_index) to the DC dequant factor. */
static const int vp8_dc_qlookup[128] =
{
      4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
     18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
     29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
     44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
     59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
     75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
     91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
};
2713
/* VP8 AC dequantization lookup table: maps a clipped 7-bit quantization
 * index (see vp8_clip_quantization_index) to the AC dequant factor. */
static const int vp8_ac_qlookup[128] =
{
      4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
     20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
     36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
     52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
     78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
};
2725
/* Clamp a VP8 quantization index into the valid lookup-table range [0, 127]. */
static inline unsigned int vp8_clip_quantization_index(int index)
{
    if (index < 0)
        return 0;

    return (index > 127) ? 127 : (unsigned int)index;
}
2735
/*
 * Per-picture VP8 decode setup: bind the render target as the decode
 * destination and (re)allocate the row store scratch buffers the MFX
 * pipeline needs.
 */
static void
gen8_mfd_vp8_decode_init(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;
    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    int width_in_mbs = (pic_param->frame_width + 15) / 16;
    int height_in_mbs = (pic_param->frame_height + 15) / 16;

    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    intel_update_vp8_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* Exactly one of the two outputs is valid, selected by whether the
     * in-loop filter is enabled for this picture. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;

    intel_ensure_vp8_segmentation_buffer(ctx,
        &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);

    /* The same as AVC */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* VP8 carries no bitplane data */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
2812
2813 static void
2814 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2815                        struct decode_state *decode_state,
2816                        struct gen7_mfd_context *gen7_mfd_context)
2817 {
2818     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2819     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2820     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2821     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2822     dri_bo *probs_bo = decode_state->probability_data->bo;
2823     int i, j,log2num;
2824     unsigned int quantization_value[4][6];
2825
2826     /* There is no safe way to error out if the segmentation buffer
2827        could not be allocated. So, instead of aborting, simply decode
2828        something even if the result may look totally inacurate */
2829     const unsigned int enable_segmentation =
2830         pic_param->pic_fields.bits.segmentation_enabled &&
2831         gen7_mfd_context->segmentation_buffer.valid;
2832         
2833     log2num = (int)log2(slice_param->num_of_partitions - 1);
2834
2835     BEGIN_BCS_BATCH(batch, 38);
2836     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2837     OUT_BCS_BATCH(batch,
2838                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2839                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2840     OUT_BCS_BATCH(batch,
2841                   log2num << 24 |
2842                   pic_param->pic_fields.bits.sharpness_level << 16 |
2843                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2844                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2845                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2846                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2847                   (enable_segmentation &&
2848                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
2849                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2850                   (enable_segmentation &&
2851                    !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2852                   (enable_segmentation &&
2853                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2854                   (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2855                   pic_param->pic_fields.bits.filter_type << 4 |
2856                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2857                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2858
2859     OUT_BCS_BATCH(batch,
2860                   pic_param->loop_filter_level[3] << 24 |
2861                   pic_param->loop_filter_level[2] << 16 |
2862                   pic_param->loop_filter_level[1] <<  8 |
2863                   pic_param->loop_filter_level[0] <<  0);
2864
2865     /* Quantizer Value for 4 segmetns, DW4-DW15 */
2866     for (i = 0; i < 4; i++) {
2867                 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2868                 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2869                 quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
2870                 /* 101581>>16 is equivalent to 155/100 */
2871                 quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
2872                 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2873                 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2874
2875                 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2876                 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2877
2878                 OUT_BCS_BATCH(batch,
2879                       quantization_value[i][0] << 16 | /* Y1AC */
2880                       quantization_value[i][1] <<  0); /* Y1DC */
2881         OUT_BCS_BATCH(batch,
2882                       quantization_value[i][5] << 16 | /* UVAC */
2883                       quantization_value[i][4] <<  0); /* UVDC */
2884         OUT_BCS_BATCH(batch,
2885                       quantization_value[i][3] << 16 | /* Y2AC */
2886                       quantization_value[i][2] <<  0); /* Y2DC */
2887     }
2888
2889     /* CoeffProbability table for non-key frame, DW16-DW18 */
2890     if (probs_bo) {
2891         OUT_BCS_RELOC(batch, probs_bo,
2892                       0, I915_GEM_DOMAIN_INSTRUCTION,
2893                       0);
2894         OUT_BCS_BATCH(batch, 0);
2895         OUT_BCS_BATCH(batch, 0);
2896     } else {
2897         OUT_BCS_BATCH(batch, 0);
2898         OUT_BCS_BATCH(batch, 0);
2899         OUT_BCS_BATCH(batch, 0);
2900     }
2901
2902     OUT_BCS_BATCH(batch,
2903                   pic_param->mb_segment_tree_probs[2] << 16 |
2904                   pic_param->mb_segment_tree_probs[1] <<  8 |
2905                   pic_param->mb_segment_tree_probs[0] <<  0);
2906
2907     OUT_BCS_BATCH(batch,
2908                   pic_param->prob_skip_false << 24 |
2909                   pic_param->prob_intra      << 16 |
2910                   pic_param->prob_last       <<  8 |
2911                   pic_param->prob_gf         <<  0);
2912
2913     OUT_BCS_BATCH(batch,
2914                   pic_param->y_mode_probs[3] << 24 |
2915                   pic_param->y_mode_probs[2] << 16 |
2916                   pic_param->y_mode_probs[1] <<  8 |
2917                   pic_param->y_mode_probs[0] <<  0);
2918
2919     OUT_BCS_BATCH(batch,
2920                   pic_param->uv_mode_probs[2] << 16 |
2921                   pic_param->uv_mode_probs[1] <<  8 |
2922                   pic_param->uv_mode_probs[0] <<  0);
2923     
2924     /* MV update value, DW23-DW32 */
2925     for (i = 0; i < 2; i++) {
2926         for (j = 0; j < 20; j += 4) {
2927             OUT_BCS_BATCH(batch,
2928                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2929                           pic_param->mv_probs[i][j + 2] << 16 |
2930                           pic_param->mv_probs[i][j + 1] <<  8 |
2931                           pic_param->mv_probs[i][j + 0] <<  0);
2932         }
2933     }
2934
2935     OUT_BCS_BATCH(batch,
2936                   (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2937                   (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2938                   (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
2939                   (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);
2940
2941     OUT_BCS_BATCH(batch,
2942                   (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2943                   (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2944                   (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
2945                   (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);
2946
2947     /* segmentation id stream base address, DW35-DW37 */
2948     if (enable_segmentation) {
2949         OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2950                       0, I915_GEM_DOMAIN_INSTRUCTION,
2951                       0);
2952         OUT_BCS_BATCH(batch, 0);
2953         OUT_BCS_BATCH(batch, 0);
2954     }
2955     else {
2956         OUT_BCS_BATCH(batch, 0);
2957         OUT_BCS_BATCH(batch, 0);
2958         OUT_BCS_BATCH(batch, 0);
2959     }
2960     ADVANCE_BCS_BATCH(batch);
2961 }
2962
2963 static void
2964 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2965                         VAPictureParameterBufferVP8 *pic_param,
2966                         VASliceParameterBufferVP8 *slice_param,
2967                         dri_bo *slice_data_bo,
2968                         struct gen7_mfd_context *gen7_mfd_context)
2969 {
2970     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2971     int i, log2num;
2972     unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
2973     unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
2974     unsigned int partition_size_0 = slice_param->partition_size[0];
2975
2976     assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
2977     if (used_bits == 8) {
2978         used_bits = 0;
2979         offset += 1;
2980         partition_size_0 -= 1;
2981     }
2982
2983     assert(slice_param->num_of_partitions >= 2);
2984     assert(slice_param->num_of_partitions <= 9);
2985
2986     log2num = (int)log2(slice_param->num_of_partitions - 1);
2987
2988     BEGIN_BCS_BATCH(batch, 22);
2989     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2990     OUT_BCS_BATCH(batch,
2991                   used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2992                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
2993                   log2num << 4 |
2994                   (slice_param->macroblock_offset & 0x7));
2995     OUT_BCS_BATCH(batch,
2996                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
2997                   0);
2998
2999     OUT_BCS_BATCH(batch, partition_size_0 + 1);
3000     OUT_BCS_BATCH(batch, offset);
3001     //partion sizes in bytes are present after the above first partition when there are more than one token partition
3002     offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
3003     for (i = 1; i < 9; i++) {
3004         if (i < slice_param->num_of_partitions) {
3005             OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
3006             OUT_BCS_BATCH(batch, offset);
3007         } else {
3008             OUT_BCS_BATCH(batch, 0);
3009             OUT_BCS_BATCH(batch, 0);
3010         }
3011
3012         offset += slice_param->partition_size[i];
3013     }
3014
3015     OUT_BCS_BATCH(batch, 0); /* concealment method */
3016
3017     ADVANCE_BCS_BATCH(batch);
3018 }
3019
3020 void
3021 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3022                             struct decode_state *decode_state,
3023                             struct gen7_mfd_context *gen7_mfd_context)
3024 {
3025     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3026     VAPictureParameterBufferVP8 *pic_param;
3027     VASliceParameterBufferVP8 *slice_param;
3028     dri_bo *slice_data_bo;
3029
3030     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3031     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3032
3033     /* one slice per frame */
3034     if (decode_state->num_slice_params != 1 ||
3035         (!decode_state->slice_params ||
3036          !decode_state->slice_params[0] ||
3037          (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3038         (!decode_state->slice_datas ||
3039          !decode_state->slice_datas[0] ||
3040          !decode_state->slice_datas[0]->bo) ||
3041         !decode_state->probability_data) {
3042         WARN_ONCE("Wrong parameters for VP8 decoding\n");
3043
3044         return;
3045     }
3046
3047     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3048     slice_data_bo = decode_state->slice_datas[0]->bo;
3049
3050     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3051     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3052     intel_batchbuffer_emit_mi_flush(batch);
3053     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3054     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3055     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3056     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3057     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3058     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3059     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3060     intel_batchbuffer_end_atomic(batch);
3061     intel_batchbuffer_flush(batch);
3062 }
3063
3064 static VAStatus
3065 gen8_mfd_decode_picture(VADriverContextP ctx, 
3066                         VAProfile profile, 
3067                         union codec_state *codec_state,
3068                         struct hw_context *hw_context)
3069
3070 {
3071     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3072     struct decode_state *decode_state = &codec_state->decode;
3073     VAStatus vaStatus;
3074
3075     assert(gen7_mfd_context);
3076
3077     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3078
3079     if (vaStatus != VA_STATUS_SUCCESS)
3080         goto out;
3081
3082     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3083
3084     switch (profile) {
3085     case VAProfileMPEG2Simple:
3086     case VAProfileMPEG2Main:
3087         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3088         break;
3089         
3090     case VAProfileH264ConstrainedBaseline:
3091     case VAProfileH264Main:
3092     case VAProfileH264High:
3093     case VAProfileH264StereoHigh:
3094     case VAProfileH264MultiviewHigh:
3095         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3096         break;
3097
3098     case VAProfileVC1Simple:
3099     case VAProfileVC1Main:
3100     case VAProfileVC1Advanced:
3101         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3102         break;
3103
3104     case VAProfileJPEGBaseline:
3105         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3106         break;
3107
3108     case VAProfileVP8Version0_3:
3109         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3110         break;
3111
3112     default:
3113         assert(0);
3114         break;
3115     }
3116
3117     vaStatus = VA_STATUS_SUCCESS;
3118
3119 out:
3120     return vaStatus;
3121 }
3122
3123 static void
3124 gen8_mfd_context_destroy(void *hw_context)
3125 {
3126     VADriverContextP ctx;
3127     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3128
3129     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3130
3131     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3132     gen7_mfd_context->post_deblocking_output.bo = NULL;
3133
3134     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3135     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3136
3137     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3138     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3139
3140     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3141     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3142
3143     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3144     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3145
3146     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3147     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3148
3149     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3150     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3151
3152     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3153     gen7_mfd_context->segmentation_buffer.bo = NULL;
3154
3155     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3156
3157     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3158         i965_DestroySurfaces(ctx,
3159                              &gen7_mfd_context->jpeg_wa_surface_id,
3160                              1);
3161         gen7_mfd_context->jpeg_wa_surface_object = NULL;
3162     }
3163
3164     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3165     free(gen7_mfd_context);
3166 }
3167
3168 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3169                                     struct gen7_mfd_context *gen7_mfd_context)
3170 {
3171     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3172     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3173     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3174     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3175 }
3176
3177 struct hw_context *
3178 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3179 {
3180     struct intel_driver_data *intel = intel_driver_data(ctx);
3181     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3182     int i;
3183
3184     if (!gen7_mfd_context)
3185         return NULL;
3186
3187     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3188     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3189     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3190
3191     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3192         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3193         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3194     }
3195
3196     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3197     gen7_mfd_context->segmentation_buffer.valid = 0;
3198
3199     switch (obj_config->profile) {
3200     case VAProfileMPEG2Simple:
3201     case VAProfileMPEG2Main:
3202         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3203         break;
3204
3205     case VAProfileH264ConstrainedBaseline:
3206     case VAProfileH264Main:
3207     case VAProfileH264High:
3208     case VAProfileH264StereoHigh:
3209     case VAProfileH264MultiviewHigh:
3210         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3211         break;
3212     default:
3213         break;
3214     }
3215
3216     gen7_mfd_context->driver_context = ctx;
3217     return (struct hw_context *)gen7_mfd_context;
3218 }