
Clean up the two duplicate extra lines of code
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV             2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
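/*
 * Zig-zag scan order table.  gen8_mfd_mpeg2_qm_state() uses it to reorder
 * the MPEG-2 quantiser matrices supplied through VA-API into the layout
 * expected by the MFX_QM_STATE upload.
 */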
51 static const uint32_t zigzag_direct[64] = {
52     0,   1,  8, 16,  9,  2,  3, 10,
53     17, 24, 32, 25, 18, 11,  4,  5,
54     12, 19, 26, 33, 40, 48, 41, 34,
55     27, 20, 13,  6,  7, 14, 21, 28,
56     35, 42, 49, 56, 57, 50, 43, 36,
57     29, 22, 15, 23, 30, 37, 44, 51,
58     58, 59, 52, 45, 38, 31, 39, 46,
59     53, 60, 61, 54, 47, 55, 62, 63
60 };
61
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx, 
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77         gen7_avc_surface->base.frame_store_id = -1;
78         assert((obj_surface->size & 0x3f) == 0);
79         obj_surface->private_data = gen7_avc_surface;
80     }
81
82     /* DMV buffers now relate to the whole frame, irrespective of
83        field coding modes */
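    /* Note: the allocation below reserves 128 bytes of direct-MV data
       per macroblock of the frame. */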
84     if (gen7_avc_surface->dmv_top == NULL) {
85         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86                                                  "direct mv w/r buffer",
87                                                  width_in_mbs * height_in_mbs * 128,
88                                                  0x1000);
89         assert(gen7_avc_surface->dmv_top);
90     }
91 }
92
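/*
 * Emit MFX_PIPE_MODE_SELECT: put the MFX engine into VLD decode mode for
 * the selected codec and enable either the pre- or the post-deblocking
 * output path, depending on which one was marked valid during decode init.
 */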
93 static void
94 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
95                           struct decode_state *decode_state,
96                           int standard_select,
97                           struct gen7_mfd_context *gen7_mfd_context)
98 {
99     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
100
101     assert(standard_select == MFX_FORMAT_MPEG2 ||
102            standard_select == MFX_FORMAT_AVC ||
103            standard_select == MFX_FORMAT_VC1 ||
104            standard_select == MFX_FORMAT_JPEG ||
105            standard_select == MFX_FORMAT_VP8);
106
107     BEGIN_BCS_BATCH(batch, 5);
108     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
109     OUT_BCS_BATCH(batch,
110                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
111                   (MFD_MODE_VLD << 15) | /* VLD mode */
112                   (0 << 10) | /* disable Stream-Out */
113                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
114                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
115                   (0 << 5)  | /* not in stitch mode */
116                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
117                   (standard_select << 0));
118     OUT_BCS_BATCH(batch,
119                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
120                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
121                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
122                   (0 << 1)  |
123                   (0 << 0));
124     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
125     OUT_BCS_BATCH(batch, 0); /* reserved */
126     ADVANCE_BCS_BATCH(batch);
127 }
128
129 static void
130 gen8_mfd_surface_state(VADriverContextP ctx,
131                        struct decode_state *decode_state,
132                        int standard_select,
133                        struct gen7_mfd_context *gen7_mfd_context)
134 {
135     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
136     struct object_surface *obj_surface = decode_state->render_object;
137     unsigned int y_cb_offset;
138     unsigned int y_cr_offset;
139     unsigned int surface_format;
140
141     assert(obj_surface);
142
143     y_cb_offset = obj_surface->y_cb_offset;
144     y_cr_offset = obj_surface->y_cr_offset;
145
146     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
147         MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
148
149     BEGIN_BCS_BATCH(batch, 6);
150     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
151     OUT_BCS_BATCH(batch, 0);
152     OUT_BCS_BATCH(batch,
153                   ((obj_surface->orig_height - 1) << 18) |
154                   ((obj_surface->orig_width - 1) << 4));
155     OUT_BCS_BATCH(batch,
156                   (surface_format << 28) | /* 420 planar YUV surface */
157                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
158                   (0 << 22) | /* surface object control state, ignored */
159                   ((obj_surface->width - 1) << 3) | /* pitch */
160                   (0 << 2)  | /* must be 0 */
161                   (1 << 1)  | /* must be tiled */
162                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
163     OUT_BCS_BATCH(batch,
164                   (0 << 16) | /* X offset for U(Cb), must be 0 */
165                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
166     OUT_BCS_BATCH(batch,
167                   (0 << 16) | /* X offset for V(Cr), must be 0 */
168                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
169     ADVANCE_BCS_BATCH(batch);
170 }
171
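/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): pre/post-deblocking output
 * surfaces, the intra and deblocking-filter row-store scratch buffers,
 * and the addresses of the 16 reference surfaces (DW 19..50).
 */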
172 static void
173 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
174                              struct decode_state *decode_state,
175                              int standard_select,
176                              struct gen7_mfd_context *gen7_mfd_context)
177 {
178     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
179     int i;
180
181     BEGIN_BCS_BATCH(batch, 61);
182     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
183         /* Pre-deblock 1-3 */
184     if (gen7_mfd_context->pre_deblocking_output.valid)
185         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
186                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
187                       0);
188     else
189         OUT_BCS_BATCH(batch, 0);
190
191         OUT_BCS_BATCH(batch, 0);
192         OUT_BCS_BATCH(batch, 0);
193         /* Post-deblocking 4-6 */
194     if (gen7_mfd_context->post_deblocking_output.valid)
195         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
196                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
197                       0);
198     else
199         OUT_BCS_BATCH(batch, 0);
200
201         OUT_BCS_BATCH(batch, 0);
202         OUT_BCS_BATCH(batch, 0);
203
204         /* uncompressed-video & stream out 7-12 */
205     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
206     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
207         OUT_BCS_BATCH(batch, 0);
208         OUT_BCS_BATCH(batch, 0);
209         OUT_BCS_BATCH(batch, 0);
210         OUT_BCS_BATCH(batch, 0);
211
212         /* intra row-store scratch 13-15 */
213     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
214         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
215                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
216                       0);
217     else
218         OUT_BCS_BATCH(batch, 0);
219
220         OUT_BCS_BATCH(batch, 0);
221         OUT_BCS_BATCH(batch, 0);
222         /* deblocking-filter-row-store 16-18 */
223     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
224         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
225                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
226                       0);
227     else
228         OUT_BCS_BATCH(batch, 0);
229         OUT_BCS_BATCH(batch, 0);
230         OUT_BCS_BATCH(batch, 0);
231
232     /* DW 19..50 */
233     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
234         struct object_surface *obj_surface;
235
236         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
237             gen7_mfd_context->reference_surface[i].obj_surface &&
238             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
239             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
240
241             OUT_BCS_RELOC(batch, obj_surface->bo,
242                           I915_GEM_DOMAIN_INSTRUCTION, 0,
243                           0);
244         } else {
245             OUT_BCS_BATCH(batch, 0);
246         }
247         
248         OUT_BCS_BATCH(batch, 0);
249     }
250     
251     /* reference property 51 */
252     OUT_BCS_BATCH(batch, 0);  
253         
254     /* Macroblock status & ILDB 52-57 */
255     OUT_BCS_BATCH(batch, 0);
256     OUT_BCS_BATCH(batch, 0);
257     OUT_BCS_BATCH(batch, 0);
258     OUT_BCS_BATCH(batch, 0);
259     OUT_BCS_BATCH(batch, 0);
260     OUT_BCS_BATCH(batch, 0);
261
262     /* the second Macroblock status 58-60 */    
263     OUT_BCS_BATCH(batch, 0);
264     OUT_BCS_BATCH(batch, 0);
265     OUT_BCS_BATCH(batch, 0);
266
267     ADVANCE_BCS_BATCH(batch);
268 }
269
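/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE: for VLD decoding only the indirect
 * bitstream object base address (the slice data BO) is programmed; the
 * MV, IT_COFF, IT_DBLK and PAK_BSE entries are left zero.
 */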
270 static void
271 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
272                                  dri_bo *slice_data_bo,
273                                  int standard_select,
274                                  struct gen7_mfd_context *gen7_mfd_context)
275 {
276     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
277
278     BEGIN_BCS_BATCH(batch, 26);
279     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
280         /* MFX In BS 1-5 */
281     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
282     OUT_BCS_BATCH(batch, 0);
283     OUT_BCS_BATCH(batch, 0);
284         /* Upper bound 4-5 */   
285     OUT_BCS_BATCH(batch, 0);
286     OUT_BCS_BATCH(batch, 0);
287
288         /* MFX indirect MV 6-10 */
289     OUT_BCS_BATCH(batch, 0);
290     OUT_BCS_BATCH(batch, 0);
291     OUT_BCS_BATCH(batch, 0);
292     OUT_BCS_BATCH(batch, 0);
293     OUT_BCS_BATCH(batch, 0);
294         
295         /* MFX IT_COFF 11-15 */
296     OUT_BCS_BATCH(batch, 0);
297     OUT_BCS_BATCH(batch, 0);
298     OUT_BCS_BATCH(batch, 0);
299     OUT_BCS_BATCH(batch, 0);
300     OUT_BCS_BATCH(batch, 0);
301
302         /* MFX IT_DBLK 16-20 */
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305     OUT_BCS_BATCH(batch, 0);
306     OUT_BCS_BATCH(batch, 0);
307     OUT_BCS_BATCH(batch, 0);
308
309         /* MFX PAK_BSE object for encoder 21-25 */
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312     OUT_BCS_BATCH(batch, 0);
313     OUT_BCS_BATCH(batch, 0);
314     OUT_BCS_BATCH(batch, 0);
315
316     ADVANCE_BCS_BATCH(batch);
317 }
318
319 static void
320 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
321                                  struct decode_state *decode_state,
322                                  int standard_select,
323                                  struct gen7_mfd_context *gen7_mfd_context)
324 {
325     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
326
327     BEGIN_BCS_BATCH(batch, 10);
328     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
329
330     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
331         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
332                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
333                       0);
334     else
335         OUT_BCS_BATCH(batch, 0);
336
337     OUT_BCS_BATCH(batch, 0);
338     OUT_BCS_BATCH(batch, 0);
339         /* MPR Row Store Scratch buffer 4-6 */
340     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
341         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
342                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
343                       0);
344     else
345         OUT_BCS_BATCH(batch, 0);
346
347     OUT_BCS_BATCH(batch, 0);
348     OUT_BCS_BATCH(batch, 0);
349
350         /* Bitplane 7-9 */ 
351     if (gen7_mfd_context->bitplane_read_buffer.valid)
352         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
353                       I915_GEM_DOMAIN_INSTRUCTION, 0,
354                       0);
355     else
356         OUT_BCS_BATCH(batch, 0);
357     OUT_BCS_BATCH(batch, 0);
358     OUT_BCS_BATCH(batch, 0);
359     ADVANCE_BCS_BATCH(batch);
360 }
361
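/*
 * Upload one quantization matrix with MFX_QM_STATE.  At most 64 bytes are
 * copied from the caller; the payload is always emitted as 16 dwords.
 */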
362 static void
363 gen8_mfd_qm_state(VADriverContextP ctx,
364                   int qm_type,
365                   unsigned char *qm,
366                   int qm_length,
367                   struct gen7_mfd_context *gen7_mfd_context)
368 {
369     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
370     unsigned int qm_buffer[16];
371
372     assert(qm_length <= 16 * 4);
373     memcpy(qm_buffer, qm, qm_length);
374
375     BEGIN_BCS_BATCH(batch, 18);
376     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
377     OUT_BCS_BATCH(batch, qm_type << 0);
378     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
379     ADVANCE_BCS_BATCH(batch);
380 }
381
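/*
 * Emit MFX_AVC_IMG_STATE from the VA picture parameters: frame size in
 * macroblocks, chroma QP offsets and the sequence/picture flags.
 * img_struct encodes the picture structure (0 = frame, 1 = top field,
 * 3 = bottom field).
 */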
382 static void
383 gen8_mfd_avc_img_state(VADriverContextP ctx,
384                        struct decode_state *decode_state,
385                        struct gen7_mfd_context *gen7_mfd_context)
386 {
387     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
388     int img_struct;
389     int mbaff_frame_flag;
390     unsigned int width_in_mbs, height_in_mbs;
391     VAPictureParameterBufferH264 *pic_param;
392
393     assert(decode_state->pic_param && decode_state->pic_param->buffer);
394     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
395     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
396
397     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
398         img_struct = 1;
399     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
400         img_struct = 3;
401     else
402         img_struct = 0;
403
404     if ((img_struct & 0x1) == 0x1) {
405         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
406     } else {
407         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
408     }
409
410     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
411         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
412         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
413     } else {
414         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
415     }
416
417     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
418                         !pic_param->pic_fields.bits.field_pic_flag);
419
420     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
421     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
422
423     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
424     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
425            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
426     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
427
428     BEGIN_BCS_BATCH(batch, 17);
429     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
430     OUT_BCS_BATCH(batch, 
431                   (width_in_mbs * height_in_mbs - 1));
432     OUT_BCS_BATCH(batch, 
433                   ((height_in_mbs - 1) << 16) | 
434                   ((width_in_mbs - 1) << 0));
435     OUT_BCS_BATCH(batch, 
436                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
437                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
438                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
439                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
440                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
441                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
442                   (img_struct << 8));
443     OUT_BCS_BATCH(batch,
444                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
445                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
446                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
447                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
448                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
449                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
450                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
451                   (mbaff_frame_flag << 1) |
452                   (pic_param->pic_fields.bits.field_pic_flag << 0));
453     OUT_BCS_BATCH(batch, 0);
454     OUT_BCS_BATCH(batch, 0);
455     OUT_BCS_BATCH(batch, 0);
456     OUT_BCS_BATCH(batch, 0);
457     OUT_BCS_BATCH(batch, 0);
458     OUT_BCS_BATCH(batch, 0);
459     OUT_BCS_BATCH(batch, 0);
460     OUT_BCS_BATCH(batch, 0);
461     OUT_BCS_BATCH(batch, 0);
462     OUT_BCS_BATCH(batch, 0);
463     OUT_BCS_BATCH(batch, 0);
464     OUT_BCS_BATCH(batch, 0);
465     ADVANCE_BCS_BATCH(batch);
466 }
467
468 static void
469 gen8_mfd_avc_qm_state(VADriverContextP ctx,
470                       struct decode_state *decode_state,
471                       struct gen7_mfd_context *gen7_mfd_context)
472 {
473     VAIQMatrixBufferH264 *iq_matrix;
474     VAPictureParameterBufferH264 *pic_param;
475
476     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
477         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
478     else
479         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
480
481     assert(decode_state->pic_param && decode_state->pic_param->buffer);
482     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
483
484     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
485     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
486
487     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
488         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
489         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
490     }
491 }
492
493 static inline void
494 gen8_mfd_avc_picid_state(VADriverContextP ctx,
495     struct decode_state *decode_state,
496     struct gen7_mfd_context *gen7_mfd_context)
497 {
498     gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
499         gen7_mfd_context->reference_surface);
500 }
501
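/*
 * Emit MFX_AVC_DIRECTMODE_STATE: the direct-MV buffer addresses of the 16
 * reference surfaces and of the current picture, followed by the POC list
 * (top/bottom field order counts) used for direct-mode prediction.
 */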
502 static void
503 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
504                               struct decode_state *decode_state,
505                               VAPictureParameterBufferH264 *pic_param,
506                               VASliceParameterBufferH264 *slice_param,
507                               struct gen7_mfd_context *gen7_mfd_context)
508 {
509     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
510     struct object_surface *obj_surface;
511     GenAvcSurface *gen7_avc_surface;
512     VAPictureH264 *va_pic;
513     int i;
514
515     BEGIN_BCS_BATCH(batch, 71);
516     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
517
518     /* reference surfaces 0..15 */
519     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
520         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
521             gen7_mfd_context->reference_surface[i].obj_surface &&
522             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
523
524             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
525             gen7_avc_surface = obj_surface->private_data;
526
527             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
528                           I915_GEM_DOMAIN_INSTRUCTION, 0,
529                           0);
530             OUT_BCS_BATCH(batch, 0);
531         } else {
532             OUT_BCS_BATCH(batch, 0);
533             OUT_BCS_BATCH(batch, 0);
534         }
535     }
536     
537     OUT_BCS_BATCH(batch, 0);
538
539     /* the current decoding frame/field */
540     va_pic = &pic_param->CurrPic;
541     obj_surface = decode_state->render_object;
542     assert(obj_surface->bo && obj_surface->private_data);
543     gen7_avc_surface = obj_surface->private_data;
544
545     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
546                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
547                   0);
548
549     OUT_BCS_BATCH(batch, 0);
550     OUT_BCS_BATCH(batch, 0);
551
552     /* POC List */
553     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
554         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
555
556         if (obj_surface) {
557             const VAPictureH264 * const va_pic = avc_find_picture(
558                 obj_surface->base.id, pic_param->ReferenceFrames,
559                 ARRAY_ELEMS(pic_param->ReferenceFrames));
560
561             assert(va_pic != NULL);
562             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
563             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
564         } else {
565             OUT_BCS_BATCH(batch, 0);
566             OUT_BCS_BATCH(batch, 0);
567         }
568     }
569
570     va_pic = &pic_param->CurrPic;
571     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
572     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
573
574     ADVANCE_BCS_BATCH(batch);
575 }
576
577 static void
578 gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
579                                  VAPictureParameterBufferH264 *pic_param,
580                                  VASliceParameterBufferH264 *next_slice_param,
581                                  struct gen7_mfd_context *gen7_mfd_context)
582 {
583     gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
584 }
585
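/*
 * Emit MFX_AVC_SLICE_STATE for one slice: slice type, active reference
 * counts, QP and deblocking offsets, and the start positions of this slice
 * and of the next one in macroblock units (vertical positions are doubled
 * for MBAFF pictures).
 */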
586 static void
587 gen8_mfd_avc_slice_state(VADriverContextP ctx,
588                          VAPictureParameterBufferH264 *pic_param,
589                          VASliceParameterBufferH264 *slice_param,
590                          VASliceParameterBufferH264 *next_slice_param,
591                          struct gen7_mfd_context *gen7_mfd_context)
592 {
593     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
594     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
595     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
596     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
597     int num_ref_idx_l0, num_ref_idx_l1;
598     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
599                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
600     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
601     int slice_type;
602
603     if (slice_param->slice_type == SLICE_TYPE_I ||
604         slice_param->slice_type == SLICE_TYPE_SI) {
605         slice_type = SLICE_TYPE_I;
606     } else if (slice_param->slice_type == SLICE_TYPE_P ||
607                slice_param->slice_type == SLICE_TYPE_SP) {
608         slice_type = SLICE_TYPE_P;
609     } else { 
610         assert(slice_param->slice_type == SLICE_TYPE_B);
611         slice_type = SLICE_TYPE_B;
612     }
613
614     if (slice_type == SLICE_TYPE_I) {
615         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
616         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
617         num_ref_idx_l0 = 0;
618         num_ref_idx_l1 = 0;
619     } else if (slice_type == SLICE_TYPE_P) {
620         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
621         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
622         num_ref_idx_l1 = 0;
623     } else {
624         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
625         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
626     }
627
628     first_mb_in_slice = slice_param->first_mb_in_slice;
629     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
630     slice_ver_pos = first_mb_in_slice / width_in_mbs;
631
632     if (mbaff_picture)
633         slice_ver_pos = slice_ver_pos << 1;
634     if (next_slice_param) {
635         first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
636         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
637         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
638
639         if (mbaff_picture)
640             next_slice_ver_pos = next_slice_ver_pos << 1;
641     } else {
642         next_slice_hor_pos = 0;
643         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
644     }
645
646     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
647     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
648     OUT_BCS_BATCH(batch, slice_type);
649     OUT_BCS_BATCH(batch, 
650                   (num_ref_idx_l1 << 24) |
651                   (num_ref_idx_l0 << 16) |
652                   (slice_param->chroma_log2_weight_denom << 8) |
653                   (slice_param->luma_log2_weight_denom << 0));
654     OUT_BCS_BATCH(batch, 
655                   (slice_param->direct_spatial_mv_pred_flag << 29) |
656                   (slice_param->disable_deblocking_filter_idc << 27) |
657                   (slice_param->cabac_init_idc << 24) |
658                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
659                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
660                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
661     OUT_BCS_BATCH(batch, 
662                   (slice_ver_pos << 24) |
663                   (slice_hor_pos << 16) | 
664                   (first_mb_in_slice << 0));
665     OUT_BCS_BATCH(batch,
666                   (next_slice_ver_pos << 16) |
667                   (next_slice_hor_pos << 0));
668     OUT_BCS_BATCH(batch, 
669                   (next_slice_param == NULL) << 19); /* last slice flag */
670     OUT_BCS_BATCH(batch, 0);
671     OUT_BCS_BATCH(batch, 0);
672     OUT_BCS_BATCH(batch, 0);
673     OUT_BCS_BATCH(batch, 0);
674     ADVANCE_BCS_BATCH(batch);
675 }
676
677 static inline void
678 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
679                            VAPictureParameterBufferH264 *pic_param,
680                            VASliceParameterBufferH264 *slice_param,
681                            struct gen7_mfd_context *gen7_mfd_context)
682 {
683     gen6_send_avc_ref_idx_state(
684         gen7_mfd_context->base.batch,
685         slice_param,
686         gen7_mfd_context->reference_surface
687     );
688 }
689
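/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE when explicit weighted prediction is in
 * use: a 32-entry luma/chroma weight and offset table for list 0, plus a
 * second table for list 1 on B slices with weighted_bipred_idc == 1.
 */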
690 static void
691 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
692                                 VAPictureParameterBufferH264 *pic_param,
693                                 VASliceParameterBufferH264 *slice_param,
694                                 struct gen7_mfd_context *gen7_mfd_context)
695 {
696     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
697     int i, j, num_weight_offset_table = 0;
698     short weightoffsets[32 * 6];
699
700     if ((slice_param->slice_type == SLICE_TYPE_P ||
701          slice_param->slice_type == SLICE_TYPE_SP) &&
702         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
703         num_weight_offset_table = 1;
704     }
705     
706     if ((slice_param->slice_type == SLICE_TYPE_B) &&
707         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
708         num_weight_offset_table = 2;
709     }
710
711     for (i = 0; i < num_weight_offset_table; i++) {
712         BEGIN_BCS_BATCH(batch, 98);
713         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
714         OUT_BCS_BATCH(batch, i);
715
716         if (i == 0) {
717             for (j = 0; j < 32; j++) {
718                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
719                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
720                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
721                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
722                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
723                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
724             }
725         } else {
726             for (j = 0; j < 32; j++) {
727                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
728                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
729                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
730                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
731                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
732                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
733             }
734         }
735
736         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
737         ADVANCE_BCS_BATCH(batch);
738     }
739 }
740
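/*
 * Emit MFD_AVC_BSD_OBJECT for one slice: the slice data size and offset in
 * the indirect bitstream buffer, plus the byte/bit offset of the first
 * macroblock within the slice data.
 */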
741 static void
742 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
743                         VAPictureParameterBufferH264 *pic_param,
744                         VASliceParameterBufferH264 *slice_param,
745                         dri_bo *slice_data_bo,
746                         VASliceParameterBufferH264 *next_slice_param,
747                         struct gen7_mfd_context *gen7_mfd_context)
748 {
749     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
750     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
751                                                             slice_param,
752                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
753
754     /* the input bitstream format on GEN7 differs from GEN6 */
755     BEGIN_BCS_BATCH(batch, 6);
756     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
757     OUT_BCS_BATCH(batch, 
758                   (slice_param->slice_data_size));
759     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
760     OUT_BCS_BATCH(batch,
761                   (0 << 31) |
762                   (0 << 14) |
763                   (0 << 12) |
764                   (0 << 10) |
765                   (0 << 8));
766     OUT_BCS_BATCH(batch,
767                   ((slice_data_bit_offset >> 3) << 16) |
768                   (1 << 7)  |
769                   (0 << 5)  |
770                   (0 << 4)  |
771                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
772                   (slice_data_bit_offset & 0x7));
773     OUT_BCS_BATCH(batch, 0);
774     ADVANCE_BCS_BATCH(batch);
775 }
776
777 static inline void
778 gen8_mfd_avc_context_init(
779     VADriverContextP         ctx,
780     struct gen7_mfd_context *gen7_mfd_context
781 )
782 {
783     /* Initialize flat scaling lists */
784     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
785 }
786
787 static void
788 gen8_mfd_avc_decode_init(VADriverContextP ctx,
789                          struct decode_state *decode_state,
790                          struct gen7_mfd_context *gen7_mfd_context)
791 {
792     VAPictureParameterBufferH264 *pic_param;
793     VASliceParameterBufferH264 *slice_param;
794     struct i965_driver_data *i965 = i965_driver_data(ctx);
795     struct object_surface *obj_surface;
796     dri_bo *bo;
797     int i, j, enable_avc_ildb = 0;
798     unsigned int width_in_mbs, height_in_mbs;
799
800     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
801         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
802         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
803
804         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
805             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
806             assert((slice_param->slice_type == SLICE_TYPE_I) ||
807                    (slice_param->slice_type == SLICE_TYPE_SI) ||
808                    (slice_param->slice_type == SLICE_TYPE_P) ||
809                    (slice_param->slice_type == SLICE_TYPE_SP) ||
810                    (slice_param->slice_type == SLICE_TYPE_B));
811
812             if (slice_param->disable_deblocking_filter_idc != 1) {
813                 enable_avc_ildb = 1;
814                 break;
815             }
816
817             slice_param++;
818         }
819     }
820
821     assert(decode_state->pic_param && decode_state->pic_param->buffer);
822     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
823     gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
824         gen7_mfd_context->reference_surface);
825     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
826     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
827     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
828     assert(height_in_mbs > 0 && height_in_mbs <= 256);
829
830     /* Current decoded picture */
831     obj_surface = decode_state->render_object;
832     if (pic_param->pic_fields.bits.reference_pic_flag)
833         obj_surface->flags |= SURFACE_REFERENCED;
834     else
835         obj_surface->flags &= ~SURFACE_REFERENCED;
836
837     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
838     gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
839
840     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
841     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
842     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
843     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
844
845     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
846     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
847     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
848     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
849
850     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
851     bo = dri_bo_alloc(i965->intel.bufmgr,
852                       "intra row store",
853                       width_in_mbs * 64,
854                       0x1000);
855     assert(bo);
856     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
857     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
858
859     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
860     bo = dri_bo_alloc(i965->intel.bufmgr,
861                       "deblocking filter row store",
862                       width_in_mbs * 64 * 4,
863                       0x1000);
864     assert(bo);
865     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
866     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
867
868     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
869     bo = dri_bo_alloc(i965->intel.bufmgr,
870                       "bsd mpc row store",
871                       width_in_mbs * 64 * 2,
872                       0x1000);
873     assert(bo);
874     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
875     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
876
877     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
878     bo = dri_bo_alloc(i965->intel.bufmgr,
879                       "mpr row store",
880                       width_in_mbs * 64 * 2,
881                       0x1000);
882     assert(bo);
883     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
884     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
885
886     gen7_mfd_context->bitplane_read_buffer.valid = 0;
887 }
888
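/*
 * Top-level AVC decode: set up the per-frame buffers, then emit the frame
 * level MFX commands (pipe mode, surface, buffer addresses, QM, picid,
 * image state) followed by per-slice direct-mode, ref-idx, weight/offset,
 * slice-state and BSD-object commands for every slice of the picture.
 */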
889 static void
890 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
891                             struct decode_state *decode_state,
892                             struct gen7_mfd_context *gen7_mfd_context)
893 {
894     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
895     VAPictureParameterBufferH264 *pic_param;
896     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
897     dri_bo *slice_data_bo;
898     int i, j;
899
900     assert(decode_state->pic_param && decode_state->pic_param->buffer);
901     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
902     gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
903
904     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
905     intel_batchbuffer_emit_mi_flush(batch);
906     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
907     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
908     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
909     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
910     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
911     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
912     gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
913
914     for (j = 0; j < decode_state->num_slice_params; j++) {
915         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
916         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
917         slice_data_bo = decode_state->slice_datas[j]->bo;
918         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
919
920         if (j == decode_state->num_slice_params - 1)
921             next_slice_group_param = NULL;
922         else
923             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
924
925         if (j == 0 && slice_param->first_mb_in_slice)
926             gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context); 
927
928         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
929             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
930             assert((slice_param->slice_type == SLICE_TYPE_I) ||
931                    (slice_param->slice_type == SLICE_TYPE_SI) ||
932                    (slice_param->slice_type == SLICE_TYPE_P) ||
933                    (slice_param->slice_type == SLICE_TYPE_SP) ||
934                    (slice_param->slice_type == SLICE_TYPE_B));
935
936             if (i < decode_state->slice_params[j]->num_elements - 1)
937                 next_slice_param = slice_param + 1;
938             else
939                 next_slice_param = next_slice_group_param;
940
941             gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
942             gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
943             gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
944             gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
945             gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
946             slice_param++;
947         }
948     }
949
950     intel_batchbuffer_end_atomic(batch);
951     intel_batchbuffer_flush(batch);
952 }
953
954 static void
955 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
956                            struct decode_state *decode_state,
957                            struct gen7_mfd_context *gen7_mfd_context)
958 {
959     VAPictureParameterBufferMPEG2 *pic_param;
960     struct i965_driver_data *i965 = i965_driver_data(ctx);
961     struct object_surface *obj_surface;
962     dri_bo *bo;
963     unsigned int width_in_mbs;
964
965     assert(decode_state->pic_param && decode_state->pic_param->buffer);
966     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
967     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
968
969     mpeg2_set_reference_surfaces(
970         ctx,
971         gen7_mfd_context->reference_surface,
972         decode_state,
973         pic_param
974     );
975
976     /* Current decoded picture */
977     obj_surface = decode_state->render_object;
978     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
979
980     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
981     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
982     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
983     gen7_mfd_context->pre_deblocking_output.valid = 1;
984
985     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
986     bo = dri_bo_alloc(i965->intel.bufmgr,
987                       "bsd mpc row store",
988                       width_in_mbs * 96,
989                       0x1000);
990     assert(bo);
991     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
992     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
993
994     gen7_mfd_context->post_deblocking_output.valid = 0;
995     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
996     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
997     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
998     gen7_mfd_context->bitplane_read_buffer.valid = 0;
999 }
1000
1001 static void
1002 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1003                          struct decode_state *decode_state,
1004                          struct gen7_mfd_context *gen7_mfd_context)
1005 {
1006     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1007     VAPictureParameterBufferMPEG2 *pic_param;
1008     unsigned int slice_concealment_disable_bit = 0;
1009
1010     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1011     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1012
1013     slice_concealment_disable_bit = 1;
1014
1015     BEGIN_BCS_BATCH(batch, 13);
1016     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1017     OUT_BCS_BATCH(batch,
1018                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1019                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1020                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1021                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1022                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1023                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1024                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1025                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1026                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1027                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1028                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1029                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1030     OUT_BCS_BATCH(batch,
1031                   pic_param->picture_coding_type << 9);
1032     OUT_BCS_BATCH(batch,
1033                   (slice_concealment_disable_bit << 31) |
1034                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1035                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1036     OUT_BCS_BATCH(batch, 0);
1037     OUT_BCS_BATCH(batch, 0);
1038     OUT_BCS_BATCH(batch, 0);
1039     OUT_BCS_BATCH(batch, 0);
1040     OUT_BCS_BATCH(batch, 0);
1041     OUT_BCS_BATCH(batch, 0);
1042     OUT_BCS_BATCH(batch, 0);
1043     OUT_BCS_BATCH(batch, 0);
1044     OUT_BCS_BATCH(batch, 0);
1045     ADVANCE_BCS_BATCH(batch);
1046 }
1047
1048 static void
1049 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1050                         struct decode_state *decode_state,
1051                         struct gen7_mfd_context *gen7_mfd_context)
1052 {
1053     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1054     int i, j;
1055
1056     /* Update internal QM state */
1057     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1058         VAIQMatrixBufferMPEG2 * const iq_matrix =
1059             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1060
1061         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1062             iq_matrix->load_intra_quantiser_matrix) {
1063             gen_iq_matrix->load_intra_quantiser_matrix =
1064                 iq_matrix->load_intra_quantiser_matrix;
1065             if (iq_matrix->load_intra_quantiser_matrix) {
1066                 for (j = 0; j < 64; j++)
1067                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1068                         iq_matrix->intra_quantiser_matrix[j];
1069             }
1070         }
1071
1072         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1073             iq_matrix->load_non_intra_quantiser_matrix) {
1074             gen_iq_matrix->load_non_intra_quantiser_matrix =
1075                 iq_matrix->load_non_intra_quantiser_matrix;
1076             if (iq_matrix->load_non_intra_quantiser_matrix) {
1077                 for (j = 0; j < 64; j++)
1078                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1079                         iq_matrix->non_intra_quantiser_matrix[j];
1080             }
1081         }
1082     }
1083
1084     /* Commit QM state to HW */
1085     for (i = 0; i < 2; i++) {
1086         unsigned char *qm = NULL;
1087         int qm_type;
1088
1089         if (i == 0) {
1090             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1091                 qm = gen_iq_matrix->intra_quantiser_matrix;
1092                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1093             }
1094         } else {
1095             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1096                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1097                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1098             }
1099         }
1100
1101         if (!qm)
1102             continue;
1103
1104         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1105     }
1106 }
1107
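/*
 * Emit MFD_MPEG2_BSD_OBJECT for one slice.  The macroblock count is derived
 * from the start positions of this slice and of the next one; for field
 * pictures affected by the vertical-position workaround the reported
 * slice_vertical_position is halved first.
 */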
1108 static void
1109 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1110                           VAPictureParameterBufferMPEG2 *pic_param,
1111                           VASliceParameterBufferMPEG2 *slice_param,
1112                           VASliceParameterBufferMPEG2 *next_slice_param,
1113                           struct gen7_mfd_context *gen7_mfd_context)
1114 {
1115     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1116     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1117     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1118
1119     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1120         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1121         is_field_pic = 1;
1122     is_field_pic_wa = is_field_pic &&
1123         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1124
1125     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1126     hpos0 = slice_param->slice_horizontal_position;
1127
1128     if (next_slice_param == NULL) {
1129         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1130         hpos1 = 0;
1131     } else {
1132         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1133         hpos1 = next_slice_param->slice_horizontal_position;
1134     }
1135
1136     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1137
1138     BEGIN_BCS_BATCH(batch, 5);
1139     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1140     OUT_BCS_BATCH(batch, 
1141                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1142     OUT_BCS_BATCH(batch, 
1143                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1144     OUT_BCS_BATCH(batch,
1145                   hpos0 << 24 |
1146                   vpos0 << 16 |
1147                   mb_count << 8 |
1148                   (next_slice_param == NULL) << 5 |
1149                   (next_slice_param == NULL) << 3 |
1150                   (slice_param->macroblock_offset & 0x7));
1151     OUT_BCS_BATCH(batch,
1152                   (slice_param->quantiser_scale_code << 24) |
1153                   (vpos1 << 8 | hpos1));
1154     ADVANCE_BCS_BATCH(batch);
1155 }
1156
1157 static void
1158 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1159                               struct decode_state *decode_state,
1160                               struct gen7_mfd_context *gen7_mfd_context)
1161 {
1162     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1163     VAPictureParameterBufferMPEG2 *pic_param;
1164     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1165     dri_bo *slice_data_bo;
1166     int i, j;
1167
1168     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1169     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1170
1171     gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1172     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1173     intel_batchbuffer_emit_mi_flush(batch);
1174     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1175     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1176     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1177     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1178     gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1179     gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1180
1181     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1182         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1183             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1184
1185     for (j = 0; j < decode_state->num_slice_params; j++) {
1186         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1187         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1188         slice_data_bo = decode_state->slice_datas[j]->bo;
1189         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1190
1191         if (j == decode_state->num_slice_params - 1)
1192             next_slice_group_param = NULL;
1193         else
1194             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1195
1196         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1197             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1198
1199             if (i < decode_state->slice_params[j]->num_elements - 1)
1200                 next_slice_param = slice_param + 1;
1201             else
1202                 next_slice_param = next_slice_group_param;
1203
1204             gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1205             slice_param++;
1206         }
1207     }
1208
1209     intel_batchbuffer_end_atomic(batch);
1210     intel_batchbuffer_flush(batch);
1211 }
1212
1213 static const int va_to_gen7_vc1_pic_type[5] = {
1214     GEN7_VC1_I_PICTURE,
1215     GEN7_VC1_P_PICTURE,
1216     GEN7_VC1_B_PICTURE,
1217     GEN7_VC1_BI_PICTURE,
1218     GEN7_VC1_P_PICTURE,
1219 };
1220
1221 static const int va_to_gen7_vc1_mv[4] = {
1222     1, /* 1-MV */
1223     2, /* 1-MV half-pel */
1224     3, /* 1-MV half-pel bilinear */
1225     0, /* Mixed MV */
1226 };
1227
1228 static const int b_picture_scale_factor[21] = {
1229     128, 85,  170, 64,  192,
1230     51,  102, 153, 204, 43,
1231     215, 37,  74,  111, 148,
1232     185, 222, 32,  96,  160, 
1233     224,
1234 };
1235
1236 static const int va_to_gen7_vc1_condover[3] = {
1237     0,
1238     2,
1239     3
1240 };
1241
1242 static const int va_to_gen7_vc1_profile[4] = {
1243     GEN7_VC1_SIMPLE_PROFILE,
1244     GEN7_VC1_MAIN_PROFILE,
1245     GEN7_VC1_RESERVED_PROFILE,
1246     GEN7_VC1_ADVANCED_PROFILE
1247 };
1248
1249 static void 
1250 gen8_mfd_free_vc1_surface(void **data)
1251 {
1252     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1253
1254     if (!gen7_vc1_surface)
1255         return;
1256
1257     dri_bo_unreference(gen7_vc1_surface->dmv);
1258     free(gen7_vc1_surface);
1259     *data = NULL;
1260 }
1261
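/*
 * Per-surface private data for VC-1: remember the picture type and allocate
 * a direct-MV buffer (64 bytes per macroblock) on first use.
 */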
1262 static void
1263 gen8_mfd_init_vc1_surface(VADriverContextP ctx, 
1264                           VAPictureParameterBufferVC1 *pic_param,
1265                           struct object_surface *obj_surface)
1266 {
1267     struct i965_driver_data *i965 = i965_driver_data(ctx);
1268     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1269     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1270     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1271
1272     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1273
1274     if (!gen7_vc1_surface) {
1275         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1276         assert((obj_surface->size & 0x3f) == 0);
1277         obj_surface->private_data = gen7_vc1_surface;
1278     }
1279
1280     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1281
1282     if (gen7_vc1_surface->dmv == NULL) {
1283         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1284                                              "direct mv w/r buffer",
1285                                              width_in_mbs * height_in_mbs * 64,
1286                                              0x1000);
1287     }
1288 }
1289
1290 static void
1291 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1292                          struct decode_state *decode_state,
1293                          struct gen7_mfd_context *gen7_mfd_context)
1294 {
1295     VAPictureParameterBufferVC1 *pic_param;
1296     struct i965_driver_data *i965 = i965_driver_data(ctx);
1297     struct object_surface *obj_surface;
1298     dri_bo *bo;
1299     int width_in_mbs;
1300     int picture_type;
1301
1302     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1303     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1304     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1305     picture_type = pic_param->picture_fields.bits.picture_type;
1306  
1307     intel_update_vc1_frame_store_index(ctx,
1308                                        decode_state,
1309                                        pic_param,
1310                                        gen7_mfd_context->reference_surface);
1311
1312     /* Current decoded picture */
1313     obj_surface = decode_state->render_object;
1314     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1315     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1316
1317     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1318     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1319     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1320     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1321
1322     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1323     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1324     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1325     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1326
1327     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1328     bo = dri_bo_alloc(i965->intel.bufmgr,
1329                       "intra row store",
1330                       width_in_mbs * 64,
1331                       0x1000);
1332     assert(bo);
1333     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1334     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1335
1336     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1337     bo = dri_bo_alloc(i965->intel.bufmgr,
1338                       "deblocking filter row store",
1339                       width_in_mbs * 7 * 64,
1340                       0x1000);
1341     assert(bo);
1342     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1343     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1344
1345     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1346     bo = dri_bo_alloc(i965->intel.bufmgr,
1347                       "bsd mpc row store",
1348                       width_in_mbs * 96,
1349                       0x1000);
1350     assert(bo);
1351     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1352     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1353
1354     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1355
1356     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1357     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1358     
1359     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1360         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1361         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1362         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1363         int src_w, src_h;
1364         uint8_t *src = NULL, *dst = NULL;
1365
1366         assert(decode_state->bit_plane->buffer);
1367         src = decode_state->bit_plane->buffer;
1368
1369         bo = dri_bo_alloc(i965->intel.bufmgr,
1370                           "VC-1 Bitplane",
1371                           bitplane_width * height_in_mbs,
1372                           0x1000);
1373         assert(bo);
1374         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1375
1376         dri_bo_map(bo, True);
1377         assert(bo->virtual);
1378         dst = bo->virtual;
1379
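             /*
              * Repack the VA-supplied bit plane (two macroblocks per byte) into rows
              * of bitplane_width bytes in the nibble order the hardware expects.
              * For skipped pictures, bit 1 of every macroblock nibble is forced on.
              */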
1380         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1381             for (src_w = 0; src_w < width_in_mbs; src_w++) {
1382                 int src_index, dst_index;
1383                 int src_shift;
1384                 uint8_t src_value;
1385
1386                 src_index = (src_h * width_in_mbs + src_w) / 2;
1387                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1388                 src_value = ((src[src_index] >> src_shift) & 0xf);
1389
1390                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1391                     src_value |= 0x2;
1392                 }
1393
1394                 dst_index = src_w / 2;
1395                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1396             }
1397
1398             if (src_w & 1)
1399                 dst[src_w / 2] >>= 4;
1400
1401             dst += bitplane_width;
1402         }
1403
1404         dri_bo_unmap(bo);
1405     } else
1406         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1407 }
1408
1409 static void
1410 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1411                        struct decode_state *decode_state,
1412                        struct gen7_mfd_context *gen7_mfd_context)
1413 {
1414     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1415     VAPictureParameterBufferVC1 *pic_param;
1416     struct object_surface *obj_surface;
1417     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1418     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1419     int unified_mv_mode;
1420     int ref_field_pic_polarity = 0;
1421     int scale_factor = 0;
1422     int trans_ac_y = 0;
1423     int dmv_surface_valid = 0;
1424     int brfd = 0;
1425     int fcm = 0;
1426     int picture_type;
1427     int profile;
1428     int overlap;
1429     int interpolation_mode = 0;
1430
1431     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1432     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1433
1434     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1435     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1436     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1437     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1438     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1439     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1440     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1441     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1442
1443     if (dquant == 0) {
1444         alt_pquant_config = 0;
1445         alt_pquant_edge_mask = 0;
1446     } else if (dquant == 2) {
1447         alt_pquant_config = 1;
1448         alt_pquant_edge_mask = 0xf;
1449     } else {
1450         assert(dquant == 1);
1451         if (dquantfrm == 0) {
1452             alt_pquant_config = 0;
1453             alt_pquant_edge_mask = 0;
1454             alt_pq = 0;
1455         } else {
1456             assert(dquantfrm == 1);
1457             alt_pquant_config = 1;
1458
1459             switch (dqprofile) {
1460             case 3:
1461                 if (dqbilevel == 0) {
1462                     alt_pquant_config = 2;
1463                     alt_pquant_edge_mask = 0;
1464                 } else {
1465                     assert(dqbilevel == 1);
1466                     alt_pquant_config = 3;
1467                     alt_pquant_edge_mask = 0;
1468                 }
1469                 break;
1470                 
1471             case 0:
1472                 alt_pquant_edge_mask = 0xf;
1473                 break;
1474
1475             case 1:
1476                 if (dqdbedge == 3)
1477                     alt_pquant_edge_mask = 0x9;
1478                 else
1479                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1480
1481                 break;
1482
1483             case 2:
1484                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1485                 break;
1486
1487             default:
1488                 assert(0);
1489             }
1490         }
1491     }
1492
1493     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1494         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1495         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1496     } else {
1497         assert(pic_param->mv_fields.bits.mv_mode < 4);
1498         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1499     }
1500
1501     if (pic_param->sequence_fields.bits.interlace == 1 &&
1502         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1503         /* FIXME: calculate reference field picture polarity */
1504         assert(0);
1505         ref_field_pic_polarity = 0;
1506     }
1507
1508     if (pic_param->b_picture_fraction < 21)
1509         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1510
1511     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1512     
1513     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1514         picture_type == GEN7_VC1_I_PICTURE)
1515         picture_type = GEN7_VC1_BI_PICTURE;
1516
1517     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1518         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1519     else {
1520         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1521
1522         /*
1523          * 8.3.6.2.1 Transform Type Selection
1524          * If variable-sized transform coding is not enabled,
1525          * then the 8x8 transform shall be used for all blocks.
1526          * it is also MFX_VC1_PIC_STATE requirement.
1527          */
1528         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1529             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1530             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1531         }
1532     }
1533
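         /*
          * Direct-mode MVs for a B picture come from the backward anchor; the saved
          * DMV buffer is only usable when that anchor is a P picture.
          */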
1534     if (picture_type == GEN7_VC1_B_PICTURE) {
1535         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1536
1537         obj_surface = decode_state->reference_objects[1];
1538
1539         if (obj_surface)
1540             gen7_vc1_surface = obj_surface->private_data;
1541
1542         if (!gen7_vc1_surface || 
1543             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1544              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1545             dmv_surface_valid = 0;
1546         else
1547             dmv_surface_valid = 1;
1548     }
1549
1550     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1551
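         /*
          * Map frame_coding_mode to the hardware FCM field: progressive (0) and
          * frame-interlace (1) pass through; field-interlace becomes 2 or 3
          * depending on top_field_first.
          */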
1552     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1553         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1554     else {
1555         if (pic_param->picture_fields.bits.top_field_first)
1556             fcm = 2;
1557         else
1558             fcm = 3;
1559     }
1560
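         /*
          * For B pictures, derive BRFD by scaling the reference distance with the
          * 8.8 fixed-point B-fraction scale factor and clamping at zero.
          */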
1561     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1562         brfd = pic_param->reference_fields.bits.reference_distance;
1563         brfd = (scale_factor * brfd) >> 8;
1564         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1565
1566         if (brfd < 0)
1567             brfd = 0;
1568     }
1569
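         /*
          * Overlap smoothing: simple/main profile applies it to non-B pictures with
          * PQUANT >= 9; advanced profile applies it to P pictures with PQUANT >= 9,
          * and to I/BI pictures either with PQUANT >= 9 or via CONDOVER.
          */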
1570     overlap = pic_param->sequence_fields.bits.overlap;
1571
1572     if (overlap) {
1573         overlap = 0;
1574         if (profile != GEN7_VC1_ADVANCED_PROFILE) {
1575             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1576                 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1577                 overlap = 1;
1578             }
1579         } else {
1580             if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1581                 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1582                 overlap = 1;
1583             }
1584             if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1585                 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
1586                 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1587                     overlap = 1;
1588                 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1589                            va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1590                     overlap = 1;
1591                 }
1592             }
1593         }
1594     }
1595
1596     assert(pic_param->conditional_overlap_flag < 3);
1597     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1598
1599     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1600         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1601          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1602         interpolation_mode = 9; /* Half-pel bilinear */
1603     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1604              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1605               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1606         interpolation_mode = 1; /* Half-pel bicubic */
1607     else
1608         interpolation_mode = 0; /* Quarter-pel bicubic */
1609
1610     BEGIN_BCS_BATCH(batch, 6);
1611     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1612     OUT_BCS_BATCH(batch,
1613                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1614                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1615     OUT_BCS_BATCH(batch,
1616                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1617                   dmv_surface_valid << 15 |
1618                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1619                   pic_param->rounding_control << 13 |
1620                   pic_param->sequence_fields.bits.syncmarker << 12 |
1621                   interpolation_mode << 8 |
1622                   0 << 7 | /* FIXME: scale up or down ??? */
1623                   pic_param->range_reduction_frame << 6 |
1624                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1625                   overlap << 4 |
1626                   !pic_param->picture_fields.bits.is_first_field << 3 |
1627                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1628     OUT_BCS_BATCH(batch,
1629                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1630                   picture_type << 26 |
1631                   fcm << 24 |
1632                   alt_pq << 16 |
1633                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1634                   scale_factor << 0);
1635     OUT_BCS_BATCH(batch,
1636                   unified_mv_mode << 28 |
1637                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1638                   pic_param->fast_uvmc_flag << 26 |
1639                   ref_field_pic_polarity << 25 |
1640                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1641                   pic_param->reference_fields.bits.reference_distance << 20 |
1642                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1643                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1644                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1645                   alt_pquant_edge_mask << 4 |
1646                   alt_pquant_config << 2 |
1647                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1648                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1649     OUT_BCS_BATCH(batch,
1650                   !!pic_param->bitplane_present.value << 31 |
1651                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1652                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1653                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1654                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1655                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1656                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1657                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1658                   pic_param->mv_fields.bits.mv_table << 20 |
1659                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1660                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1661                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1662                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1663                   pic_param->mb_mode_table << 8 |
1664                   trans_ac_y << 6 |
1665                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1666                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1667                   pic_param->cbp_table << 0);
1668     ADVANCE_BCS_BATCH(batch);
1669 }
1670
1671 static void
1672 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1673                              struct decode_state *decode_state,
1674                              struct gen7_mfd_context *gen7_mfd_context)
1675 {
1676     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1677     VAPictureParameterBufferVC1 *pic_param;
1678     int intensitycomp_single;
1679
1680     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1681     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1682     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1683
1684     BEGIN_BCS_BATCH(batch, 6);
1685     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1686     OUT_BCS_BATCH(batch,
1687                   0 << 14 | /* FIXME: double ??? */
1688                   0 << 12 |
1689                   intensitycomp_single << 10 |
1690                   intensitycomp_single << 8 |
1691                   0 << 4 | /* FIXME: interlace mode */
1692                   0);
1693     OUT_BCS_BATCH(batch,
1694                   pic_param->luma_shift << 16 |
1695                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1696     OUT_BCS_BATCH(batch, 0);
1697     OUT_BCS_BATCH(batch, 0);
1698     OUT_BCS_BATCH(batch, 0);
1699     ADVANCE_BCS_BATCH(batch);
1700 }
1701
1702 static void
1703 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1704                               struct decode_state *decode_state,
1705                               struct gen7_mfd_context *gen7_mfd_context)
1706 {
1707     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1708     struct object_surface *obj_surface;
1709     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1710
1711     obj_surface = decode_state->render_object;
1712
1713     if (obj_surface && obj_surface->private_data) {
1714         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1715     }
1716
1717     obj_surface = decode_state->reference_objects[1];
1718
1719     if (obj_surface && obj_surface->private_data) {
1720         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1721     }
1722
1723     BEGIN_BCS_BATCH(batch, 7);
1724     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1725
1726     if (dmv_write_buffer)
1727         OUT_BCS_RELOC(batch, dmv_write_buffer,
1728                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1729                       0);
1730     else
1731         OUT_BCS_BATCH(batch, 0);
1732
1733     OUT_BCS_BATCH(batch, 0);
1734     OUT_BCS_BATCH(batch, 0);
1735
1736     if (dmv_read_buffer)
1737         OUT_BCS_RELOC(batch, dmv_read_buffer,
1738                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1739                       0);
1740     else
1741         OUT_BCS_BATCH(batch, 0);
1742     
1743     OUT_BCS_BATCH(batch, 0);
1744     OUT_BCS_BATCH(batch, 0);
1745                   
1746     ADVANCE_BCS_BATCH(batch);
1747 }
1748
1749 static int
1750 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1751 {
1752     int out_slice_data_bit_offset;
1753     int slice_header_size = in_slice_data_bit_offset / 8;
1754     int i, j;
1755
1756     if (profile != 3)
1757         out_slice_data_bit_offset = in_slice_data_bit_offset;
1758     else {
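             /*
              * Advanced profile inserts an emulation-prevention byte (0x03) after each
              * 0x00 0x00 pair; count those inside the slice header so the macroblock
              * bit offset refers to the raw buffer contents.
              */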
1759         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1760             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1761                 i++, j += 2;
1762             }
1763         }
1764
1765         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1766     }
1767
1768     return out_slice_data_bit_offset;
1769 }
1770
1771 static void
1772 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1773                         VAPictureParameterBufferVC1 *pic_param,
1774                         VASliceParameterBufferVC1 *slice_param,
1775                         VASliceParameterBufferVC1 *next_slice_param,
1776                         dri_bo *slice_data_bo,
1777                         struct gen7_mfd_context *gen7_mfd_context)
1778 {
1779     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1780     int next_slice_start_vert_pos;
1781     int macroblock_offset;
1782     uint8_t *slice_data = NULL;
1783
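         /* Recompute the first-macroblock bit offset against the raw slice data. */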
1784     dri_bo_map(slice_data_bo, 0);
1785     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1786     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1787                                                                slice_param->macroblock_offset,
1788                                                                pic_param->sequence_fields.bits.profile);
1789     dri_bo_unmap(slice_data_bo);
1790
1791     if (next_slice_param)
1792         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1793     else
1794         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1795
1796     BEGIN_BCS_BATCH(batch, 5);
1797     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1798     OUT_BCS_BATCH(batch, 
1799                   slice_param->slice_data_size - (macroblock_offset >> 3));
1800     OUT_BCS_BATCH(batch, 
1801                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1802     OUT_BCS_BATCH(batch,
1803                   slice_param->slice_vertical_position << 16 |
1804                   next_slice_start_vert_pos << 0);
1805     OUT_BCS_BATCH(batch,
1806                   (macroblock_offset & 0x7));
1807     ADVANCE_BCS_BATCH(batch);
1808 }
1809
1810 static void
1811 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1812                             struct decode_state *decode_state,
1813                             struct gen7_mfd_context *gen7_mfd_context)
1814 {
1815     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1816     VAPictureParameterBufferVC1 *pic_param;
1817     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1818     dri_bo *slice_data_bo;
1819     int i, j;
1820
1821     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1822     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1823
1824     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1825     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1826     intel_batchbuffer_emit_mi_flush(batch);
1827     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1828     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1829     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1830     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1831     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1832     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1833     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1834
1835     for (j = 0; j < decode_state->num_slice_params; j++) {
1836         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1837         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1838         slice_data_bo = decode_state->slice_datas[j]->bo;
1839         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1840
1841         if (j == decode_state->num_slice_params - 1)
1842             next_slice_group_param = NULL;
1843         else
1844             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1845
1846         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1847             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1848
1849             if (i < decode_state->slice_params[j]->num_elements - 1)
1850                 next_slice_param = slice_param + 1;
1851             else
1852                 next_slice_param = next_slice_group_param;
1853
1854             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1855             slice_param++;
1856         }
1857     }
1858
1859     intel_batchbuffer_end_atomic(batch);
1860     intel_batchbuffer_flush(batch);
1861 }
1862
1863 static void
1864 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1865                           struct decode_state *decode_state,
1866                           struct gen7_mfd_context *gen7_mfd_context)
1867 {
1868     struct object_surface *obj_surface;
1869     VAPictureParameterBufferJPEGBaseline *pic_param;
1870     int subsampling = SUBSAMPLE_YUV420;
1871     int fourcc = VA_FOURCC_IMC3;
1872
1873     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1874
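         /* Pick the render-target fourcc and subsampling from the per-component sampling factors. */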
1875     if (pic_param->num_components == 1)
1876         subsampling = SUBSAMPLE_YUV400;
1877     else if (pic_param->num_components == 3) {
1878         int h1 = pic_param->components[0].h_sampling_factor;
1879         int h2 = pic_param->components[1].h_sampling_factor;
1880         int h3 = pic_param->components[2].h_sampling_factor;
1881         int v1 = pic_param->components[0].v_sampling_factor;
1882         int v2 = pic_param->components[1].v_sampling_factor;
1883         int v3 = pic_param->components[2].v_sampling_factor;
1884
1885         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1886             v1 == 2 && v2 == 1 && v3 == 1) {
1887             subsampling = SUBSAMPLE_YUV420;
1888             fourcc = VA_FOURCC_IMC3;
1889         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1890                    v1 == 1 && v2 == 1 && v3 == 1) {
1891             subsampling = SUBSAMPLE_YUV422H;
1892             fourcc = VA_FOURCC_422H;
1893         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1894                    v1 == 1 && v2 == 1 && v3 == 1) {
1895             subsampling = SUBSAMPLE_YUV444;
1896             fourcc = VA_FOURCC_444P;
1897         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1898                    v1 == 1 && v2 == 1 && v3 == 1) {
1899             subsampling = SUBSAMPLE_YUV411;
1900             fourcc = VA_FOURCC_411P;
1901         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1902                    v1 == 2 && v2 == 1 && v3 == 1) {
1903             subsampling = SUBSAMPLE_YUV422V;
1904             fourcc = VA_FOURCC_422V;
1905         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1906                    v1 == 2 && v2 == 2 && v3 == 2) {
1907             subsampling = SUBSAMPLE_YUV422H;
1908             fourcc = VA_FOURCC_422H;
1909         } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1910                    v1 == 2 && v2 == 1 && v3 == 1) {
1911             subsampling = SUBSAMPLE_YUV422V;
1912             fourcc = VA_FOURCC_422V;
1913         } else
1914             assert(0);
1915     }
1916     else {
1917         assert(0);
1918     }
1919
1920     /* Current decoded picture */
1921     obj_surface = decode_state->render_object;
1922     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1923
1924     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1925     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1926     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1927     gen7_mfd_context->pre_deblocking_output.valid = 1;
1928
1929     gen7_mfd_context->post_deblocking_output.bo = NULL;
1930     gen7_mfd_context->post_deblocking_output.valid = 0;
1931
1932     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1933     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1934
1935     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1936     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1937
1938     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1939     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1940
1941     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1942     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1943
1944     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1945     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1946 }
1947
1948 static const int va_to_gen7_jpeg_rotation[4] = {
1949     GEN7_JPEG_ROTATION_0,
1950     GEN7_JPEG_ROTATION_90,
1951     GEN7_JPEG_ROTATION_180,
1952     GEN7_JPEG_ROTATION_270
1953 };
1954
1955 static void
1956 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1957                         struct decode_state *decode_state,
1958                         struct gen7_mfd_context *gen7_mfd_context)
1959 {
1960     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1961     VAPictureParameterBufferJPEGBaseline *pic_param;
1962     int chroma_type = GEN7_YUV420;
1963     int frame_width_in_blks;
1964     int frame_height_in_blks;
1965
1966     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1967     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1968
1969     if (pic_param->num_components == 1)
1970         chroma_type = GEN7_YUV400;
1971     else if (pic_param->num_components == 3) {
1972         int h1 = pic_param->components[0].h_sampling_factor;
1973         int h2 = pic_param->components[1].h_sampling_factor;
1974         int h3 = pic_param->components[2].h_sampling_factor;
1975         int v1 = pic_param->components[0].v_sampling_factor;
1976         int v2 = pic_param->components[1].v_sampling_factor;
1977         int v3 = pic_param->components[2].v_sampling_factor;
1978
1979         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1980             v1 == 2 && v2 == 1 && v3 == 1)
1981             chroma_type = GEN7_YUV420;
1982         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1983                  v1 == 1 && v2 == 1 && v3 == 1)
1984             chroma_type = GEN7_YUV422H_2Y;
1985         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1986                  v1 == 1 && v2 == 1 && v3 == 1)
1987             chroma_type = GEN7_YUV444;
1988         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1989                  v1 == 1 && v2 == 1 && v3 == 1)
1990             chroma_type = GEN7_YUV411;
1991         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1992                  v1 == 2 && v2 == 1 && v3 == 1)
1993             chroma_type = GEN7_YUV422V_2Y;
1994         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1995                  v1 == 2 && v2 == 2 && v3 == 2)
1996             chroma_type = GEN7_YUV422H_4Y;
1997         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1998                  v1 == 2 && v2 == 1 && v3 == 1)
1999             chroma_type = GEN7_YUV422V_4Y;
2000         else
2001             assert(0);
2002     }
2003
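         /*
          * MFX_JPEG_PIC_STATE takes the frame size in 8x8 blocks, rounded up to the
          * MCU width/height implied by the chroma type.
          */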
2004     if (chroma_type == GEN7_YUV400 ||
2005         chroma_type == GEN7_YUV444 ||
2006         chroma_type == GEN7_YUV422V_2Y) {
2007         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2008         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2009     } else if (chroma_type == GEN7_YUV411) {
2010         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2011         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2012     } else {
2013         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2014         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2015     }
2016
2017     BEGIN_BCS_BATCH(batch, 3);
2018     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2019     OUT_BCS_BATCH(batch,
2020                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2021                   (chroma_type << 0));
2022     OUT_BCS_BATCH(batch,
2023                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2024                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2025     ADVANCE_BCS_BATCH(batch);
2026 }
2027
2028 static const int va_to_gen7_jpeg_hufftable[2] = {
2029     MFX_HUFFTABLE_ID_Y,
2030     MFX_HUFFTABLE_ID_UV
2031 };
2032
2033 static void
2034 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2035                                struct decode_state *decode_state,
2036                                struct gen7_mfd_context *gen7_mfd_context,
2037                                int num_tables)
2038 {
2039     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2040     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2041     int index;
2042
2043     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2044         return;
2045
2046     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2047
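         /*
          * Each MFX_JPEG_HUFF_TABLE_STATE is 53 DWs: command header, table id, then
          * the DC/AC code-length counts and symbol values packed as raw bytes.
          */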
2048     for (index = 0; index < num_tables; index++) {
2049         int id = va_to_gen7_jpeg_hufftable[index];
2050         if (!huffman_table->load_huffman_table[index])
2051             continue;
2052         BEGIN_BCS_BATCH(batch, 53);
2053         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2054         OUT_BCS_BATCH(batch, id);
2055         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2056         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2057         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2058         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2059         ADVANCE_BCS_BATCH(batch);
2060     }
2061 }
2062
2063 static const int va_to_gen7_jpeg_qm[5] = {
2064     -1,
2065     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2066     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2067     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2068     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2069 };
2070
2071 static void
2072 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2073                        struct decode_state *decode_state,
2074                        struct gen7_mfd_context *gen7_mfd_context)
2075 {
2076     VAPictureParameterBufferJPEGBaseline *pic_param;
2077     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2078     int index;
2079
2080     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2081         return;
2082
2083     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2084     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2085
2086     assert(pic_param->num_components <= 3);
2087
2088     for (index = 0; index < pic_param->num_components; index++) {
2089         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2090         int qm_type;
2091         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2092         unsigned char raster_qm[64];
2093         int j;
2094
2095         if (id > 4 || id < 1)
2096             continue;
2097
2098         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2099             continue;
2100
2101         qm_type = va_to_gen7_jpeg_qm[id];
2102
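             /* VA supplies JPEG quantiser tables in zig-zag order; convert to raster order. */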
2103         for (j = 0; j < 64; j++)
2104             raster_qm[zigzag_direct[j]] = qm[j];
2105
2106         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2107     }
2108 }
2109
2110 static void
2111 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2112                          VAPictureParameterBufferJPEGBaseline *pic_param,
2113                          VASliceParameterBufferJPEGBaseline *slice_param,
2114                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2115                          dri_bo *slice_data_bo,
2116                          struct gen7_mfd_context *gen7_mfd_context)
2117 {
2118     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2119     int scan_component_mask = 0;
2120     int i;
2121
2122     assert(slice_param->num_components > 0);
2123     assert(slice_param->num_components < 4);
2124     assert(slice_param->num_components <= pic_param->num_components);
2125
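         /* Build the scan component mask: bit 0 = Y, bit 1 = Cb, bit 2 = Cr. */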
2126     for (i = 0; i < slice_param->num_components; i++) {
2127         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2128         case 1:
2129             scan_component_mask |= (1 << 0);
2130             break;
2131         case 2:
2132             scan_component_mask |= (1 << 1);
2133             break;
2134         case 3:
2135             scan_component_mask |= (1 << 2);
2136             break;
2137         default:
2138             assert(0);
2139             break;
2140         }
2141     }
2142
2143     BEGIN_BCS_BATCH(batch, 6);
2144     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2145     OUT_BCS_BATCH(batch, 
2146                   slice_param->slice_data_size);
2147     OUT_BCS_BATCH(batch, 
2148                   slice_param->slice_data_offset);
2149     OUT_BCS_BATCH(batch,
2150                   slice_param->slice_horizontal_position << 16 |
2151                   slice_param->slice_vertical_position << 0);
2152     OUT_BCS_BATCH(batch,
2153                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2154                   (scan_component_mask << 27) |                 /* scan components */
2155                   (0 << 26) |   /* disable interrupt allowed */
2156                   (slice_param->num_mcus << 0));                /* MCU count */
2157     OUT_BCS_BATCH(batch,
2158                   (slice_param->restart_interval << 0));    /* RestartInterval */
2159     ADVANCE_BCS_BATCH(batch);
2160 }
2161
2162 /* Workaround for JPEG decoding on Ivybridge */
2163 #ifdef JPEG_WA
2164
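 /*
  * A tiny hard-coded 16x16 AVC intra clip (bitstream bytes, byte size, bit offset of
  * the macroblock data, QP).  It is decoded before the real JPEG picture as part of
  * the workaround sequence in gen8_mfd_jpeg_wa().
  */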
2165 static struct {
2166     int width;
2167     int height;
2168     unsigned char data[32];
2169     int data_size;
2170     int data_bit_offset;
2171     int qp;
2172 } gen7_jpeg_wa_clip = {
2173     16,
2174     16,
2175     {
2176         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2177         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2178     },
2179     14,
2180     40,
2181     28,
2182 };
2183
2184 static void
2185 gen8_jpeg_wa_init(VADriverContextP ctx,
2186                   struct gen7_mfd_context *gen7_mfd_context)
2187 {
2188     struct i965_driver_data *i965 = i965_driver_data(ctx);
2189     VAStatus status;
2190     struct object_surface *obj_surface;
2191
2192     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2193         i965_DestroySurfaces(ctx,
2194                              &gen7_mfd_context->jpeg_wa_surface_id,
2195                              1);
2196
2197     status = i965_CreateSurfaces(ctx,
2198                                  gen7_jpeg_wa_clip.width,
2199                                  gen7_jpeg_wa_clip.height,
2200                                  VA_RT_FORMAT_YUV420,
2201                                  1,
2202                                  &gen7_mfd_context->jpeg_wa_surface_id);
2203     assert(status == VA_STATUS_SUCCESS);
2204
2205     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2206     assert(obj_surface);
2207     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2208     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2209
2210     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2211         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2212                                                                "JPEG WA data",
2213                                                                0x1000,
2214                                                                0x1000);
2215         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2216                        0,
2217                        gen7_jpeg_wa_clip.data_size,
2218                        gen7_jpeg_wa_clip.data);
2219     }
2220 }
2221
2222 static void
2223 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2224                               struct gen7_mfd_context *gen7_mfd_context)
2225 {
2226     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2227
2228     BEGIN_BCS_BATCH(batch, 5);
2229     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2230     OUT_BCS_BATCH(batch,
2231                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2232                   (MFD_MODE_VLD << 15) | /* VLD mode */
2233                   (0 << 10) | /* disable Stream-Out */
2234                   (0 << 9)  | /* Post Deblocking Output */
2235                   (1 << 8)  | /* Pre Deblocking Output */
2236                   (0 << 5)  | /* not in stitch mode */
2237                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2238                   (MFX_FORMAT_AVC << 0));
2239     OUT_BCS_BATCH(batch,
2240                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2241                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2242                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2243                   (0 << 1)  |
2244                   (0 << 0));
2245     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2246     OUT_BCS_BATCH(batch, 0); /* reserved */
2247     ADVANCE_BCS_BATCH(batch);
2248 }
2249
2250 static void
2251 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2252                            struct gen7_mfd_context *gen7_mfd_context)
2253 {
2254     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2255     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2256
2257     BEGIN_BCS_BATCH(batch, 6);
2258     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2259     OUT_BCS_BATCH(batch, 0);
2260     OUT_BCS_BATCH(batch,
2261                   ((obj_surface->orig_width - 1) << 18) |
2262                   ((obj_surface->orig_height - 1) << 4));
2263     OUT_BCS_BATCH(batch,
2264                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2265                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2266                   (0 << 22) | /* surface object control state, ignored */
2267                   ((obj_surface->width - 1) << 3) | /* pitch */
2268                   (0 << 2)  | /* must be 0 */
2269                   (1 << 1)  | /* must be tiled */
2270                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2271     OUT_BCS_BATCH(batch,
2272                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2273                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2274     OUT_BCS_BATCH(batch,
2275                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2276                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2277     ADVANCE_BCS_BATCH(batch);
2278 }
2279
2280 static void
2281 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2282                                  struct gen7_mfd_context *gen7_mfd_context)
2283 {
2284     struct i965_driver_data *i965 = i965_driver_data(ctx);
2285     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2286     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2287     dri_bo *intra_bo;
2288     int i;
2289
2290     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2291                             "intra row store",
2292                             128 * 64,
2293                             0x1000);
2294
2295     BEGIN_BCS_BATCH(batch, 61);
2296     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2297     OUT_BCS_RELOC(batch,
2298                   obj_surface->bo,
2299                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2300                   0);
2301     OUT_BCS_BATCH(batch, 0);
2302     OUT_BCS_BATCH(batch, 0);
2303
2304
2305     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2306     OUT_BCS_BATCH(batch, 0);
2307     OUT_BCS_BATCH(batch, 0);
2308
2309     /* uncompressed-video & stream out 7-12 */
2310     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2311     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2312     OUT_BCS_BATCH(batch, 0);
2313     OUT_BCS_BATCH(batch, 0);
2314     OUT_BCS_BATCH(batch, 0);
2315     OUT_BCS_BATCH(batch, 0);
2316
2317     /* the DW 13-15 is for intra row store scratch */
2318     OUT_BCS_RELOC(batch,
2319                   intra_bo,
2320                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2321                   0);
2322     OUT_BCS_BATCH(batch, 0);
2323     OUT_BCS_BATCH(batch, 0);
2324
2325     /* the DW 16-18 is for deblocking filter */
2326     OUT_BCS_BATCH(batch, 0);
2327     OUT_BCS_BATCH(batch, 0);
2328     OUT_BCS_BATCH(batch, 0);
2329
2330     /* DW 19..50 */
2331     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2332         OUT_BCS_BATCH(batch, 0);
2333         OUT_BCS_BATCH(batch, 0);
2334     }
2335     OUT_BCS_BATCH(batch, 0);
2336
2337     /* the DW52-54 is for mb status address */
2338     OUT_BCS_BATCH(batch, 0);
2339     OUT_BCS_BATCH(batch, 0);
2340     OUT_BCS_BATCH(batch, 0);
2341     /* the DW56-60 is for ILDB & second ILDB address */
2342     OUT_BCS_BATCH(batch, 0);
2343     OUT_BCS_BATCH(batch, 0);
2344     OUT_BCS_BATCH(batch, 0);
2345     OUT_BCS_BATCH(batch, 0);
2346     OUT_BCS_BATCH(batch, 0);
2347     OUT_BCS_BATCH(batch, 0);
2348
2349     ADVANCE_BCS_BATCH(batch);
2350
2351     dri_bo_unreference(intra_bo);
2352 }
2353
2354 static void
2355 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2356                                      struct gen7_mfd_context *gen7_mfd_context)
2357 {
2358     struct i965_driver_data *i965 = i965_driver_data(ctx);
2359     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2360     dri_bo *bsd_mpc_bo, *mpr_bo;
2361
2362     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2363                               "bsd mpc row store",
2364                               11520, /* 1.5 * 120 * 64 */
2365                               0x1000);
2366
2367     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2368                           "mpr row store",
2369                           7680, /* 1.0 * 120 * 64 */
2370                           0x1000);
2371
2372     BEGIN_BCS_BATCH(batch, 10);
2373     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2374
2375     OUT_BCS_RELOC(batch,
2376                   bsd_mpc_bo,
2377                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2378                   0);
2379
2380     OUT_BCS_BATCH(batch, 0);
2381     OUT_BCS_BATCH(batch, 0);
2382
2383     OUT_BCS_RELOC(batch,
2384                   mpr_bo,
2385                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2386                   0);
2387     OUT_BCS_BATCH(batch, 0);
2388     OUT_BCS_BATCH(batch, 0);
2389
2390     OUT_BCS_BATCH(batch, 0);
2391     OUT_BCS_BATCH(batch, 0);
2392     OUT_BCS_BATCH(batch, 0);
2393
2394     ADVANCE_BCS_BATCH(batch);
2395
2396     dri_bo_unreference(bsd_mpc_bo);
2397     dri_bo_unreference(mpr_bo);
2398 }
2399
2400 static void
2401 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2402                           struct gen7_mfd_context *gen7_mfd_context)
2403 {
2404
2405 }
2406
2407 static void
2408 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2409                            struct gen7_mfd_context *gen7_mfd_context)
2410 {
2411     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2412     int img_struct = 0;
2413     int mbaff_frame_flag = 0;
2414     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2415
2416     BEGIN_BCS_BATCH(batch, 16);
2417     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2418     OUT_BCS_BATCH(batch, 
2419                   width_in_mbs * height_in_mbs);
2420     OUT_BCS_BATCH(batch, 
2421                   ((height_in_mbs - 1) << 16) | 
2422                   ((width_in_mbs - 1) << 0));
2423     OUT_BCS_BATCH(batch, 
2424                   (0 << 24) |
2425                   (0 << 16) |
2426                   (0 << 14) |
2427                   (0 << 13) |
2428                   (0 << 12) | /* differ from GEN6 */
2429                   (0 << 10) |
2430                   (img_struct << 8));
2431     OUT_BCS_BATCH(batch,
2432                   (1 << 10) | /* 4:2:0 */
2433                   (1 << 7) |  /* CABAC */
2434                   (0 << 6) |
2435                   (0 << 5) |
2436                   (0 << 4) |
2437                   (0 << 3) |
2438                   (1 << 2) |
2439                   (mbaff_frame_flag << 1) |
2440                   (0 << 0));
2441     OUT_BCS_BATCH(batch, 0);
2442     OUT_BCS_BATCH(batch, 0);
2443     OUT_BCS_BATCH(batch, 0);
2444     OUT_BCS_BATCH(batch, 0);
2445     OUT_BCS_BATCH(batch, 0);
2446     OUT_BCS_BATCH(batch, 0);
2447     OUT_BCS_BATCH(batch, 0);
2448     OUT_BCS_BATCH(batch, 0);
2449     OUT_BCS_BATCH(batch, 0);
2450     OUT_BCS_BATCH(batch, 0);
2451     OUT_BCS_BATCH(batch, 0);
2452     ADVANCE_BCS_BATCH(batch);
2453 }
2454
2455 static void
2456 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2457                                   struct gen7_mfd_context *gen7_mfd_context)
2458 {
2459     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2460     int i;
2461
2462     BEGIN_BCS_BATCH(batch, 71);
2463     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2464
2465     /* reference surfaces 0..15 */
2466     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2467         OUT_BCS_BATCH(batch, 0); /* top */
2468         OUT_BCS_BATCH(batch, 0); /* bottom */
2469     }
2470
2471     OUT_BCS_BATCH(batch, 0);
2472
2473     /* the current decoding frame/field */
2474     OUT_BCS_BATCH(batch, 0); /* top */
2475     OUT_BCS_BATCH(batch, 0);
2476     OUT_BCS_BATCH(batch, 0);
2477
2478     /* POC List */
2479     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2480         OUT_BCS_BATCH(batch, 0);
2481         OUT_BCS_BATCH(batch, 0);
2482     }
2483
2484     OUT_BCS_BATCH(batch, 0);
2485     OUT_BCS_BATCH(batch, 0);
2486
2487     ADVANCE_BCS_BATCH(batch);
2488 }
2489
2490 static void
2491 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2492                                      struct gen7_mfd_context *gen7_mfd_context)
2493 {
2494     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2495
2496     BEGIN_BCS_BATCH(batch, 11);
2497     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2498     OUT_BCS_RELOC(batch,
2499                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2500                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2501                   0);
2502     OUT_BCS_BATCH(batch, 0);
2503     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2504     OUT_BCS_BATCH(batch, 0);
2505     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2506     OUT_BCS_BATCH(batch, 0);
2507     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2508     OUT_BCS_BATCH(batch, 0);
2509     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2510     OUT_BCS_BATCH(batch, 0);
2511     ADVANCE_BCS_BATCH(batch);
2512 }
2513
2514 static void
2515 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2516                             struct gen7_mfd_context *gen7_mfd_context)
2517 {
2518     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2519
2520     /* the input bitstream format on GEN7 differs from GEN6 */
2521     BEGIN_BCS_BATCH(batch, 6);
2522     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2523     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2524     OUT_BCS_BATCH(batch, 0);
2525     OUT_BCS_BATCH(batch,
2526                   (0 << 31) |
2527                   (0 << 14) |
2528                   (0 << 12) |
2529                   (0 << 10) |
2530                   (0 << 8));
2531     OUT_BCS_BATCH(batch,
2532                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2533                   (0 << 5)  |
2534                   (0 << 4)  |
2535                   (1 << 3) | /* LastSlice Flag */
2536                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2537     OUT_BCS_BATCH(batch, 0);
2538     ADVANCE_BCS_BATCH(batch);
2539 }
2540
2541 static void
2542 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2543                              struct gen7_mfd_context *gen7_mfd_context)
2544 {
2545     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2546     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2547     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2548     int first_mb_in_slice = 0;
2549     int slice_type = SLICE_TYPE_I;
2550
2551     BEGIN_BCS_BATCH(batch, 11);
2552     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2553     OUT_BCS_BATCH(batch, slice_type);
2554     OUT_BCS_BATCH(batch, 
2555                   (num_ref_idx_l1 << 24) |
2556                   (num_ref_idx_l0 << 16) |
2557                   (0 << 8) |
2558                   (0 << 0));
2559     OUT_BCS_BATCH(batch, 
2560                   (0 << 29) |
2561                   (1 << 27) |   /* disable Deblocking */
2562                   (0 << 24) |
2563                   (gen7_jpeg_wa_clip.qp << 16) |
2564                   (0 << 8) |
2565                   (0 << 0));
2566     OUT_BCS_BATCH(batch, 
2567                   (slice_ver_pos << 24) |
2568                   (slice_hor_pos << 16) | 
2569                   (first_mb_in_slice << 0));
2570     OUT_BCS_BATCH(batch,
2571                   (next_slice_ver_pos << 16) |
2572                   (next_slice_hor_pos << 0));
2573     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2574     OUT_BCS_BATCH(batch, 0);
2575     OUT_BCS_BATCH(batch, 0);
2576     OUT_BCS_BATCH(batch, 0);
2577     OUT_BCS_BATCH(batch, 0);
2578     ADVANCE_BCS_BATCH(batch);
2579 }
2580
2581 static void
2582 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2583                  struct gen7_mfd_context *gen7_mfd_context)
2584 {
2585     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2586     gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2587     intel_batchbuffer_emit_mi_flush(batch);
2588     gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2589     gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2590     gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2591     gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2592     gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2593     gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2594     gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2595
2596     gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2597     gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2598     gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2599 }
2600
2601 #endif
2602
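/*
 * Baseline JPEG decode. The slice parameters are walked twice: the first
 * pass only scans every scan component for the largest DC/AC Huffman
 * table selector so the right number of Huffman table states can be
 * emitted, the second pass programs one BSD object per scan (slice element).
 */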
2603 void
2604 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2605                              struct decode_state *decode_state,
2606                              struct gen7_mfd_context *gen7_mfd_context)
2607 {
2608     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2609     VAPictureParameterBufferJPEGBaseline *pic_param;
2610     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2611     dri_bo *slice_data_bo;
2612     int i, j, max_selector = 0;
2613
2614     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2615     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2616
2617     /* Currently only support Baseline DCT */
2618     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2619     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2620 #ifdef JPEG_WA
2621     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2622 #endif
2623     intel_batchbuffer_emit_mi_flush(batch);
2624     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2625     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2626     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2627     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2628     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2629
2630     for (j = 0; j < decode_state->num_slice_params; j++) {
2631         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2632         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2633         slice_data_bo = decode_state->slice_datas[j]->bo;
2634         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2635
2636         if (j == decode_state->num_slice_params - 1)
2637             next_slice_group_param = NULL;
2638         else
2639             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2640
2641         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2642             int component;
2643
2644             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2645
2646             if (i < decode_state->slice_params[j]->num_elements - 1)
2647                 next_slice_param = slice_param + 1;
2648             else
2649                 next_slice_param = next_slice_group_param;
2650
2651             for (component = 0; component < slice_param->num_components; component++) {
2652                 if (max_selector < slice_param->components[component].dc_table_selector)
2653                     max_selector = slice_param->components[component].dc_table_selector;
2654
2655                 if (max_selector < slice_param->components[component].ac_table_selector)
2656                     max_selector = slice_param->components[component].ac_table_selector;
2657             }
2658
2659             slice_param++;
2660         }
2661     }
2662
2663     assert(max_selector < 2);
2664     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2665
2666     for (j = 0; j < decode_state->num_slice_params; j++) {
2667         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2668         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2669         slice_data_bo = decode_state->slice_datas[j]->bo;
2670         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2671
2672         if (j == decode_state->num_slice_params - 1)
2673             next_slice_group_param = NULL;
2674         else
2675             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2676
2677         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2678             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2679
2680             if (i < decode_state->slice_params[j]->num_elements - 1)
2681                 next_slice_param = slice_param + 1;
2682             else
2683                 next_slice_param = next_slice_group_param;
2684
2685             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2686             slice_param++;
2687         }
2688     }
2689
2690     intel_batchbuffer_end_atomic(batch);
2691     intel_batchbuffer_flush(batch);
2692 }
2693
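/*
 * Dequantisation lookup tables from the VP8 specification: they map a
 * clipped quantisation index (0..127) to the DC/AC dequantisation factors
 * used to fill the quantizer dwords (DW4-DW15) of MFX_VP8_PIC_STATE below.
 */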
2694 static const int vp8_dc_qlookup[128] =
2695 {
2696       4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
2697      18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
2698      29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
2699      44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
2700      59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
2701      75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
2702      91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2703     122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
2704 };
2705
2706 static const int vp8_ac_qlookup[128] =
2707 {
2708       4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
2709      20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
2710      36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
2711      52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
2712      78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
2713     110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2714     155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2715     213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2716 };
2717
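/*
 * Clamp a raw quantisation index into the 0..127 range covered by the
 * lookup tables above, e.g. an index of 130 clips to 127 and selects
 * vp8_ac_qlookup[127] == 284.
 */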
2718 static inline unsigned int vp8_clip_quantization_index(int index)
2719 {
2720     if (index > 127)
2721         return 127;
2722     else if (index < 0)
2723         return 0;
2724
2725     return index;
2726 }
2727
2728 static void
2729 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2730                           struct decode_state *decode_state,
2731                           struct gen7_mfd_context *gen7_mfd_context)
2732 {
2733     struct object_surface *obj_surface;
2734     struct i965_driver_data *i965 = i965_driver_data(ctx);
2735     dri_bo *bo;
2736     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2737     int width_in_mbs = (pic_param->frame_width + 15) / 16;
2738     int height_in_mbs = (pic_param->frame_height + 15) / 16;
2739
2740     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2741     assert(height_in_mbs > 0 && height_in_mbs <= 256);
2742
2743     intel_update_vp8_frame_store_index(ctx,
2744                                        decode_state,
2745                                        pic_param,
2746                                        gen7_mfd_context->reference_surface);
2747
2748     /* Current decoded picture */
2749     obj_surface = decode_state->render_object;
2750     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2751
2752     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2753     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2754     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2755     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2756
2757     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2758     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2759     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2760     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2761
2762     intel_ensure_vp8_segmentation_buffer(ctx,
2763         &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2764
2765     /* The same as AVC */
2766     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2767     bo = dri_bo_alloc(i965->intel.bufmgr,
2768                       "intra row store",
2769                       width_in_mbs * 64,
2770                       0x1000);
2771     assert(bo);
2772     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2773     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2774
2775     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2776     bo = dri_bo_alloc(i965->intel.bufmgr,
2777                       "deblocking filter row store",
2778                       width_in_mbs * 64 * 4,
2779                       0x1000);
2780     assert(bo);
2781     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2782     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2783
2784     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2785     bo = dri_bo_alloc(i965->intel.bufmgr,
2786                       "bsd mpc row store",
2787                       width_in_mbs * 64 * 2,
2788                       0x1000);
2789     assert(bo);
2790     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2791     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2792
2793     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2794     bo = dri_bo_alloc(i965->intel.bufmgr,
2795                       "mpr row store",
2796                       width_in_mbs * 64 * 2,
2797                       0x1000);
2798     assert(bo);
2799     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2800     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2801
2802     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2803 }
2804
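/*
 * Emit the 38-dword MFX_VP8_PIC_STATE: frame size and coding flags,
 * loop-filter levels, per-segment quantiser values, the coefficient
 * probability buffer, mode/MV probabilities, loop-filter deltas and the
 * segmentation-id stream base address.
 */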
2805 static void
2806 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2807                        struct decode_state *decode_state,
2808                        struct gen7_mfd_context *gen7_mfd_context)
2809 {
2810     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2811     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2812     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2813     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2814     dri_bo *probs_bo = decode_state->probability_data->bo;
2815     int i, j, log2num;
2816     unsigned int quantization_value[4][6];
2817
2818     /* There is no safe way to error out if the segmentation buffer
2819        could not be allocated. So, instead of aborting, simply decode
2820        something even if the result may look totally inaccurate */
2821     const unsigned int enable_segmentation =
2822         pic_param->pic_fields.bits.segmentation_enabled &&
2823         gen7_mfd_context->segmentation_buffer.valid;
2824         
2825     log2num = (int)log2(slice_param->num_of_partitions - 1);
2826
2827     BEGIN_BCS_BATCH(batch, 38);
2828     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2829     OUT_BCS_BATCH(batch,
2830                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2831                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2832     OUT_BCS_BATCH(batch,
2833                   log2num << 24 |
2834                   pic_param->pic_fields.bits.sharpness_level << 16 |
2835                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2836                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2837                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2838                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2839                   (enable_segmentation &&
2840                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
2841                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2842                   (enable_segmentation &&
2843                    !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2844                   (enable_segmentation &&
2845                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2846                   (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicates an intra frame in the VP8 stream/spec (§9.1) */
2847                   pic_param->pic_fields.bits.filter_type << 4 |
2848                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2849                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2850
2851     OUT_BCS_BATCH(batch,
2852                   pic_param->loop_filter_level[3] << 24 |
2853                   pic_param->loop_filter_level[2] << 16 |
2854                   pic_param->loop_filter_level[1] <<  8 |
2855                   pic_param->loop_filter_level[0] <<  0);
2856
2857     /* Quantizer values for the 4 segments, DW4-DW15 */
2858     for (i = 0; i < 4; i++) {
2859         quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])]; /* yac */
2860         quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])]; /* ydc */
2861         quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /* y2dc */
2862         /* (x * 101581) >> 16 approximates x * 155 / 100 */
2863         quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /* y2ac */
2864         quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])]; /* uvdc */
2865         quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])]; /* uvac */
2866
2867         quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2868         quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2869
2870         OUT_BCS_BATCH(batch,
2871                       quantization_value[i][0] << 16 | /* Y1AC */
2872                       quantization_value[i][1] <<  0); /* Y1DC */
2873         OUT_BCS_BATCH(batch,
2874                       quantization_value[i][5] << 16 | /* UVAC */
2875                       quantization_value[i][4] <<  0); /* UVDC */
2876         OUT_BCS_BATCH(batch,
2877                       quantization_value[i][3] << 16 | /* Y2AC */
2878                       quantization_value[i][2] <<  0); /* Y2DC */
2879     }
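    /*
     * Worked example with hypothetical all-zero quantization indices:
     * yac = ydc = uvdc = uvac = 4, y2dc = 2 * 4 = 8, and
     * y2ac = (101581 * 4) >> 16 = 6, which the clamp above raises to 8.
     */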
2880
2881     /* CoeffProbability table for non-key frame, DW16-DW18 */
2882     if (probs_bo) {
2883         OUT_BCS_RELOC(batch, probs_bo,
2884                       0, I915_GEM_DOMAIN_INSTRUCTION,
2885                       0);
2886         OUT_BCS_BATCH(batch, 0);
2887         OUT_BCS_BATCH(batch, 0);
2888     } else {
2889         OUT_BCS_BATCH(batch, 0);
2890         OUT_BCS_BATCH(batch, 0);
2891         OUT_BCS_BATCH(batch, 0);
2892     }
2893
2894     OUT_BCS_BATCH(batch,
2895                   pic_param->mb_segment_tree_probs[2] << 16 |
2896                   pic_param->mb_segment_tree_probs[1] <<  8 |
2897                   pic_param->mb_segment_tree_probs[0] <<  0);
2898
2899     OUT_BCS_BATCH(batch,
2900                   pic_param->prob_skip_false << 24 |
2901                   pic_param->prob_intra      << 16 |
2902                   pic_param->prob_last       <<  8 |
2903                   pic_param->prob_gf         <<  0);
2904
2905     OUT_BCS_BATCH(batch,
2906                   pic_param->y_mode_probs[3] << 24 |
2907                   pic_param->y_mode_probs[2] << 16 |
2908                   pic_param->y_mode_probs[1] <<  8 |
2909                   pic_param->y_mode_probs[0] <<  0);
2910
2911     OUT_BCS_BATCH(batch,
2912                   pic_param->uv_mode_probs[2] << 16 |
2913                   pic_param->uv_mode_probs[1] <<  8 |
2914                   pic_param->uv_mode_probs[0] <<  0);
2915     
2916     /* MV update value, DW23-DW32 */
2917     for (i = 0; i < 2; i++) {
2918         for (j = 0; j < 20; j += 4) {
2919             OUT_BCS_BATCH(batch,
2920                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2921                           pic_param->mv_probs[i][j + 2] << 16 |
2922                           pic_param->mv_probs[i][j + 1] <<  8 |
2923                           pic_param->mv_probs[i][j + 0] <<  0);
2924         }
2925     }
2926
2927     OUT_BCS_BATCH(batch,
2928                   (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2929                   (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2930                   (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
2931                   (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);
2932
2933     OUT_BCS_BATCH(batch,
2934                   (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2935                   (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2936                   (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
2937                   (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);
2938
2939     /* segmentation id stream base address, DW35-DW37 */
2940     if (enable_segmentation) {
2941         OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2942                       0, I915_GEM_DOMAIN_INSTRUCTION,
2943                       0);
2944         OUT_BCS_BATCH(batch, 0);
2945         OUT_BCS_BATCH(batch, 0);
2946     }
2947     else {
2948         OUT_BCS_BATCH(batch, 0);
2949         OUT_BCS_BATCH(batch, 0);
2950         OUT_BCS_BATCH(batch, 0);
2951     }
2952     ADVANCE_BCS_BATCH(batch);
2953 }
2954
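/*
 * Emit MFD_VP8_BSD_OBJECT for the single per-frame "slice". In the VP8
 * bitstream the control partition (partition 0) is followed by
 * (num_of_partitions - 2) three-byte size fields whenever more than one
 * token partition is present, and only then by the token partitions
 * themselves; the offset arithmetic below walks that layout.
 */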
2955 static void
2956 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2957                         VAPictureParameterBufferVP8 *pic_param,
2958                         VASliceParameterBufferVP8 *slice_param,
2959                         dri_bo *slice_data_bo,
2960                         struct gen7_mfd_context *gen7_mfd_context)
2961 {
2962     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2963     int i, log2num;
2964     unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
2965     unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
2966     unsigned int partition_size_0 = slice_param->partition_size[0];
2967
2968     assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
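    /*
     * used_bits is how much of the current byte the boolean decoder has
     * already consumed (8 - count); when the whole byte has been used up,
     * start partition 0 at the next byte and shrink it accordingly.
     */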
2969     if (used_bits == 8) {
2970         used_bits = 0;
2971         offset += 1;
2972         partition_size_0 -= 1;
2973     }
2974
2975     assert(slice_param->num_of_partitions >= 2);
2976     assert(slice_param->num_of_partitions <= 9);
2977
2978     log2num = (int)log2(slice_param->num_of_partitions - 1);
2979
2980     BEGIN_BCS_BATCH(batch, 22);
2981     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2982     OUT_BCS_BATCH(batch,
2983                   used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2984                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
2985                   log2num << 4 |
2986                   (slice_param->macroblock_offset & 0x7));
2987     OUT_BCS_BATCH(batch,
2988                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
2989                   0);
2990
2991     OUT_BCS_BATCH(batch, partition_size_0 + 1);
2992     OUT_BCS_BATCH(batch, offset);
2993     /* partition sizes in bytes are present after the first partition above when there is more than one token partition */
2994     offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
2995     for (i = 1; i < 9; i++) {
2996         if (i < slice_param->num_of_partitions) {
2997             OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
2998             OUT_BCS_BATCH(batch, offset);
2999         } else {
3000             OUT_BCS_BATCH(batch, 0);
3001             OUT_BCS_BATCH(batch, 0);
3002         }
3003
3004         offset += slice_param->partition_size[i];
3005     }
3006
3007     OUT_BCS_BATCH(batch, 0); /* concealment method */
3008
3009     ADVANCE_BCS_BATCH(batch);
3010 }
3011
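/*
 * Top-level VP8 decode: a frame is submitted as exactly one slice, so the
 * slice/data buffers are validated up front and a single pass emits the
 * pipe, surface, buffer, picture and BSD-object state for the frame.
 */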
3012 void
3013 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3014                             struct decode_state *decode_state,
3015                             struct gen7_mfd_context *gen7_mfd_context)
3016 {
3017     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3018     VAPictureParameterBufferVP8 *pic_param;
3019     VASliceParameterBufferVP8 *slice_param;
3020     dri_bo *slice_data_bo;
3021
3022     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3023     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3024
3025     /* one slice per frame */
3026     if (decode_state->num_slice_params != 1 ||
3027         (!decode_state->slice_params ||
3028          !decode_state->slice_params[0] ||
3029          (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3030         (!decode_state->slice_datas ||
3031          !decode_state->slice_datas[0] ||
3032          !decode_state->slice_datas[0]->bo) ||
3033         !decode_state->probability_data) {
3034         WARN_ONCE("Wrong parameters for VP8 decoding\n");
3035
3036         return;
3037     }
3038
3039     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3040     slice_data_bo = decode_state->slice_datas[0]->bo;
3041
3042     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3043     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3044     intel_batchbuffer_emit_mi_flush(batch);
3045     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3046     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3047     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3048     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3049     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3050     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3051     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3052     intel_batchbuffer_end_atomic(batch);
3053     intel_batchbuffer_flush(batch);
3054 }
3055
3056 static VAStatus
3057 gen8_mfd_decode_picture(VADriverContextP ctx, 
3058                         VAProfile profile, 
3059                         union codec_state *codec_state,
3060                         struct hw_context *hw_context)
3062 {
3063     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3064     struct decode_state *decode_state = &codec_state->decode;
3065     VAStatus vaStatus;
3066
3067     assert(gen7_mfd_context);
3068
3069     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3070
3071     if (vaStatus != VA_STATUS_SUCCESS)
3072         goto out;
3073
3074     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3075
3076     switch (profile) {
3077     case VAProfileMPEG2Simple:
3078     case VAProfileMPEG2Main:
3079         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3080         break;
3081         
3082     case VAProfileH264ConstrainedBaseline:
3083     case VAProfileH264Main:
3084     case VAProfileH264High:
3085     case VAProfileH264StereoHigh:
3086     case VAProfileH264MultiviewHigh:
3087         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3088         break;
3089
3090     case VAProfileVC1Simple:
3091     case VAProfileVC1Main:
3092     case VAProfileVC1Advanced:
3093         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3094         break;
3095
3096     case VAProfileJPEGBaseline:
3097         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3098         break;
3099
3100     case VAProfileVP8Version0_3:
3101         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3102         break;
3103
3104     default:
3105         assert(0);
3106         break;
3107     }
3108
3109     vaStatus = VA_STATUS_SUCCESS;
3110
3111 out:
3112     return vaStatus;
3113 }
3114
3115 static void
3116 gen8_mfd_context_destroy(void *hw_context)
3117 {
3118     VADriverContextP ctx;
3119     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3120
3121     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3122
3123     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3124     gen7_mfd_context->post_deblocking_output.bo = NULL;
3125
3126     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3127     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3128
3129     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3130     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3131
3132     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3133     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3134
3135     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3136     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3137
3138     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3139     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3140
3141     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3142     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3143
3144     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3145     gen7_mfd_context->segmentation_buffer.bo = NULL;
3146
3147     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3148
3149     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3150         i965_DestroySurfaces(ctx,
3151                              &gen7_mfd_context->jpeg_wa_surface_id,
3152                              1);
3153         gen7_mfd_context->jpeg_wa_surface_object = NULL;
3154     }
3155
3156     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3157     free(gen7_mfd_context);
3158 }
3159
3160 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3161                                     struct gen7_mfd_context *gen7_mfd_context)
3162 {
3163     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3164     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3165     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3166     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3167 }
3168
3169 struct hw_context *
3170 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3171 {
3172     struct intel_driver_data *intel = intel_driver_data(ctx);
3173     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3174     int i;
3175
3176     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3177     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3178     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3179
3180     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3181         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3182         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3183     }
3184
3185     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3186     gen7_mfd_context->segmentation_buffer.valid = 0;
3187
3188     switch (obj_config->profile) {
3189     case VAProfileMPEG2Simple:
3190     case VAProfileMPEG2Main:
3191         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3192         break;
3193
3194     case VAProfileH264ConstrainedBaseline:
3195     case VAProfileH264Main:
3196     case VAProfileH264High:
3197     case VAProfileH264StereoHigh:
3198     case VAProfileH264MultiviewHigh:
3199         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3200         break;
3201     default:
3202         break;
3203     }
3204
3205     gen7_mfd_context->driver_context = ctx;
3206     return (struct hw_context *)gen7_mfd_context;
3207 }
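
/*
 * Minimal usage sketch (hypothetical caller, assuming the hw_context
 * vtable is invoked exactly as registered above):
 *
 *     struct hw_context *hwc = gen8_dec_hw_context_init(ctx, obj_config);
 *     hwc->run(ctx, obj_config->profile, codec_state, hwc); // decode one picture
 *     hwc->destroy(hwc);
 */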