OSDN Git Service

H264: Use macroblock pair to calculate H264 decoding parameter under MBAFF flag
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
/* Stepping threshold: revisions >= B0 take the "B plus" hardware paths. */
#define B0_STEP_REV             2
#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)

/* Zig-zag scan order for an 8x8 block: entry i is the raster-order index
 * of the i-th coefficient in zig-zag order (used to reorder scaling
 * lists / quantization matrices before handing them to the hardware). */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
61
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx, 
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77         gen7_avc_surface->base.frame_store_id = -1;
78         assert((obj_surface->size & 0x3f) == 0);
79         obj_surface->private_data = gen7_avc_surface;
80     }
81
82     /* DMV buffers now relate to the whole frame, irrespective of
83        field coding modes */
84     if (gen7_avc_surface->dmv_top == NULL) {
85         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86                                                  "direct mv w/r buffer",
87                                                  width_in_mbs * height_in_mbs * 128,
88                                                  0x1000);
89         assert(gen7_avc_surface->dmv_top);
90     }
91 }
92
/*
 * Program MFX_PIPE_MODE_SELECT: put the MFX engine into VLD decode mode
 * for the selected codec standard and enable either the pre- or the
 * post-deblocking output path, according to what the context marked valid.
 */
static void
gen8_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* Only the codecs this decoder supports may be selected. */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1 ||
           standard_select == MFX_FORMAT_JPEG ||
           standard_select == MFX_FORMAT_VP8);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    /* Error-handling policy: do not terminate on any decode error class. */
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
128
129 static void
130 gen8_mfd_surface_state(VADriverContextP ctx,
131                        struct decode_state *decode_state,
132                        int standard_select,
133                        struct gen7_mfd_context *gen7_mfd_context)
134 {
135     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
136     struct object_surface *obj_surface = decode_state->render_object;
137     unsigned int y_cb_offset;
138     unsigned int y_cr_offset;
139     unsigned int surface_format;
140
141     assert(obj_surface);
142
143     y_cb_offset = obj_surface->y_cb_offset;
144     y_cr_offset = obj_surface->y_cr_offset;
145
146     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
147         MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
148
149     BEGIN_BCS_BATCH(batch, 6);
150     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
151     OUT_BCS_BATCH(batch, 0);
152     OUT_BCS_BATCH(batch,
153                   ((obj_surface->orig_height - 1) << 18) |
154                   ((obj_surface->orig_width - 1) << 4));
155     OUT_BCS_BATCH(batch,
156                   (surface_format << 28) | /* 420 planar YUV surface */
157                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
158                   (0 << 22) | /* surface object control state, ignored */
159                   ((obj_surface->width - 1) << 3) | /* pitch */
160                   (0 << 2)  | /* must be 0 */
161                   (1 << 1)  | /* must be tiled */
162                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
163     OUT_BCS_BATCH(batch,
164                   (0 << 16) | /* X offset for U(Cb), must be 0 */
165                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
166     OUT_BCS_BATCH(batch,
167                   (0 << 16) | /* X offset for V(Cr), must be 0 */
168                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
169     ADVANCE_BCS_BATCH(batch);
170 }
171
172 static void
173 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
174                              struct decode_state *decode_state,
175                              int standard_select,
176                              struct gen7_mfd_context *gen7_mfd_context)
177 {
178     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
179     int i;
180
181     BEGIN_BCS_BATCH(batch, 61);
182     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
183         /* Pre-deblock 1-3 */
184     if (gen7_mfd_context->pre_deblocking_output.valid)
185         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
186                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
187                       0);
188     else
189         OUT_BCS_BATCH(batch, 0);
190
191         OUT_BCS_BATCH(batch, 0);
192         OUT_BCS_BATCH(batch, 0);
193         /* Post-debloing 4-6 */
194     if (gen7_mfd_context->post_deblocking_output.valid)
195         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
196                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
197                       0);
198     else
199         OUT_BCS_BATCH(batch, 0);
200
201         OUT_BCS_BATCH(batch, 0);
202         OUT_BCS_BATCH(batch, 0);
203
204         /* uncompressed-video & stream out 7-12 */
205     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
206     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
207         OUT_BCS_BATCH(batch, 0);
208         OUT_BCS_BATCH(batch, 0);
209         OUT_BCS_BATCH(batch, 0);
210         OUT_BCS_BATCH(batch, 0);
211
212         /* intra row-store scratch 13-15 */
213     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
214         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
215                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
216                       0);
217     else
218         OUT_BCS_BATCH(batch, 0);
219
220         OUT_BCS_BATCH(batch, 0);
221         OUT_BCS_BATCH(batch, 0);
222         /* deblocking-filter-row-store 16-18 */
223     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
224         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
225                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
226                       0);
227     else
228         OUT_BCS_BATCH(batch, 0);
229         OUT_BCS_BATCH(batch, 0);
230         OUT_BCS_BATCH(batch, 0);
231
232     /* DW 19..50 */
233     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
234         struct object_surface *obj_surface;
235
236         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
237             gen7_mfd_context->reference_surface[i].obj_surface &&
238             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
239             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
240
241             OUT_BCS_RELOC(batch, obj_surface->bo,
242                           I915_GEM_DOMAIN_INSTRUCTION, 0,
243                           0);
244         } else {
245             OUT_BCS_BATCH(batch, 0);
246         }
247         
248         OUT_BCS_BATCH(batch, 0);
249     }
250     
251     /* reference property 51 */
252     OUT_BCS_BATCH(batch, 0);  
253         
254     /* Macroblock status & ILDB 52-57 */
255     OUT_BCS_BATCH(batch, 0);
256     OUT_BCS_BATCH(batch, 0);
257     OUT_BCS_BATCH(batch, 0);
258     OUT_BCS_BATCH(batch, 0);
259     OUT_BCS_BATCH(batch, 0);
260     OUT_BCS_BATCH(batch, 0);
261
262     /* the second Macroblock status 58-60 */    
263     OUT_BCS_BATCH(batch, 0);
264     OUT_BCS_BATCH(batch, 0);
265     OUT_BCS_BATCH(batch, 0);
266
267     ADVANCE_BCS_BATCH(batch);
268 }
269
270 static void
271 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
272                                  dri_bo *slice_data_bo,
273                                  int standard_select,
274                                  struct gen7_mfd_context *gen7_mfd_context)
275 {
276     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
277
278     BEGIN_BCS_BATCH(batch, 26);
279     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
280         /* MFX In BS 1-5 */
281     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
282     OUT_BCS_BATCH(batch, 0);
283     OUT_BCS_BATCH(batch, 0);
284         /* Upper bound 4-5 */   
285     OUT_BCS_BATCH(batch, 0);
286     OUT_BCS_BATCH(batch, 0);
287
288         /* MFX indirect MV 6-10 */
289     OUT_BCS_BATCH(batch, 0);
290     OUT_BCS_BATCH(batch, 0);
291     OUT_BCS_BATCH(batch, 0);
292     OUT_BCS_BATCH(batch, 0);
293     OUT_BCS_BATCH(batch, 0);
294         
295         /* MFX IT_COFF 11-15 */
296     OUT_BCS_BATCH(batch, 0);
297     OUT_BCS_BATCH(batch, 0);
298     OUT_BCS_BATCH(batch, 0);
299     OUT_BCS_BATCH(batch, 0);
300     OUT_BCS_BATCH(batch, 0);
301
302         /* MFX IT_DBLK 16-20 */
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305     OUT_BCS_BATCH(batch, 0);
306     OUT_BCS_BATCH(batch, 0);
307     OUT_BCS_BATCH(batch, 0);
308
309         /* MFX PAK_BSE object for encoder 21-25 */
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312     OUT_BCS_BATCH(batch, 0);
313     OUT_BCS_BATCH(batch, 0);
314     OUT_BCS_BATCH(batch, 0);
315
316     ADVANCE_BCS_BATCH(batch);
317 }
318
319 static void
320 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
321                                  struct decode_state *decode_state,
322                                  int standard_select,
323                                  struct gen7_mfd_context *gen7_mfd_context)
324 {
325     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
326
327     BEGIN_BCS_BATCH(batch, 10);
328     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
329
330     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
331         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
332                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
333                       0);
334         else
335                 OUT_BCS_BATCH(batch, 0);
336                 
337     OUT_BCS_BATCH(batch, 0);
338     OUT_BCS_BATCH(batch, 0);
339         /* MPR Row Store Scratch buffer 4-6 */
340     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
341         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
342                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
343                       0);
344     else
345         OUT_BCS_BATCH(batch, 0);
346
347     OUT_BCS_BATCH(batch, 0);
348     OUT_BCS_BATCH(batch, 0);
349
350         /* Bitplane 7-9 */ 
351     if (gen7_mfd_context->bitplane_read_buffer.valid)
352         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
353                       I915_GEM_DOMAIN_INSTRUCTION, 0,
354                       0);
355     else
356         OUT_BCS_BATCH(batch, 0);
357     OUT_BCS_BATCH(batch, 0);
358     OUT_BCS_BATCH(batch, 0);
359     ADVANCE_BCS_BATCH(batch);
360 }
361
362 static void
363 gen8_mfd_qm_state(VADriverContextP ctx,
364                   int qm_type,
365                   unsigned char *qm,
366                   int qm_length,
367                   struct gen7_mfd_context *gen7_mfd_context)
368 {
369     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
370     unsigned int qm_buffer[16];
371
372     assert(qm_length <= 16 * 4);
373     memcpy(qm_buffer, qm, qm_length);
374
375     BEGIN_BCS_BATCH(batch, 18);
376     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
377     OUT_BCS_BATCH(batch, qm_type << 0);
378     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
379     ADVANCE_BCS_BATCH(batch);
380 }
381
/*
 * Program MFX_AVC_IMG_STATE from the VA picture parameters: picture
 * dimensions in macroblocks, field/frame structure, QP index offsets,
 * and the sequence/picture coding flags the hardware needs.
 */
static void
gen8_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    /* Image structure field: 0 = frame, 1 = top field, 3 = bottom field */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* A field picture (odd img_struct) must be flagged as such and
       vice versa. */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF: adaptive frame/field at macroblock-pair level, only for
       frame (not field) pictures. */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 17);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
    /* DW 1: total macroblock count minus one */
    OUT_BCS_BATCH(batch,
                  (width_in_mbs * height_in_mbs - 1));
    /* DW 2: frame dimensions in macroblocks */
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    /* DW 3: chroma QP offsets, weighted prediction, image structure */
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    /* DW 4: sequence/picture coding flags */
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    /* DW 5..16: unused for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
467
468 static void
469 gen8_mfd_avc_qm_state(VADriverContextP ctx,
470                       struct decode_state *decode_state,
471                       struct gen7_mfd_context *gen7_mfd_context)
472 {
473     VAIQMatrixBufferH264 *iq_matrix;
474     VAPictureParameterBufferH264 *pic_param;
475
476     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
477         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
478     else
479         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
480
481     assert(decode_state->pic_param && decode_state->pic_param->buffer);
482     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
483
484     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
485     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
486
487     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
488         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
489         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
490     }
491 }
492
/* Thin wrapper: delegate picture-ID state programming to the shared
 * gen75 helper, passing this context's batch and reference list. */
static inline void
gen8_mfd_avc_picid_state(VADriverContextP ctx,
    struct decode_state *decode_state,
    struct gen7_mfd_context *gen7_mfd_context)
{
    gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
        gen7_mfd_context->reference_surface);
}
501
502 static void
503 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
504                               struct decode_state *decode_state,
505                               VAPictureParameterBufferH264 *pic_param,
506                               VASliceParameterBufferH264 *slice_param,
507                               struct gen7_mfd_context *gen7_mfd_context)
508 {
509     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
510     struct object_surface *obj_surface;
511     GenAvcSurface *gen7_avc_surface;
512     VAPictureH264 *va_pic;
513     int i;
514
515     BEGIN_BCS_BATCH(batch, 71);
516     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
517
518     /* reference surfaces 0..15 */
519     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
520         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
521             gen7_mfd_context->reference_surface[i].obj_surface &&
522             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
523
524             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
525             gen7_avc_surface = obj_surface->private_data;
526
527             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
528                           I915_GEM_DOMAIN_INSTRUCTION, 0,
529                           0);
530             OUT_BCS_BATCH(batch, 0);
531         } else {
532             OUT_BCS_BATCH(batch, 0);
533             OUT_BCS_BATCH(batch, 0);
534         }
535     }
536     
537     OUT_BCS_BATCH(batch, 0);
538
539     /* the current decoding frame/field */
540     va_pic = &pic_param->CurrPic;
541     obj_surface = decode_state->render_object;
542     assert(obj_surface->bo && obj_surface->private_data);
543     gen7_avc_surface = obj_surface->private_data;
544
545     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
546                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
547                   0);
548
549     OUT_BCS_BATCH(batch, 0);
550     OUT_BCS_BATCH(batch, 0);
551
552     /* POC List */
553     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
554         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
555
556         if (obj_surface) {
557             const VAPictureH264 * const va_pic = avc_find_picture(
558                 obj_surface->base.id, pic_param->ReferenceFrames,
559                 ARRAY_ELEMS(pic_param->ReferenceFrames));
560
561             assert(va_pic != NULL);
562             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
563             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
564         } else {
565             OUT_BCS_BATCH(batch, 0);
566             OUT_BCS_BATCH(batch, 0);
567         }
568     }
569
570     va_pic = &pic_param->CurrPic;
571     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
572     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
573
574     ADVANCE_BCS_BATCH(batch);
575 }
576
/* Thin wrapper: emit a phantom slice (via the shared gen6 helper) to
 * cover macroblocks before the first real slice of the picture. */
static void
gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
                                 VAPictureParameterBufferH264 *pic_param,
                                 VASliceParameterBufferH264 *next_slice_param,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
}
585
/*
 * Program MFX_AVC_SLICE_STATE for one slice: slice type, active
 * reference counts, QP/deblocking parameters, and the macroblock
 * positions of this slice's start and of the next slice's start (so the
 * hardware knows where this slice ends).
 */
static void
gen8_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    /* MBAFF: macroblock-pair adaptive coding, frame pictures only */
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* Collapse SI -> I and SP -> P: the hardware only knows I/P/B. */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    /* Active reference counts: I uses none, P uses list 0, B uses both. */
    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    first_mb_in_slice = slice_param->first_mb_in_slice;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    /* In MBAFF pictures first_mb_in_slice counts macroblock pairs, so
       the vertical position must be doubled to get macroblock rows. */
    if (mbaff_picture)
        slice_ver_pos = slice_ver_pos << 1;
    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;

        if (mbaff_picture)
            next_slice_ver_pos = next_slice_ver_pos << 1;
    } else {
        /* Last slice: next position is the bottom of the picture
           (half the frame height for a field picture). */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    /* DW 2: reference counts and weight denominators */
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    /* DW 3: direct mode, deblocking, CABAC init, slice QP */
    OUT_BCS_BATCH(batch,
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    /* DW 4: start position of this slice */
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    /* DW 5: start position of the next slice */
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
676
/* Thin wrapper: delegate reference index list programming to the shared
 * gen6 helper with this context's batch and reference surfaces. */
static inline void
gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           VASliceParameterBufferH264 *slice_param,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    gen6_send_avc_ref_idx_state(
        gen7_mfd_context->base.batch,
        slice_param,
        gen7_mfd_context->reference_surface
    );
}
689
690 static void
691 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
692                                 VAPictureParameterBufferH264 *pic_param,
693                                 VASliceParameterBufferH264 *slice_param,
694                                 struct gen7_mfd_context *gen7_mfd_context)
695 {
696     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
697     int i, j, num_weight_offset_table = 0;
698     short weightoffsets[32 * 6];
699
700     if ((slice_param->slice_type == SLICE_TYPE_P ||
701          slice_param->slice_type == SLICE_TYPE_SP) &&
702         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
703         num_weight_offset_table = 1;
704     }
705     
706     if ((slice_param->slice_type == SLICE_TYPE_B) &&
707         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
708         num_weight_offset_table = 2;
709     }
710
711     for (i = 0; i < num_weight_offset_table; i++) {
712         BEGIN_BCS_BATCH(batch, 98);
713         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
714         OUT_BCS_BATCH(batch, i);
715
716         if (i == 0) {
717             for (j = 0; j < 32; j++) {
718                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
719                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
720                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
721                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
722                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
723                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
724             }
725         } else {
726             for (j = 0; j < 32; j++) {
727                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
728                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
729                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
730                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
731                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
732                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
733             }
734         }
735
736         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
737         ADVANCE_BCS_BATCH(batch);
738     }
739 }
740
/*
 * Emit an MFD_AVC_BSD_OBJECT command to kick off bitstream decoding of a
 * single AVC slice.
 *
 * slice_data_bit_offset locates the first macroblock within the slice
 * data (i.e. past the slice header), computed by
 * avc_get_first_mb_bit_offset() from the slice parameters and the
 * entropy coding mode.  Its byte part is programmed into DW4[31:16] and
 * the residual bit part into DW4[2:0].  DW4 bit 3 flags the last slice
 * of the picture (no following slice parameter).
 */
static void
gen8_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
                                                            slice_param,
                                                            pic_param->pic_fields.bits.entropy_coding_mode_flag);

    /* the input bitsteam format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_data_size));
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) |
                  (1 << 7)  |
                  (0 << 5)  |
                  (0 << 4)  |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
776
/*
 * One-time AVC context setup: seed the cached H.264 IQ matrix with the
 * default (flat) scaling lists.
 */
static inline void
gen8_mfd_avc_context_init(
    VADriverContextP         ctx,
    struct gen7_mfd_context *gen7_mfd_context
)
{
    /* Initialize flat scaling lists */
    avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
}
786
787 static void
788 gen8_mfd_avc_decode_init(VADriverContextP ctx,
789                          struct decode_state *decode_state,
790                          struct gen7_mfd_context *gen7_mfd_context)
791 {
792     VAPictureParameterBufferH264 *pic_param;
793     VASliceParameterBufferH264 *slice_param;
794     struct i965_driver_data *i965 = i965_driver_data(ctx);
795     struct object_surface *obj_surface;
796     dri_bo *bo;
797     int i, j, enable_avc_ildb = 0;
798     unsigned int width_in_mbs, height_in_mbs;
799
800     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
801         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
802         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
803
804         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
805             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
806             assert((slice_param->slice_type == SLICE_TYPE_I) ||
807                    (slice_param->slice_type == SLICE_TYPE_SI) ||
808                    (slice_param->slice_type == SLICE_TYPE_P) ||
809                    (slice_param->slice_type == SLICE_TYPE_SP) ||
810                    (slice_param->slice_type == SLICE_TYPE_B));
811
812             if (slice_param->disable_deblocking_filter_idc != 1) {
813                 enable_avc_ildb = 1;
814                 break;
815             }
816
817             slice_param++;
818         }
819     }
820
821     assert(decode_state->pic_param && decode_state->pic_param->buffer);
822     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
823     gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
824         gen7_mfd_context->reference_surface);
825     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
826     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
827     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
828     assert(height_in_mbs > 0 && height_in_mbs <= 256);
829
830     /* Current decoded picture */
831     obj_surface = decode_state->render_object;
832     if (pic_param->pic_fields.bits.reference_pic_flag)
833         obj_surface->flags |= SURFACE_REFERENCED;
834     else
835         obj_surface->flags &= ~SURFACE_REFERENCED;
836
837     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
838     gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
839
840     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
841     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
842     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
843     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
844
845     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
846     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
847     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
848     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
849
850     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
851     bo = dri_bo_alloc(i965->intel.bufmgr,
852                       "intra row store",
853                       width_in_mbs * 64,
854                       0x1000);
855     assert(bo);
856     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
857     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
858
859     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
860     bo = dri_bo_alloc(i965->intel.bufmgr,
861                       "deblocking filter row store",
862                       width_in_mbs * 64 * 4,
863                       0x1000);
864     assert(bo);
865     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
866     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
867
868     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
869     bo = dri_bo_alloc(i965->intel.bufmgr,
870                       "bsd mpc row store",
871                       width_in_mbs * 64 * 2,
872                       0x1000);
873     assert(bo);
874     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
875     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
876
877     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
878     bo = dri_bo_alloc(i965->intel.bufmgr,
879                       "mpr row store",
880                       width_in_mbs * 64 * 2,
881                       0x1000);
882     assert(bo);
883     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
884     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
885
886     gen7_mfd_context->bitplane_read_buffer.valid = 0;
887 }
888
/*
 * Top-level AVC decode entry point: refresh per-picture state, then emit
 * one atomic BCS batch containing the common MFX pipeline setup followed
 * by per-slice state (direct mode, ref idx, weight/offset, slice state)
 * and one BSD object per slice.
 */
static void
gen8_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);

    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);

        /* Peek at the next slice group so the last slice of this group can
         * still chain to its successor (NULL only for the very last one). */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        /* Handle a picture whose first slice does not start at MB 0. */
        if (j == 0 && slice_param->first_mb_in_slice)
            gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
953
954 static void
955 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
956                            struct decode_state *decode_state,
957                            struct gen7_mfd_context *gen7_mfd_context)
958 {
959     VAPictureParameterBufferMPEG2 *pic_param;
960     struct i965_driver_data *i965 = i965_driver_data(ctx);
961     struct object_surface *obj_surface;
962     dri_bo *bo;
963     unsigned int width_in_mbs;
964
965     assert(decode_state->pic_param && decode_state->pic_param->buffer);
966     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
967     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
968
969     mpeg2_set_reference_surfaces(
970         ctx,
971         gen7_mfd_context->reference_surface,
972         decode_state,
973         pic_param
974     );
975
976     /* Current decoded picture */
977     obj_surface = decode_state->render_object;
978     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
979
980     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
981     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
982     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
983     gen7_mfd_context->pre_deblocking_output.valid = 1;
984
985     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
986     bo = dri_bo_alloc(i965->intel.bufmgr,
987                       "bsd mpc row store",
988                       width_in_mbs * 96,
989                       0x1000);
990     assert(bo);
991     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
992     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
993
994     gen7_mfd_context->post_deblocking_output.valid = 0;
995     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
996     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
997     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
998     gen7_mfd_context->bitplane_read_buffer.valid = 0;
999 }
1000
/*
 * Emit MFX_MPEG2_PIC_STATE.
 *
 * pic_param->f_code packs the four MPEG-2 f_code values as nibbles
 * (f_code[0][0] in bits 15:12 down to f_code[1][1] in bits 3:0); they
 * are unpacked into DW1 as annotated below.  The slice concealment
 * disable bit (DW3[31]) is always set, i.e. hardware concealment of
 * malformed slices is turned off.
 */
static void
gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int slice_concealment_disable_bit = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    slice_concealment_disable_bit = 1;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    OUT_BCS_BATCH(batch,
                  (slice_concealment_disable_bit << 31) |
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
1047
1048 static void
1049 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1050                         struct decode_state *decode_state,
1051                         struct gen7_mfd_context *gen7_mfd_context)
1052 {
1053     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1054     int i, j;
1055
1056     /* Update internal QM state */
1057     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1058         VAIQMatrixBufferMPEG2 * const iq_matrix =
1059             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1060
1061         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1062             iq_matrix->load_intra_quantiser_matrix) {
1063             gen_iq_matrix->load_intra_quantiser_matrix =
1064                 iq_matrix->load_intra_quantiser_matrix;
1065             if (iq_matrix->load_intra_quantiser_matrix) {
1066                 for (j = 0; j < 64; j++)
1067                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1068                         iq_matrix->intra_quantiser_matrix[j];
1069             }
1070         }
1071
1072         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1073             iq_matrix->load_non_intra_quantiser_matrix) {
1074             gen_iq_matrix->load_non_intra_quantiser_matrix =
1075                 iq_matrix->load_non_intra_quantiser_matrix;
1076             if (iq_matrix->load_non_intra_quantiser_matrix) {
1077                 for (j = 0; j < 64; j++)
1078                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1079                         iq_matrix->non_intra_quantiser_matrix[j];
1080             }
1081         }
1082     }
1083
1084     /* Commit QM state to HW */
1085     for (i = 0; i < 2; i++) {
1086         unsigned char *qm = NULL;
1087         int qm_type;
1088
1089         if (i == 0) {
1090             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1091                 qm = gen_iq_matrix->intra_quantiser_matrix;
1092                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1093             }
1094         } else {
1095             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1096                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1097                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1098             }
1099         }
1100
1101         if (!qm)
1102             continue;
1103
1104         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1105     }
1106 }
1107
/*
 * Emit an MFD_MPEG2_BSD_OBJECT command for one MPEG-2 slice.
 *
 * Slice extent is given as macroblock positions: (hpos0, vpos0) is this
 * slice's start and (hpos1, vpos1) the next slice's start — or the
 * bottom of the picture for the final slice; mb_count is the raster
 * distance between them.
 *
 * NOTE(review): for field pictures with
 * wa_mpeg2_slice_vertical_position > 0 the vertical positions are
 * halved, apparently because some applications pass frame-based
 * coordinates; confirm against mpeg2_wa_slice_vertical_position().
 */
static void
gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    is_field_pic_wa = is_field_pic &&
        gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    if (next_slice_param == NULL) {
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    /* The slice data pointer is advanced to the first whole byte of
     * macroblock data; the residual bit offset goes into DW3[2:0]. */
    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (vpos1 << 8 | hpos1));
    ADVANCE_BCS_BATCH(batch);
}
1156
/*
 * Top-level MPEG-2 decode entry point: per-picture init, then one atomic
 * BCS batch containing the common MFX setup (pipe mode, surface, buffer
 * addresses, picture and QM state) and one BSD object per slice.
 */
static void
gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Probe the slice-vertical-position workaround once per context
     * (< 0 means not yet determined). */
    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);

        /* The next slice group's first slice serves as successor for this
         * group's last slice (NULL only for the very last slice). */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1212
/* VA picture_fields.picture_type -> hardware picture type encoding.
 * NOTE(review): entry 4 presumably corresponds to VC-1 skipped pictures,
 * which are decoded as P pictures — confirm against va_vc1.h. */
static const int va_to_gen7_vc1_pic_type[5] = {
    GEN7_VC1_I_PICTURE,
    GEN7_VC1_P_PICTURE,
    GEN7_VC1_B_PICTURE,
    GEN7_VC1_BI_PICTURE,
    GEN7_VC1_P_PICTURE,
};

/* VA mv_fields.mv_mode (and mv_mode2) -> hardware unified MV mode. */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pef bilinear */
    0, /* Mixed MV */
};

/* B-picture scale factors.  NOTE(review): presumably indexed by a
 * BFRACTION-derived value for direct-mode MV scaling; the consumer is in
 * gen8_mfd_vc1_pic_state(). */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160,
    224,
};

/* VA conditional overlap value -> hardware CONDOVER encoding. */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};

/* VA sequence_fields.profile -> hardware profile encoding (index 2 is a
 * reserved profile value). */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
};
1248
1249 static void 
1250 gen8_mfd_free_vc1_surface(void **data)
1251 {
1252     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1253
1254     if (!gen7_vc1_surface)
1255         return;
1256
1257     dri_bo_unreference(gen7_vc1_surface->dmv);
1258     free(gen7_vc1_surface);
1259     *data = NULL;
1260 }
1261
1262 static void
1263 gen8_mfd_init_vc1_surface(VADriverContextP ctx, 
1264                           VAPictureParameterBufferVC1 *pic_param,
1265                           struct object_surface *obj_surface)
1266 {
1267     struct i965_driver_data *i965 = i965_driver_data(ctx);
1268     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1269     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1270     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1271
1272     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1273
1274     if (!gen7_vc1_surface) {
1275         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1276         assert((obj_surface->size & 0x3f) == 0);
1277         obj_surface->private_data = gen7_vc1_surface;
1278     }
1279
1280     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1281
1282     if (gen7_vc1_surface->dmv == NULL) {
1283         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1284                                              "direct mv w/r buffer",
1285                                              width_in_mbs * height_in_mbs * 64,
1286                                              0x1000);
1287     }
1288 }
1289
/*
 * Per-picture setup for VC-1 decoding: refreshes the reference frame
 * store, ensures the render surface and its per-surface DMV buffer
 * exist, routes the decoded output through the loop filter (or not)
 * based on the entrypoint's loopfilter flag, reallocates the row-store
 * scratch buffers, and — when the picture carries bitplane data —
 * repacks the VA bitplane buffer into a BO for the hardware.
 */
static void
gen8_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int width_in_mbs;
    int picture_type;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    picture_type = pic_param->picture_fields.bits.picture_type;

    intel_update_vc1_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* Exactly one of the two outputs is valid, depending on whether the
     * in-loop filter is enabled for this entrypoint. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 7 * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* The MPR row store is not used for VC-1. */
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);

    if (gen7_mfd_context->bitplane_read_buffer.valid) {
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
        int src_w, src_h;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "VC-1 Bitplane",
                          bitplane_width * height_in_mbs,
                          0x1000);
        assert(bo);
        gen7_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);
        dst = bo->virtual;

        /*
         * Repack the VA bitplane into the hardware layout: the input
         * holds one 4-bit entry per macroblock, two entries per byte
         * with the even-indexed macroblock in the high nibble.  Each
         * output row is bitplane_width bytes; entries are shifted in
         * through the high nibble, and a trailing half-filled byte is
         * fixed up after each row when the width is odd.
         */
        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for(src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;
                int src_shift;
                uint8_t src_value;

                src_index = (src_h * width_in_mbs + src_w) / 2;
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                /* NOTE(review): bit 1 appears to be forced on for every
                 * MB of a skipped picture — confirm against the PRM's
                 * bitplane format. */
                if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
                    src_value |= 0x2;
                }

                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
            }

            if (src_w & 1)
                dst[src_w / 2] >>= 4;

            dst += bitplane_width;
        }

        dri_bo_unmap(bo);
    } else
        gen7_mfd_context->bitplane_read_buffer.bo = NULL;
}
1408
static void
gen8_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    /*
     * Emit the MFD_VC1_LONG_PIC_STATE command (6 dwords) that programs the
     * per-picture VC-1 decoding parameters derived from the VA-API picture
     * parameter buffer: quantizer/dquant configuration, motion-vector mode,
     * overlap smoothing, transform selection and bitplane presence flags.
     */
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;
    int interpolation_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /* Pull the dquant-related syntax elements out of the quantizer fields. */
    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /*
     * Translate the DQUANT syntax into the hardware's alternate-pquant
     * configuration plus an edge mask selecting which macroblock edges use
     * the alternate quantizer.
     */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        /* DQUANT == 2: alternate quantizer on all four picture edges. */
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3:
                /* "All macroblocks" profile: bilevel selects per-MB mode. */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;
                
            case 0:
                /* All four edges. */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1:
                /* Double-edge: dqdbedge selects which adjacent edge pair. */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2:
                /* Single edge selected by dqsbedge. */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /*
     * With intensity compensation the effective MV mode comes from
     * mv_mode2; otherwise from mv_mode directly.
     */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    /* B-fraction scale factor lookup; values >= 21 leave scale_factor at 0. */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
    
    /* In advanced profile the hardware treats I pictures as BI. */
    if (profile == GEN7_VC1_ADVANCED_PROFILE && 
        picture_type == GEN7_VC1_I_PICTURE)
        picture_type = GEN7_VC1_BI_PICTURE;

    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;

        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /*
     * Direct-mode MVs are only valid for B pictures whose backward
     * reference is a P picture (I/BI references carry no motion vectors).
     */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        struct gen7_vc1_surface *gen7_vc1_surface = NULL;

        obj_surface = decode_state->reference_objects[1];

        if (obj_surface)
            gen7_vc1_surface = obj_surface->private_data;

        if (!gen7_vc1_surface || 
            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* Frame coding mode: 0/1 pass through; 2 (field) encodes field order. */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /* B-picture reference frame distance, derived from the scale factor. */
    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /*
     * Overlap smoothing: the sequence-level flag only enables the filter
     * under per-profile conditions on quantizer scale, picture type and
     * (for advanced profile) the conditional-overlap syntax element.
     */
    overlap = pic_param->sequence_fields.bits.overlap;

    if (overlap) {
        overlap = 0;
        if (profile != GEN7_VC1_ADVANCED_PROFILE){
            if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
                pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
                overlap = 1;
            }
        }else {
            if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
                pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
                overlap = 1;
            }
            if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
                pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
                if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
                    overlap = 1;
                } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                           va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
                    overlap = 1;
                }
            }
        }
    } 

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    /* Sub-pel interpolation filter selection from the (possibly IC-wrapped) MV mode. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 9; /* Half-pel bilinear */
    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
        interpolation_mode = 1; /* Half-pel bicubic */
    else
        interpolation_mode = 0; /* Quarter-pel bicubic */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    /* DW1: picture size in macroblocks (height | width), minus one each. */
    OUT_BCS_BATCH(batch,
                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    /* DW2: stream properties and per-picture control bits. */
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
                  overlap << 4 |
                  !pic_param->picture_fields.bits.is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    /* DW3: picture type, coding mode and quantizer parameters. */
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  picture_type << 26 |
                  fcm << 24 |
                  alt_pq << 16 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
                  scale_factor << 0);
    /* DW4: motion-vector and alternate-pquant configuration. */
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  pic_param->reference_fields.bits.reference_distance << 20 |
                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    /* DW5: bitplane presence (inverted: "raw mode" flags) and VLC table selectors. */
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 31 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
}
1670
1671 static void
1672 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1673                              struct decode_state *decode_state,
1674                              struct gen7_mfd_context *gen7_mfd_context)
1675 {
1676     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1677     VAPictureParameterBufferVC1 *pic_param;
1678     int intensitycomp_single;
1679
1680     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1681     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1682
1683     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1684     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1685     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1686
1687     BEGIN_BCS_BATCH(batch, 6);
1688     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1689     OUT_BCS_BATCH(batch,
1690                   0 << 14 | /* FIXME: double ??? */
1691                   0 << 12 |
1692                   intensitycomp_single << 10 |
1693                   intensitycomp_single << 8 |
1694                   0 << 4 | /* FIXME: interlace mode */
1695                   0);
1696     OUT_BCS_BATCH(batch,
1697                   pic_param->luma_shift << 16 |
1698                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1699     OUT_BCS_BATCH(batch, 0);
1700     OUT_BCS_BATCH(batch, 0);
1701     OUT_BCS_BATCH(batch, 0);
1702     ADVANCE_BCS_BATCH(batch);
1703 }
1704
1705 static void
1706 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1707                               struct decode_state *decode_state,
1708                               struct gen7_mfd_context *gen7_mfd_context)
1709 {
1710     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1711     struct object_surface *obj_surface;
1712     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1713
1714     obj_surface = decode_state->render_object;
1715
1716     if (obj_surface && obj_surface->private_data) {
1717         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1718     }
1719
1720     obj_surface = decode_state->reference_objects[1];
1721
1722     if (obj_surface && obj_surface->private_data) {
1723         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1724     }
1725
1726     BEGIN_BCS_BATCH(batch, 7);
1727     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1728
1729     if (dmv_write_buffer)
1730         OUT_BCS_RELOC(batch, dmv_write_buffer,
1731                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1732                       0);
1733     else
1734         OUT_BCS_BATCH(batch, 0);
1735
1736     OUT_BCS_BATCH(batch, 0);
1737     OUT_BCS_BATCH(batch, 0);
1738
1739     if (dmv_read_buffer)
1740         OUT_BCS_RELOC(batch, dmv_read_buffer,
1741                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1742                       0);
1743     else
1744         OUT_BCS_BATCH(batch, 0);
1745     
1746     OUT_BCS_BATCH(batch, 0);
1747     OUT_BCS_BATCH(batch, 0);
1748                   
1749     ADVANCE_BCS_BATCH(batch);
1750 }
1751
static int
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    /*
     * Convert the slice-header bit offset supplied by the application into
     * an offset within the raw bitstream buffer. The advanced profile (3)
     * inserts emulation-prevention bytes (00 00 03 followed by a byte < 4)
     * into the stream; each such pattern occupies one extra raw byte, so
     * the byte position must be advanced accordingly. Other profiles need
     * no adjustment.
     */
    int header_bytes = in_slice_data_bit_offset / 8;
    int scanned, pos;

    if (profile != 3)
        return in_slice_data_bit_offset;

    /* Walk the slice header, skipping one extra raw byte per
     * emulation-prevention pattern encountered. */
    for (scanned = 0, pos = 0; scanned < header_bytes; scanned++, pos++) {
        if (buf[pos] == 0 && buf[pos + 1] == 0 &&
            buf[pos + 2] == 3 && buf[pos + 3] < 4) {
            scanned++;
            pos += 2;
        }
    }

    return 8 * pos + in_slice_data_bit_offset % 8;
}
1773
1774 static void
1775 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1776                         VAPictureParameterBufferVC1 *pic_param,
1777                         VASliceParameterBufferVC1 *slice_param,
1778                         VASliceParameterBufferVC1 *next_slice_param,
1779                         dri_bo *slice_data_bo,
1780                         struct gen7_mfd_context *gen7_mfd_context)
1781 {
1782     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1783     int next_slice_start_vert_pos;
1784     int macroblock_offset;
1785     uint8_t *slice_data = NULL;
1786
1787     dri_bo_map(slice_data_bo, 0);
1788     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1789     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1790                                                                slice_param->macroblock_offset,
1791                                                                pic_param->sequence_fields.bits.profile);
1792     dri_bo_unmap(slice_data_bo);
1793
1794     if (next_slice_param)
1795         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1796     else
1797         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1798
1799     BEGIN_BCS_BATCH(batch, 5);
1800     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1801     OUT_BCS_BATCH(batch, 
1802                   slice_param->slice_data_size - (macroblock_offset >> 3));
1803     OUT_BCS_BATCH(batch, 
1804                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1805     OUT_BCS_BATCH(batch,
1806                   slice_param->slice_vertical_position << 16 |
1807                   next_slice_start_vert_pos << 0);
1808     OUT_BCS_BATCH(batch,
1809                   (macroblock_offset & 0x7));
1810     ADVANCE_BCS_BATCH(batch);
1811 }
1812
1813 static void
1814 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1815                             struct decode_state *decode_state,
1816                             struct gen7_mfd_context *gen7_mfd_context)
1817 {
1818     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1819     VAPictureParameterBufferVC1 *pic_param;
1820     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1821     dri_bo *slice_data_bo;
1822     int i, j;
1823
1824     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1825     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1826
1827     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1828     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1829     intel_batchbuffer_emit_mi_flush(batch);
1830     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1831     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1832     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1833     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1834     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1835     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1836     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1837
1838     for (j = 0; j < decode_state->num_slice_params; j++) {
1839         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1840         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1841         slice_data_bo = decode_state->slice_datas[j]->bo;
1842         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1843
1844         if (j == decode_state->num_slice_params - 1)
1845             next_slice_group_param = NULL;
1846         else
1847             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1848
1849         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1850             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1851
1852             if (i < decode_state->slice_params[j]->num_elements - 1)
1853                 next_slice_param = slice_param + 1;
1854             else
1855                 next_slice_param = next_slice_group_param;
1856
1857             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1858             slice_param++;
1859         }
1860     }
1861
1862     intel_batchbuffer_end_atomic(batch);
1863     intel_batchbuffer_flush(batch);
1864 }
1865
1866 static void
1867 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1868                           struct decode_state *decode_state,
1869                           struct gen7_mfd_context *gen7_mfd_context)
1870 {
1871     struct object_surface *obj_surface;
1872     VAPictureParameterBufferJPEGBaseline *pic_param;
1873     int subsampling = SUBSAMPLE_YUV420;
1874     int fourcc = VA_FOURCC_IMC3;
1875
1876     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1877
1878     if (pic_param->num_components == 1)
1879         subsampling = SUBSAMPLE_YUV400;
1880     else if (pic_param->num_components == 3) {
1881         int h1 = pic_param->components[0].h_sampling_factor;
1882         int h2 = pic_param->components[1].h_sampling_factor;
1883         int h3 = pic_param->components[2].h_sampling_factor;
1884         int v1 = pic_param->components[0].v_sampling_factor;
1885         int v2 = pic_param->components[1].v_sampling_factor;
1886         int v3 = pic_param->components[2].v_sampling_factor;
1887
1888         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1889             v1 == 2 && v2 == 1 && v3 == 1) {
1890             subsampling = SUBSAMPLE_YUV420;
1891             fourcc = VA_FOURCC_IMC3;
1892         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1893                    v1 == 1 && v2 == 1 && v3 == 1) {
1894             subsampling = SUBSAMPLE_YUV422H;
1895             fourcc = VA_FOURCC_422H;
1896         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1897                    v1 == 1 && v2 == 1 && v3 == 1) {
1898             subsampling = SUBSAMPLE_YUV444;
1899             fourcc = VA_FOURCC_444P;
1900         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1901                    v1 == 1 && v2 == 1 && v3 == 1) {
1902             subsampling = SUBSAMPLE_YUV411;
1903             fourcc = VA_FOURCC_411P;
1904         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1905                    v1 == 2 && v2 == 1 && v3 == 1) {
1906             subsampling = SUBSAMPLE_YUV422V;
1907             fourcc = VA_FOURCC_422V;
1908         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1909                    v1 == 2 && v2 == 2 && v3 == 2) {
1910             subsampling = SUBSAMPLE_YUV422H;
1911             fourcc = VA_FOURCC_422H;
1912         } else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1913                    v1 == 2 && v2 == 1 && v3 == 1) {
1914             subsampling = SUBSAMPLE_YUV422V;
1915             fourcc = VA_FOURCC_422V;
1916         } else
1917             assert(0);
1918     }
1919     else {
1920         assert(0);
1921     }
1922
1923     /* Current decoded picture */
1924     obj_surface = decode_state->render_object;
1925     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1926
1927     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1928     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1929     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1930     gen7_mfd_context->pre_deblocking_output.valid = 1;
1931
1932     gen7_mfd_context->post_deblocking_output.bo = NULL;
1933     gen7_mfd_context->post_deblocking_output.valid = 0;
1934
1935     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1936     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1937
1938     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1939     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1940
1941     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1942     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1943
1944     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1945     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1946
1947     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1948     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1949 }
1950
/* Map VA-API rotation indices (0/90/180/270) to the GEN7 JPEG rotation
 * encoding used in MFX_JPEG_PIC_STATE. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
1957
1958 static void
1959 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1960                         struct decode_state *decode_state,
1961                         struct gen7_mfd_context *gen7_mfd_context)
1962 {
1963     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1964     VAPictureParameterBufferJPEGBaseline *pic_param;
1965     int chroma_type = GEN7_YUV420;
1966     int frame_width_in_blks;
1967     int frame_height_in_blks;
1968
1969     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1970     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1971
1972     if (pic_param->num_components == 1)
1973         chroma_type = GEN7_YUV400;
1974     else if (pic_param->num_components == 3) {
1975         int h1 = pic_param->components[0].h_sampling_factor;
1976         int h2 = pic_param->components[1].h_sampling_factor;
1977         int h3 = pic_param->components[2].h_sampling_factor;
1978         int v1 = pic_param->components[0].v_sampling_factor;
1979         int v2 = pic_param->components[1].v_sampling_factor;
1980         int v3 = pic_param->components[2].v_sampling_factor;
1981
1982         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1983             v1 == 2 && v2 == 1 && v3 == 1)
1984             chroma_type = GEN7_YUV420;
1985         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1986                  v1 == 1 && v2 == 1 && v3 == 1)
1987             chroma_type = GEN7_YUV422H_2Y;
1988         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1989                  v1 == 1 && v2 == 1 && v3 == 1)
1990             chroma_type = GEN7_YUV444;
1991         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1992                  v1 == 1 && v2 == 1 && v3 == 1)
1993             chroma_type = GEN7_YUV411;
1994         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1995                  v1 == 2 && v2 == 1 && v3 == 1)
1996             chroma_type = GEN7_YUV422V_2Y;
1997         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1998                  v1 == 2 && v2 == 2 && v3 == 2)
1999             chroma_type = GEN7_YUV422H_4Y;
2000         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2001                  v1 == 2 && v2 == 1 && v3 == 1)
2002             chroma_type = GEN7_YUV422V_4Y;
2003         else
2004             assert(0);
2005     }
2006
2007     if (chroma_type == GEN7_YUV400 ||
2008         chroma_type == GEN7_YUV444 ||
2009         chroma_type == GEN7_YUV422V_2Y) {
2010         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2011         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2012     } else if (chroma_type == GEN7_YUV411) {
2013         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2014         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2015     } else {
2016         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2017         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2018     }
2019
2020     BEGIN_BCS_BATCH(batch, 3);
2021     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2022     OUT_BCS_BATCH(batch,
2023                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2024                   (chroma_type << 0));
2025     OUT_BCS_BATCH(batch,
2026                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2027                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2028     ADVANCE_BCS_BATCH(batch);
2029 }
2030
/* Huffman table IDs for MFX_JPEG_HUFF_TABLE_STATE: index 0 -> luma (Y),
 * index 1 -> chroma (UV). */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
2035
static void
gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct gen7_mfd_context *gen7_mfd_context,
                               int num_tables)
{
    /*
     * Upload up to num_tables Huffman tables via MFX_JPEG_HUFF_TABLE_STATE.
     * Each command is 53 dwords: 2 header dwords plus 12 + 12 + 16 + 164
     * bytes (= 3 + 3 + 4 + 41 dwords) of table payload.
     */
    VAHuffmanTableBufferJPEGBaseline *huffman_table;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int index;

    /* Nothing to do if the application supplied no Huffman table buffer. */
    if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
        return;

    huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;

    for (index = 0; index < num_tables; index++) {
        int id = va_to_gen7_jpeg_hufftable[index];
        /* Skip tables the application did not mark for loading. */
        if (!huffman_table->load_huffman_table[index])
            continue;
        BEGIN_BCS_BATCH(batch, 53);
        OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
        OUT_BCS_BATCH(batch, id);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
        /* NOTE(review): 164 bytes appears to read past the 162-byte
         * ac_values array into struct padding to dword-align the payload
         * — presumably intentional; confirm against the libva layout. */
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
        ADVANCE_BCS_BATCH(batch);
    }
}
2065
/* Map a 1-based JPEG component id (1=Y, 2=Cb, 3=Cr, 4=alpha) to the MFX
 * quantizer-matrix type; index 0 is unused. */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
2073
2074 static void
2075 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2076                        struct decode_state *decode_state,
2077                        struct gen7_mfd_context *gen7_mfd_context)
2078 {
2079     VAPictureParameterBufferJPEGBaseline *pic_param;
2080     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2081     int index;
2082
2083     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2084         return;
2085
2086     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2087     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2088
2089     assert(pic_param->num_components <= 3);
2090
2091     for (index = 0; index < pic_param->num_components; index++) {
2092         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2093         int qm_type;
2094         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2095         unsigned char raster_qm[64];
2096         int j;
2097
2098         if (id > 4 || id < 1)
2099             continue;
2100
2101         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2102             continue;
2103
2104         qm_type = va_to_gen7_jpeg_qm[id];
2105
2106         for (j = 0; j < 64; j++)
2107             raster_qm[zigzag_direct[j]] = qm[j];
2108
2109         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2110     }
2111 }
2112
2113 static void
2114 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2115                          VAPictureParameterBufferJPEGBaseline *pic_param,
2116                          VASliceParameterBufferJPEGBaseline *slice_param,
2117                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2118                          dri_bo *slice_data_bo,
2119                          struct gen7_mfd_context *gen7_mfd_context)
2120 {
2121     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2122     int scan_component_mask = 0;
2123     int i;
2124
2125     assert(slice_param->num_components > 0);
2126     assert(slice_param->num_components < 4);
2127     assert(slice_param->num_components <= pic_param->num_components);
2128
2129     for (i = 0; i < slice_param->num_components; i++) {
2130         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2131         case 1:
2132             scan_component_mask |= (1 << 0);
2133             break;
2134         case 2:
2135             scan_component_mask |= (1 << 1);
2136             break;
2137         case 3:
2138             scan_component_mask |= (1 << 2);
2139             break;
2140         default:
2141             assert(0);
2142             break;
2143         }
2144     }
2145
2146     BEGIN_BCS_BATCH(batch, 6);
2147     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2148     OUT_BCS_BATCH(batch, 
2149                   slice_param->slice_data_size);
2150     OUT_BCS_BATCH(batch, 
2151                   slice_param->slice_data_offset);
2152     OUT_BCS_BATCH(batch,
2153                   slice_param->slice_horizontal_position << 16 |
2154                   slice_param->slice_vertical_position << 0);
2155     OUT_BCS_BATCH(batch,
2156                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2157                   (scan_component_mask << 27) |                 /* scan components */
2158                   (0 << 26) |   /* disable interrupt allowed */
2159                   (slice_param->num_mcus << 0));                /* MCU count */
2160     OUT_BCS_BATCH(batch,
2161                   (slice_param->restart_interval << 0));    /* RestartInterval */
2162     ADVANCE_BCS_BATCH(batch);
2163 }
2164
/* Workaround for JPEG decoding, carried over from the Ivybridge (gen7) path */
2166 #ifdef JPEG_WA
2167
/*
 * A tiny pre-encoded 16x16 AVC intra clip that is decoded before each
 * real JPEG picture (see gen8_mfd_jpeg_wa) to work around a hardware
 * issue in the JPEG decoding path.
 */
static struct {
    int width;                  /* clip width in pixels */
    int height;                 /* clip height in pixels */
    unsigned char data[32];     /* pre-encoded AVC slice data; tail is zero */
    int data_size;              /* number of valid bytes in data[] */
    int data_bit_offset;        /* bit offset of the first MB in the data */
    int qp;                     /* slice quantization parameter */
} gen7_jpeg_wa_clip = {
    /* designated initializers keep the values tied to their fields even
     * if the struct layout ever changes */
    .width = 16,
    .height = 16,
    .data = {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    .data_size = 14,
    .data_bit_offset = 40,
    .qp = 28,
};
2186
/*
 * Prepare the resources for the JPEG workaround clip: a scratch 16x16
 * NV12 surface to decode into, and a buffer object holding the canned
 * slice data.  Called once per JPEG picture from gen8_mfd_jpeg_wa().
 */
static void
gen8_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAStatus status;
    struct object_surface *obj_surface;

    /* Drop the scratch surface left over from the previous picture, if any. */
    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
                             1);

    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 1,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);

    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    /* Make sure the surface is backed by an NV12 buffer object before use. */
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen7_mfd_context->jpeg_wa_surface_object = obj_surface;

    /* The canned slice data never changes, so upload it only once per context. */
    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
                                                               "JPEG WA data",
                                                               0x1000,
                                                               0x1000);
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       0,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
    }
}
2224
/*
 * Program MFX_PIPE_MODE_SELECT for the workaround clip: the engine is
 * temporarily switched to AVC VLD decoding (long format) with stream-out
 * disabled and only the pre-deblocking output enabled.
 */
static void
gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9)  | /* Post Deblocking Output */
                  (1 << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));   /* the WA clip is an AVC stream */
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
2252
/*
 * Program MFX_SURFACE_STATE for the 16x16 NV12 workaround surface
 * allocated in gen8_jpeg_wa_init().
 */
static void
gen8_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
2282
/*
 * Program MFX_PIPE_BUF_ADDR_STATE (61 dwords) for the WA decode.  Only
 * the pre-deblocking destination (the WA surface) and a temporary intra
 * row store buffer are supplied; every other address is zero.  The
 * relocation emitted into the batch keeps intra_bo alive after the
 * local reference is dropped.
 */
static void
gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *intra_bo;
    int i;

    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
                            "intra row store",
                            128 * 64,
                            0x1000);

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* DW 1-3: pre-deblocking destination = the WA surface */
    OUT_BCS_RELOC(batch,
                  obj_surface->bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0); /* DW 4-6: post deblocking, unused */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* DW 7-12: uncompressed-video & stream out, ignored for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* DW 13-15: intra row store scratch buffer */
    OUT_BCS_RELOC(batch,
                  intra_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* DW 16-18: deblocking filter row store, unused */
    OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    /* DW 19-50: reference picture addresses, none for an intra-only clip */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, 0); /* DW 51 */

        /* DW 52-54: MB status buffer address, unused */
    OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        /* DW 55-60: ILDB & second ILDB address, unused */
    OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    dri_bo_unreference(intra_bo);
}
2356
/*
 * Program MFX_BSP_BUF_BASE_ADDR_STATE with freshly allocated BSD/MPC
 * and MPR row-store scratch buffers.  The buffers are only needed for
 * the duration of the batch; the emitted relocations keep them alive
 * after the local references are dropped.
 */
static void
gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1. 0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* DW 1-3: BSD/MPC row store scratch buffer */
    OUT_BCS_RELOC(batch,
                  bsd_mpc_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 4-6: MPR row store scratch buffer */
    OUT_BCS_RELOC(batch,
                  mpr_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 7-9: remaining base addresses, unused (zero) */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
2402
/*
 * Intentionally a no-op: no MFX_QM_STATE is emitted for the WA clip.
 * Presumably the canned AVC slice relies on the default quantizer
 * matrices -- NOTE(review): confirm against the WA stream.
 */
static void
gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{

}
2409
/*
 * Program MFX_AVC_IMG_STATE for the workaround clip: a single-MB
 * (16x16) progressive frame, CABAC, 4:2:0, no MBAFF.
 */
static void
gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct = 0; /* frame picture */
    int mbaff_frame_flag = 0;
    unsigned int width_in_mbs = 1, height_in_mbs = 1; /* the clip is one MB */

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  width_in_mbs * height_in_mbs); /* total MB count */
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 24) |
                  (0 << 16) |
                  (0 << 14) |
                  (0 << 13) |
                  (0 << 12) | /* differ from GEN6 */
                  (0 << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (0 << 6) |
                  (0 << 5) |
                  (0 << 4) |
                  (0 << 3) |
                  (1 << 2) |
                  (mbaff_frame_flag << 1) |
                  (0 << 0));
    /* DW 5-15: remaining image state fields are zero for the WA clip */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2457
2458 static void
2459 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2460                                   struct gen7_mfd_context *gen7_mfd_context)
2461 {
2462     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2463     int i;
2464
2465     BEGIN_BCS_BATCH(batch, 71);
2466     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2467
2468     /* reference surfaces 0..15 */
2469     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2470         OUT_BCS_BATCH(batch, 0); /* top */
2471         OUT_BCS_BATCH(batch, 0); /* bottom */
2472     }
2473         
2474         OUT_BCS_BATCH(batch, 0);
2475
2476     /* the current decoding frame/field */
2477     OUT_BCS_BATCH(batch, 0); /* top */
2478     OUT_BCS_BATCH(batch, 0);
2479     OUT_BCS_BATCH(batch, 0);
2480
2481     /* POC List */
2482     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2483         OUT_BCS_BATCH(batch, 0);
2484         OUT_BCS_BATCH(batch, 0);
2485     }
2486
2487     OUT_BCS_BATCH(batch, 0);
2488     OUT_BCS_BATCH(batch, 0);
2489
2490     ADVANCE_BCS_BATCH(batch);
2491 }
2492
2493 static void
2494 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2495                                      struct gen7_mfd_context *gen7_mfd_context)
2496 {
2497     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2498
2499     BEGIN_BCS_BATCH(batch, 11);
2500     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2501     OUT_BCS_RELOC(batch,
2502                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2503                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2504                   0);
2505     OUT_BCS_BATCH(batch, 0);
2506     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2507     OUT_BCS_BATCH(batch, 0);
2508     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2509     OUT_BCS_BATCH(batch, 0);
2510     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2511     OUT_BCS_BATCH(batch, 0);
2512     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2513     OUT_BCS_BATCH(batch, 0);
2514     ADVANCE_BCS_BATCH(batch);
2515 }
2516
/*
 * Emit MFD_AVC_BSD_OBJECT to kick off decoding of the canned WA slice.
 * The slice data bit offset is split into a byte offset (high bits) and
 * the remaining 0-7 bit offset (low 3 bits).
 */
static void
gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size); /* indirect data length */
    OUT_BCS_BATCH(batch, 0); /* indirect data start offset */
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) | /* byte offset */
                  (0 << 5)  |
                  (0 << 4)  |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7)); /* remaining bit offset */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2543
/*
 * Program MFX_AVC_SLICE_STATE for the single I slice covering the
 * one-MB workaround clip, with deblocking disabled.
 */
static void
gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* the slice spans the whole (one MB high) picture */
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0; /* intra slice: empty reference lists */
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2583
/*
 * JPEG decoding workaround: before each real JPEG picture, run the MFX
 * engine once in AVC VLD mode over the tiny canned clip
 * (gen7_jpeg_wa_clip), targeting a private scratch surface.  The
 * sequence mirrors a regular AVC decode: pipe mode, surface and buffer
 * address state, image/slice state, then the BSD object.
 */
static void
gen8_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen8_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
2603
2604 #endif
2605
2606 void
2607 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2608                              struct decode_state *decode_state,
2609                              struct gen7_mfd_context *gen7_mfd_context)
2610 {
2611     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2612     VAPictureParameterBufferJPEGBaseline *pic_param;
2613     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2614     dri_bo *slice_data_bo;
2615     int i, j, max_selector = 0;
2616
2617     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2618     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2619
2620     /* Currently only support Baseline DCT */
2621     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2622     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2623 #ifdef JPEG_WA
2624     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2625 #endif
2626     intel_batchbuffer_emit_mi_flush(batch);
2627     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2628     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2629     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2630     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2631     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2632
2633     for (j = 0; j < decode_state->num_slice_params; j++) {
2634         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2635         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2636         slice_data_bo = decode_state->slice_datas[j]->bo;
2637         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2638
2639         if (j == decode_state->num_slice_params - 1)
2640             next_slice_group_param = NULL;
2641         else
2642             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2643
2644         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2645             int component;
2646
2647             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2648
2649             if (i < decode_state->slice_params[j]->num_elements - 1)
2650                 next_slice_param = slice_param + 1;
2651             else
2652                 next_slice_param = next_slice_group_param;
2653
2654             for (component = 0; component < slice_param->num_components; component++) {
2655                 if (max_selector < slice_param->components[component].dc_table_selector)
2656                     max_selector = slice_param->components[component].dc_table_selector;
2657
2658                 if (max_selector < slice_param->components[component].ac_table_selector)
2659                     max_selector = slice_param->components[component].ac_table_selector;
2660             }
2661
2662             slice_param++;
2663         }
2664     }
2665
2666     assert(max_selector < 2);
2667     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2668
2669     for (j = 0; j < decode_state->num_slice_params; j++) {
2670         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2671         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2672         slice_data_bo = decode_state->slice_datas[j]->bo;
2673         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2674
2675         if (j == decode_state->num_slice_params - 1)
2676             next_slice_group_param = NULL;
2677         else
2678             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2679
2680         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2681             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2682
2683             if (i < decode_state->slice_params[j]->num_elements - 1)
2684                 next_slice_param = slice_param + 1;
2685             else
2686                 next_slice_param = next_slice_group_param;
2687
2688             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2689             slice_param++;
2690         }
2691     }
2692
2693     intel_batchbuffer_end_atomic(batch);
2694     intel_batchbuffer_flush(batch);
2695 }
2696
/* VP8 DC dequantization lookup table (see RFC 6386, section 14.1),
 * indexed by a quantization index clipped to [0, 127] via
 * vp8_clip_quantization_index(). */
static const int vp8_dc_qlookup[128] =
{
      4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
     18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
     29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
     44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
     59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
     75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
     91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
};
2708
/* VP8 AC dequantization lookup table (see RFC 6386, section 14.1),
 * indexed by a quantization index clipped to [0, 127] via
 * vp8_clip_quantization_index(). */
static const int vp8_ac_qlookup[128] =
{
      4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
     20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
     36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
     52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
     78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
};
2720
/* Clamp a VP8 quantization index into the valid lookup-table range [0, 127]. */
static inline unsigned int vp8_clip_quantization_index(int index)
{
    if (index < 0)
        return 0;

    return (index > 127) ? 127 : (unsigned int)index;
}
2730
2731 static void
2732 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2733                           struct decode_state *decode_state,
2734                           struct gen7_mfd_context *gen7_mfd_context)
2735 {
2736     struct object_surface *obj_surface;
2737     struct i965_driver_data *i965 = i965_driver_data(ctx);
2738     dri_bo *bo;
2739     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2740     int width_in_mbs = (pic_param->frame_width + 15) / 16;
2741     int height_in_mbs = (pic_param->frame_height + 15) / 16;
2742
2743     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2744     assert(height_in_mbs > 0 && height_in_mbs <= 256);
2745
2746     intel_update_vp8_frame_store_index(ctx,
2747                                        decode_state,
2748                                        pic_param,
2749                                        gen7_mfd_context->reference_surface);
2750
2751     /* Current decoded picture */
2752     obj_surface = decode_state->render_object;
2753     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2754
2755     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2756     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2757     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2758     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2759
2760     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2761     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2762     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2763     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2764
2765     intel_ensure_vp8_segmentation_buffer(ctx,
2766         &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2767
2768     /* The same as AVC */
2769     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2770     bo = dri_bo_alloc(i965->intel.bufmgr,
2771                       "intra row store",
2772                       width_in_mbs * 64,
2773                       0x1000);
2774     assert(bo);
2775     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2776     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2777
2778     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2779     bo = dri_bo_alloc(i965->intel.bufmgr,
2780                       "deblocking filter row store",
2781                       width_in_mbs * 64 * 4,
2782                       0x1000);
2783     assert(bo);
2784     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2785     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2786
2787     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2788     bo = dri_bo_alloc(i965->intel.bufmgr,
2789                       "bsd mpc row store",
2790                       width_in_mbs * 64 * 2,
2791                       0x1000);
2792     assert(bo);
2793     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2794     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2795
2796     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2797     bo = dri_bo_alloc(i965->intel.bufmgr,
2798                       "mpr row store",
2799                       width_in_mbs * 64 * 2,
2800                       0x1000);
2801     assert(bo);
2802     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2803     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2804
2805     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2806 }
2807
2808 static void
2809 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2810                        struct decode_state *decode_state,
2811                        struct gen7_mfd_context *gen7_mfd_context)
2812 {
2813     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2814     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2815     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2816     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2817     dri_bo *probs_bo = decode_state->probability_data->bo;
2818     int i, j,log2num;
2819     unsigned int quantization_value[4][6];
2820
2821     /* There is no safe way to error out if the segmentation buffer
2822        could not be allocated. So, instead of aborting, simply decode
2823        something even if the result may look totally inacurate */
2824     const unsigned int enable_segmentation =
2825         pic_param->pic_fields.bits.segmentation_enabled &&
2826         gen7_mfd_context->segmentation_buffer.valid;
2827         
2828     log2num = (int)log2(slice_param->num_of_partitions - 1);
2829
2830     BEGIN_BCS_BATCH(batch, 38);
2831     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2832     OUT_BCS_BATCH(batch,
2833                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2834                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2835     OUT_BCS_BATCH(batch,
2836                   log2num << 24 |
2837                   pic_param->pic_fields.bits.sharpness_level << 16 |
2838                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2839                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2840                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2841                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2842                   (enable_segmentation &&
2843                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
2844                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2845                   (enable_segmentation &&
2846                    !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2847                   (enable_segmentation &&
2848                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2849                   (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2850                   pic_param->pic_fields.bits.filter_type << 4 |
2851                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2852                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2853
2854     OUT_BCS_BATCH(batch,
2855                   pic_param->loop_filter_level[3] << 24 |
2856                   pic_param->loop_filter_level[2] << 16 |
2857                   pic_param->loop_filter_level[1] <<  8 |
2858                   pic_param->loop_filter_level[0] <<  0);
2859
2860     /* Quantizer Value for 4 segmetns, DW4-DW15 */
2861     for (i = 0; i < 4; i++) {
2862                 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2863                 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2864                 quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
2865                 /* 101581>>16 is equivalent to 155/100 */
2866                 quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
2867                 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2868                 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2869
2870                 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2871                 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2872
2873                 OUT_BCS_BATCH(batch,
2874                       quantization_value[i][0] << 16 | /* Y1AC */
2875                       quantization_value[i][1] <<  0); /* Y1DC */
2876         OUT_BCS_BATCH(batch,
2877                       quantization_value[i][5] << 16 | /* UVAC */
2878                       quantization_value[i][4] <<  0); /* UVDC */
2879         OUT_BCS_BATCH(batch,
2880                       quantization_value[i][3] << 16 | /* Y2AC */
2881                       quantization_value[i][2] <<  0); /* Y2DC */
2882     }
2883
2884     /* CoeffProbability table for non-key frame, DW16-DW18 */
2885     if (probs_bo) {
2886         OUT_BCS_RELOC(batch, probs_bo,
2887                       0, I915_GEM_DOMAIN_INSTRUCTION,
2888                       0);
2889         OUT_BCS_BATCH(batch, 0);
2890         OUT_BCS_BATCH(batch, 0);
2891     } else {
2892         OUT_BCS_BATCH(batch, 0);
2893         OUT_BCS_BATCH(batch, 0);
2894         OUT_BCS_BATCH(batch, 0);
2895     }
2896
2897     OUT_BCS_BATCH(batch,
2898                   pic_param->mb_segment_tree_probs[2] << 16 |
2899                   pic_param->mb_segment_tree_probs[1] <<  8 |
2900                   pic_param->mb_segment_tree_probs[0] <<  0);
2901
2902     OUT_BCS_BATCH(batch,
2903                   pic_param->prob_skip_false << 24 |
2904                   pic_param->prob_intra      << 16 |
2905                   pic_param->prob_last       <<  8 |
2906                   pic_param->prob_gf         <<  0);
2907
2908     OUT_BCS_BATCH(batch,
2909                   pic_param->y_mode_probs[3] << 24 |
2910                   pic_param->y_mode_probs[2] << 16 |
2911                   pic_param->y_mode_probs[1] <<  8 |
2912                   pic_param->y_mode_probs[0] <<  0);
2913
2914     OUT_BCS_BATCH(batch,
2915                   pic_param->uv_mode_probs[2] << 16 |
2916                   pic_param->uv_mode_probs[1] <<  8 |
2917                   pic_param->uv_mode_probs[0] <<  0);
2918     
2919     /* MV update value, DW23-DW32 */
2920     for (i = 0; i < 2; i++) {
2921         for (j = 0; j < 20; j += 4) {
2922             OUT_BCS_BATCH(batch,
2923                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2924                           pic_param->mv_probs[i][j + 2] << 16 |
2925                           pic_param->mv_probs[i][j + 1] <<  8 |
2926                           pic_param->mv_probs[i][j + 0] <<  0);
2927         }
2928     }
2929
2930     OUT_BCS_BATCH(batch,
2931                   (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2932                   (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2933                   (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
2934                   (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);
2935
2936     OUT_BCS_BATCH(batch,
2937                   (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2938                   (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2939                   (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
2940                   (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);
2941
2942     /* segmentation id stream base address, DW35-DW37 */
2943     if (enable_segmentation) {
2944         OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2945                       0, I915_GEM_DOMAIN_INSTRUCTION,
2946                       0);
2947         OUT_BCS_BATCH(batch, 0);
2948         OUT_BCS_BATCH(batch, 0);
2949     }
2950     else {
2951         OUT_BCS_BATCH(batch, 0);
2952         OUT_BCS_BATCH(batch, 0);
2953         OUT_BCS_BATCH(batch, 0);
2954     }
2955     ADVANCE_BCS_BATCH(batch);
2956 }
2957
/*
 * Emit the MFD_VP8_BSD_OBJECT command describing the VP8 frame's
 * bitstream partitions to the MFX bit-stream decoder.
 *
 * Partition 0 (the control partition) holds the entropy-coded macroblock
 * modes/MVs; its coded data begins slice_data_offset +
 * ceil(macroblock_offset / 8) bytes into the slice buffer.  The 1..8
 * token partitions follow; their byte sizes are stored in the stream
 * right after partition 0, 3 bytes per size for all but the last one.
 *
 * ctx and slice_data_bo are unused here; the slice data buffer itself is
 * programmed earlier via MFX_IND_OBJ_BASE_ADDR_STATE (see the caller),
 * so this command only carries offsets into that buffer.
 */
static void
gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVP8 *pic_param,
                        VASliceParameterBufferVP8 *slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i, log2num;
    /* Byte offset of partition 0's coded data: round the bit offset up. */
    unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
    /* Bits already consumed from the boolean decoder's current byte
     * (bool_coder_ctx.count = bits still available in that byte). */
    unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
    unsigned int partition_size_0 = slice_param->partition_size[0];

    assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
    if (used_bits == 8) {
        /* The current byte is fully consumed: advance to the next byte
         * and shrink partition 0 accordingly. */
        used_bits = 0;
        offset += 1;
        partition_size_0 -= 1;
    }

    /* 1 control partition + 1, 2, 4 or 8 token partitions. */
    assert(slice_param->num_of_partitions >= 2);
    assert(slice_param->num_of_partitions <= 9);

    /* Exact for the legal token-partition counts (1/2/4/8) -> 0..3. */
    log2num = (int)log2(slice_param->num_of_partitions - 1);

    BEGIN_BCS_BATCH(batch, 22);
    OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
    OUT_BCS_BATCH(batch,
                  used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
                  pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
                  log2num << 4 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
                  0);

    OUT_BCS_BATCH(batch, partition_size_0 + 1);
    OUT_BCS_BATCH(batch, offset);
    /* Partition sizes in bytes are present after the above first partition
     * when there is more than one token partition: 3 bytes per size for
     * the (num_of_partitions - 2) non-final token partitions. */
    offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
    for (i = 1; i < 9; i++) {
        if (i < slice_param->num_of_partitions) {
            OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
            OUT_BCS_BATCH(batch, offset);
        } else {
            /* Unused partition slots are zeroed.  offset keeps
             * accumulating stale partition_size entries here, which is
             * harmless: once i >= num_of_partitions it is never emitted
             * again. */
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }

        offset += slice_param->partition_size[i];
    }

    OUT_BCS_BATCH(batch, 0); /* concealment method */

    ADVANCE_BCS_BATCH(batch);
}
3014
/*
 * Decode one VP8 frame.
 *
 * VP8 carries exactly one "slice" per frame, so the incoming
 * decode_state must provide exactly one slice parameter buffer (with a
 * single element), one slice data buffer, and the coefficient
 * probability data; anything else is rejected with a warning and the
 * frame is dropped.
 *
 * On valid input this (re)initializes the per-frame decoder state and
 * emits the full MFX command sequence into the BCS batch in the order
 * the hardware requires: pipe mode select, surface state, buffer
 * addresses, BSD buffer bases, indirect object base, VP8 picture state
 * and finally the BSD object, then flushes the batch.
 */
void
gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVP8 *pic_param;
    VASliceParameterBufferVP8 *slice_param;
    dri_bo *slice_data_bo;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;

    /* one slice per frame */
    if (decode_state->num_slice_params != 1 ||
        (!decode_state->slice_params ||
         !decode_state->slice_params[0] ||
         (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
        (!decode_state->slice_datas ||
         !decode_state->slice_datas[0] ||
         !decode_state->slice_datas[0]->bo) ||
        !decode_state->probability_data) {
        WARN_ONCE("Wrong parameters for VP8 decoding\n");

        return;
    }

    slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
    slice_data_bo = decode_state->slice_datas[0]->bo;

    gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
3058
3059 static VAStatus
3060 gen8_mfd_decode_picture(VADriverContextP ctx, 
3061                         VAProfile profile, 
3062                         union codec_state *codec_state,
3063                         struct hw_context *hw_context)
3064
3065 {
3066     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3067     struct decode_state *decode_state = &codec_state->decode;
3068     VAStatus vaStatus;
3069
3070     assert(gen7_mfd_context);
3071
3072     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3073
3074     if (vaStatus != VA_STATUS_SUCCESS)
3075         goto out;
3076
3077     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3078
3079     switch (profile) {
3080     case VAProfileMPEG2Simple:
3081     case VAProfileMPEG2Main:
3082         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3083         break;
3084         
3085     case VAProfileH264ConstrainedBaseline:
3086     case VAProfileH264Main:
3087     case VAProfileH264High:
3088     case VAProfileH264StereoHigh:
3089     case VAProfileH264MultiviewHigh:
3090         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3091         break;
3092
3093     case VAProfileVC1Simple:
3094     case VAProfileVC1Main:
3095     case VAProfileVC1Advanced:
3096         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3097         break;
3098
3099     case VAProfileJPEGBaseline:
3100         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3101         break;
3102
3103     case VAProfileVP8Version0_3:
3104         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3105         break;
3106
3107     default:
3108         assert(0);
3109         break;
3110     }
3111
3112     vaStatus = VA_STATUS_SUCCESS;
3113
3114 out:
3115     return vaStatus;
3116 }
3117
3118 static void
3119 gen8_mfd_context_destroy(void *hw_context)
3120 {
3121     VADriverContextP ctx;
3122     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3123
3124     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3125
3126     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3127     gen7_mfd_context->post_deblocking_output.bo = NULL;
3128
3129     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3130     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3131
3132     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3133     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3134
3135     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3136     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3137
3138     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3139     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3140
3141     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3142     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3143
3144     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3145     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3146
3147     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3148     gen7_mfd_context->segmentation_buffer.bo = NULL;
3149
3150     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3151
3152     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3153         i965_DestroySurfaces(ctx,
3154                              &gen7_mfd_context->jpeg_wa_surface_id,
3155                              1);
3156         gen7_mfd_context->jpeg_wa_surface_object = NULL;
3157     }
3158
3159     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3160     free(gen7_mfd_context);
3161 }
3162
3163 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3164                                     struct gen7_mfd_context *gen7_mfd_context)
3165 {
3166     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3167     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3168     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3169     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3170 }
3171
3172 struct hw_context *
3173 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3174 {
3175     struct intel_driver_data *intel = intel_driver_data(ctx);
3176     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3177     int i;
3178
3179     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3180     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3181     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3182
3183     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3184         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3185         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3186     }
3187
3188     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3189     gen7_mfd_context->segmentation_buffer.valid = 0;
3190
3191     switch (obj_config->profile) {
3192     case VAProfileMPEG2Simple:
3193     case VAProfileMPEG2Main:
3194         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3195         break;
3196
3197     case VAProfileH264ConstrainedBaseline:
3198     case VAProfileH264Main:
3199     case VAProfileH264High:
3200     case VAProfileH264StereoHigh:
3201     case VAProfileH264MultiviewHigh:
3202         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3203         break;
3204     default:
3205         break;
3206     }
3207
3208     gen7_mfd_context->driver_context = ctx;
3209     return (struct hw_context *)gen7_mfd_context;
3210 }