OSDN Git Service

3f31d5e8d21aae6a6b5131fae74fcae9e3023c0a
[android-x86/hardware-intel-common-vaapi.git] / src / gen75_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui  <yakui.zhao@intel.com>
27  *
28  */
29 #include "sysdeps.h"
30
31 #include <va/va_dec_jpeg.h>
32
33 #include "intel_batchbuffer.h"
34 #include "intel_driver.h"
35 #include "i965_defines.h"
36 #include "i965_drv_video.h"
37 #include "i965_decoder_utils.h"
38 #include "gen7_mfd.h"
39 #include "intel_media.h"
40
41 #define B0_STEP_REV     2
42 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
43
44 static const uint32_t zigzag_direct[64] = {
45     0,   1,  8, 16,  9,  2,  3, 10,
46     17, 24, 32, 25, 18, 11,  4,  5,
47     12, 19, 26, 33, 40, 48, 41, 34,
48     27, 20, 13,  6,  7, 14, 21, 28,
49     35, 42, 49, 56, 57, 50, 43, 36,
50     29, 22, 15, 23, 30, 37, 44, 51,
51     58, 59, 52, 45, 38, 31, 39, 46,
52     53, 60, 61, 54, 47, 55, 62, 63
53 };
54
55 static void
56 gen75_mfd_init_avc_surface(VADriverContextP ctx,
57                            VAPictureParameterBufferH264 *pic_param,
58                            struct object_surface *obj_surface)
59 {
60     struct i965_driver_data *i965 = i965_driver_data(ctx);
61     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
62     int width_in_mbs, height_in_mbs;
63
64     obj_surface->free_private_data = gen_free_avc_surface;
65     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
66     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
67
68     if (!gen7_avc_surface) {
69         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
70         assert(gen7_avc_surface);
71         gen7_avc_surface->base.frame_store_id = -1;
72         assert((obj_surface->size & 0x3f) == 0);
73         obj_surface->private_data = gen7_avc_surface;
74     }
75
76     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
77                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
78
79     if (gen7_avc_surface->dmv_top == NULL) {
80         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
81                                                  "direct mv w/r buffer",
82                                                  width_in_mbs * height_in_mbs * 128,
83                                                  0x1000);
84         assert(gen7_avc_surface->dmv_top);
85     }
86
87     if (gen7_avc_surface->dmv_bottom_flag &&
88         gen7_avc_surface->dmv_bottom == NULL) {
89         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
90                                                     "direct mv w/r buffer",
91                                                     width_in_mbs * height_in_mbs * 128,
92                                                     0x1000);
93         assert(gen7_avc_surface->dmv_bottom);
94     }
95 }
96
97 static void
98 gen75_mfd_pipe_mode_select(VADriverContextP ctx,
99                            struct decode_state *decode_state,
100                            int standard_select,
101                            struct gen7_mfd_context *gen7_mfd_context)
102 {
103     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
104
105     assert(standard_select == MFX_FORMAT_MPEG2 ||
106            standard_select == MFX_FORMAT_AVC ||
107            standard_select == MFX_FORMAT_VC1 ||
108            standard_select == MFX_FORMAT_JPEG);
109
110     BEGIN_BCS_BATCH(batch, 5);
111     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
112     OUT_BCS_BATCH(batch,
113                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
114                   (MFD_MODE_VLD << 15) | /* VLD mode */
115                   (0 << 10) | /* disable Stream-Out */
116                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
117                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
118                   (0 << 5)  | /* not in stitch mode */
119                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
120                   (standard_select << 0));
121     OUT_BCS_BATCH(batch,
122                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
123                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
124                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
125                   (0 << 1)  |
126                   (0 << 0));
127     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
128     OUT_BCS_BATCH(batch, 0); /* reserved */
129     ADVANCE_BCS_BATCH(batch);
130 }
131
132 static void
133 gen75_mfd_surface_state(VADriverContextP ctx,
134                         struct decode_state *decode_state,
135                         int standard_select,
136                         struct gen7_mfd_context *gen7_mfd_context)
137 {
138     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
139     struct object_surface *obj_surface = decode_state->render_object;
140     unsigned int y_cb_offset;
141     unsigned int y_cr_offset;
142     unsigned int surface_format;
143
144     assert(obj_surface);
145
146     y_cb_offset = obj_surface->y_cb_offset;
147     y_cr_offset = obj_surface->y_cr_offset;
148
149     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
150                      MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
151
152     BEGIN_BCS_BATCH(batch, 6);
153     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
154     OUT_BCS_BATCH(batch, 0);
155     OUT_BCS_BATCH(batch,
156                   ((obj_surface->orig_height - 1) << 18) |
157                   ((obj_surface->orig_width - 1) << 4));
158     OUT_BCS_BATCH(batch,
159                   (surface_format << 28) | /* 420 planar YUV surface */
160                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
161                   (0 << 22) | /* surface object control state, ignored */
162                   ((obj_surface->width - 1) << 3) | /* pitch */
163                   (0 << 2)  | /* must be 0 */
164                   (1 << 1)  | /* must be tiled */
165                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
166     OUT_BCS_BATCH(batch,
167                   (0 << 16) | /* X offset for U(Cb), must be 0 */
168                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
169     OUT_BCS_BATCH(batch,
170                   (0 << 16) | /* X offset for V(Cr), must be 0 */
171                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
172     ADVANCE_BCS_BATCH(batch);
173 }
174
175 static void
176 gen75_mfd_pipe_buf_addr_state_bplus(VADriverContextP ctx,
177                                     struct decode_state *decode_state,
178                                     int standard_select,
179                                     struct gen7_mfd_context *gen7_mfd_context)
180 {
181     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
182     int i;
183
184     BEGIN_BCS_BATCH(batch, 61);
185     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
186     /* Pre-deblock 1-3 */
187     if (gen7_mfd_context->pre_deblocking_output.valid)
188         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
189                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
190                       0);
191     else
192         OUT_BCS_BATCH(batch, 0);
193
194     OUT_BCS_BATCH(batch, 0);
195     OUT_BCS_BATCH(batch, 0);
196     /* Post-debloing 4-6 */
197     if (gen7_mfd_context->post_deblocking_output.valid)
198         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
199                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
200                       0);
201     else
202         OUT_BCS_BATCH(batch, 0);
203
204     OUT_BCS_BATCH(batch, 0);
205     OUT_BCS_BATCH(batch, 0);
206
207     /* uncompressed-video & stream out 7-12 */
208     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
209     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
210     OUT_BCS_BATCH(batch, 0);
211     OUT_BCS_BATCH(batch, 0);
212     OUT_BCS_BATCH(batch, 0);
213     OUT_BCS_BATCH(batch, 0);
214
215     /* intra row-store scratch 13-15 */
216     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
217         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
218                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
219                       0);
220     else
221         OUT_BCS_BATCH(batch, 0);
222
223     OUT_BCS_BATCH(batch, 0);
224     OUT_BCS_BATCH(batch, 0);
225     /* deblocking-filter-row-store 16-18 */
226     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
227         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
228                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
229                       0);
230     else
231         OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     OUT_BCS_BATCH(batch, 0);
234
235     /* DW 19..50 */
236     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
237         struct object_surface *obj_surface;
238
239         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
240             gen7_mfd_context->reference_surface[i].obj_surface &&
241             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
242             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
243
244             OUT_BCS_RELOC(batch, obj_surface->bo,
245                           I915_GEM_DOMAIN_INSTRUCTION, 0,
246                           0);
247         } else {
248             OUT_BCS_BATCH(batch, 0);
249         }
250         OUT_BCS_BATCH(batch, 0);
251     }
252     /* reference property 51 */
253     OUT_BCS_BATCH(batch, 0);
254
255     /* Macroblock status & ILDB 52-57 */
256     OUT_BCS_BATCH(batch, 0);
257     OUT_BCS_BATCH(batch, 0);
258     OUT_BCS_BATCH(batch, 0);
259     OUT_BCS_BATCH(batch, 0);
260     OUT_BCS_BATCH(batch, 0);
261     OUT_BCS_BATCH(batch, 0);
262
263     /* the second Macroblock status 58-60 */
264     OUT_BCS_BATCH(batch, 0);
265     OUT_BCS_BATCH(batch, 0);
266     OUT_BCS_BATCH(batch, 0);
267     ADVANCE_BCS_BATCH(batch);
268 }
269
270 static void
271 gen75_mfd_pipe_buf_addr_state(VADriverContextP ctx,
272                               struct decode_state *decode_state,
273                               int standard_select,
274                               struct gen7_mfd_context *gen7_mfd_context)
275 {
276     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
277     struct i965_driver_data *i965 = i965_driver_data(ctx);
278     int i;
279
280     if (IS_STEPPING_BPLUS(i965)) {
281         gen75_mfd_pipe_buf_addr_state_bplus(ctx, decode_state,
282                                             standard_select, gen7_mfd_context);
283         return;
284     }
285
286     BEGIN_BCS_BATCH(batch, 25);
287     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
288     if (gen7_mfd_context->pre_deblocking_output.valid)
289         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
290                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
291                       0);
292     else
293         OUT_BCS_BATCH(batch, 0);
294
295     if (gen7_mfd_context->post_deblocking_output.valid)
296         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
297                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
298                       0);
299     else
300         OUT_BCS_BATCH(batch, 0);
301
302     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
303     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
304
305     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
306         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
307                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
308                       0);
309     else
310         OUT_BCS_BATCH(batch, 0);
311
312     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
313         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
314                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
315                       0);
316     else
317         OUT_BCS_BATCH(batch, 0);
318
319     /* DW 7..22 */
320     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
321         struct object_surface *obj_surface;
322
323         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
324             gen7_mfd_context->reference_surface[i].obj_surface &&
325             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
326             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
327
328             OUT_BCS_RELOC(batch, obj_surface->bo,
329                           I915_GEM_DOMAIN_INSTRUCTION, 0,
330                           0);
331         } else {
332             OUT_BCS_BATCH(batch, 0);
333         }
334     }
335
336     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
337     OUT_BCS_BATCH(batch, 0);   /* ignore DW24 for decoding */
338     ADVANCE_BCS_BATCH(batch);
339 }
340
341 static void
342 gen75_mfd_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
343                                         dri_bo *slice_data_bo,
344                                         int standard_select,
345                                         struct gen7_mfd_context *gen7_mfd_context)
346 {
347     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
348
349     BEGIN_BCS_BATCH(batch, 26);
350     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
351     /* MFX In BS 1-5 */
352     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
353     OUT_BCS_BATCH(batch, 0);
354     OUT_BCS_BATCH(batch, 0);
355     /* Upper bound 4-5 */
356     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
357     OUT_BCS_BATCH(batch, 0);
358
359     /* MFX indirect MV 6-10 */
360     OUT_BCS_BATCH(batch, 0);
361     OUT_BCS_BATCH(batch, 0);
362     OUT_BCS_BATCH(batch, 0);
363     OUT_BCS_BATCH(batch, 0);
364     OUT_BCS_BATCH(batch, 0);
365
366     /* MFX IT_COFF 11-15 */
367     OUT_BCS_BATCH(batch, 0);
368     OUT_BCS_BATCH(batch, 0);
369     OUT_BCS_BATCH(batch, 0);
370     OUT_BCS_BATCH(batch, 0);
371     OUT_BCS_BATCH(batch, 0);
372
373     /* MFX IT_DBLK 16-20 */
374     OUT_BCS_BATCH(batch, 0);
375     OUT_BCS_BATCH(batch, 0);
376     OUT_BCS_BATCH(batch, 0);
377     OUT_BCS_BATCH(batch, 0);
378     OUT_BCS_BATCH(batch, 0);
379
380     /* MFX PAK_BSE object for encoder 21-25 */
381     OUT_BCS_BATCH(batch, 0);
382     OUT_BCS_BATCH(batch, 0);
383     OUT_BCS_BATCH(batch, 0);
384     OUT_BCS_BATCH(batch, 0);
385     OUT_BCS_BATCH(batch, 0);
386
387     ADVANCE_BCS_BATCH(batch);
388 }
389
390 static void
391 gen75_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
392                                   dri_bo *slice_data_bo,
393                                   int standard_select,
394                                   struct gen7_mfd_context *gen7_mfd_context)
395 {
396     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
397     struct i965_driver_data *i965 = i965_driver_data(ctx);
398
399     if (IS_STEPPING_BPLUS(i965)) {
400         gen75_mfd_ind_obj_base_addr_state_bplus(ctx, slice_data_bo,
401                                                 standard_select, gen7_mfd_context);
402         return;
403     }
404
405     BEGIN_BCS_BATCH(batch, 11);
406     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
407     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
408     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
409     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
410     OUT_BCS_BATCH(batch, 0);
411     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
412     OUT_BCS_BATCH(batch, 0);
413     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
414     OUT_BCS_BATCH(batch, 0);
415     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
416     OUT_BCS_BATCH(batch, 0);
417     ADVANCE_BCS_BATCH(batch);
418 }
419
420 static void
421 gen75_mfd_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
422                                         struct decode_state *decode_state,
423                                         int standard_select,
424                                         struct gen7_mfd_context *gen7_mfd_context)
425 {
426     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
427
428     BEGIN_BCS_BATCH(batch, 10);
429     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
430
431     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
432         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
433                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
434                       0);
435     else
436         OUT_BCS_BATCH(batch, 0);
437
438     OUT_BCS_BATCH(batch, 0);
439     OUT_BCS_BATCH(batch, 0);
440     /* MPR Row Store Scratch buffer 4-6 */
441     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
442         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
443                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
444                       0);
445     else
446         OUT_BCS_BATCH(batch, 0);
447     OUT_BCS_BATCH(batch, 0);
448     OUT_BCS_BATCH(batch, 0);
449
450     /* Bitplane 7-9 */
451     if (gen7_mfd_context->bitplane_read_buffer.valid)
452         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
453                       I915_GEM_DOMAIN_INSTRUCTION, 0,
454                       0);
455     else
456         OUT_BCS_BATCH(batch, 0);
457     OUT_BCS_BATCH(batch, 0);
458     OUT_BCS_BATCH(batch, 0);
459
460     ADVANCE_BCS_BATCH(batch);
461 }
462
463 static void
464 gen75_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
465                                   struct decode_state *decode_state,
466                                   int standard_select,
467                                   struct gen7_mfd_context *gen7_mfd_context)
468 {
469     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
470     struct i965_driver_data *i965 = i965_driver_data(ctx);
471
472     if (IS_STEPPING_BPLUS(i965)) {
473         gen75_mfd_bsp_buf_base_addr_state_bplus(ctx, decode_state,
474                                                 standard_select, gen7_mfd_context);
475         return;
476     }
477
478     BEGIN_BCS_BATCH(batch, 4);
479     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
480
481     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
482         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
483                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
484                       0);
485     else
486         OUT_BCS_BATCH(batch, 0);
487
488     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
489         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
490                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
491                       0);
492     else
493         OUT_BCS_BATCH(batch, 0);
494
495     if (gen7_mfd_context->bitplane_read_buffer.valid)
496         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
497                       I915_GEM_DOMAIN_INSTRUCTION, 0,
498                       0);
499     else
500         OUT_BCS_BATCH(batch, 0);
501
502     ADVANCE_BCS_BATCH(batch);
503 }
504
505 static void
506 gen75_mfd_qm_state(VADriverContextP ctx,
507                    int qm_type,
508                    unsigned char *qm,
509                    int qm_length,
510                    struct gen7_mfd_context *gen7_mfd_context)
511 {
512     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
513     unsigned int qm_buffer[16];
514
515     assert(qm_length <= 16 * 4);
516     memcpy(qm_buffer, qm, qm_length);
517
518     BEGIN_BCS_BATCH(batch, 18);
519     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
520     OUT_BCS_BATCH(batch, qm_type << 0);
521     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
522     ADVANCE_BCS_BATCH(batch);
523 }
524
525 static void
526 gen75_mfd_avc_img_state(VADriverContextP ctx,
527                         struct decode_state *decode_state,
528                         struct gen7_mfd_context *gen7_mfd_context)
529 {
530     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
531     int img_struct;
532     int mbaff_frame_flag;
533     unsigned int width_in_mbs, height_in_mbs;
534     VAPictureParameterBufferH264 *pic_param;
535
536     assert(decode_state->pic_param && decode_state->pic_param->buffer);
537     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
538
539     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
540
541     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
542         img_struct = 1;
543     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
544         img_struct = 3;
545     else
546         img_struct = 0;
547
548     if ((img_struct & 0x1) == 0x1) {
549         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
550     } else {
551         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
552     }
553
554     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
555         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
556         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
557     } else {
558         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
559     }
560
561     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
562                         !pic_param->pic_fields.bits.field_pic_flag);
563
564     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
565     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
566
567     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
568     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
569            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
570     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
571
572     BEGIN_BCS_BATCH(batch, 17);
573     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
574     OUT_BCS_BATCH(batch,
575                   (width_in_mbs * height_in_mbs - 1));
576     OUT_BCS_BATCH(batch,
577                   ((height_in_mbs - 1) << 16) |
578                   ((width_in_mbs - 1) << 0));
579     OUT_BCS_BATCH(batch,
580                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
581                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
582                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
583                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
584                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
585                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
586                   (img_struct << 8));
587     OUT_BCS_BATCH(batch,
588                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
589                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
590                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
591                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
592                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
593                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
594                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
595                   (mbaff_frame_flag << 1) |
596                   (pic_param->pic_fields.bits.field_pic_flag << 0));
597     OUT_BCS_BATCH(batch, 0);
598     OUT_BCS_BATCH(batch, 0);
599     OUT_BCS_BATCH(batch, 0);
600     OUT_BCS_BATCH(batch, 0);
601     OUT_BCS_BATCH(batch, 0);
602     OUT_BCS_BATCH(batch, 0);
603     OUT_BCS_BATCH(batch, 0);
604     OUT_BCS_BATCH(batch, 0);
605     OUT_BCS_BATCH(batch, 0);
606     OUT_BCS_BATCH(batch, 0);
607     OUT_BCS_BATCH(batch, 0);
608     OUT_BCS_BATCH(batch, 0);
609     ADVANCE_BCS_BATCH(batch);
610 }
611
612 static void
613 gen75_mfd_avc_qm_state(VADriverContextP ctx,
614                        struct decode_state *decode_state,
615                        struct gen7_mfd_context *gen7_mfd_context)
616 {
617     VAIQMatrixBufferH264 *iq_matrix;
618     VAPictureParameterBufferH264 *pic_param;
619
620     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
621         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
622     else
623         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
624
625     assert(decode_state->pic_param && decode_state->pic_param->buffer);
626     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
627
628     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
629     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
630
631     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
632         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
633         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
634     }
635 }
636
637 static inline void
638 gen75_mfd_avc_picid_state(VADriverContextP ctx,
639                           struct decode_state *decode_state,
640                           struct gen7_mfd_context *gen7_mfd_context)
641 {
642     gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
643                                gen7_mfd_context->reference_surface);
644 }
645
646 static void
647 gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx,
648                                      struct decode_state *decode_state,
649                                      VAPictureParameterBufferH264 *pic_param,
650                                      VASliceParameterBufferH264 *slice_param,
651                                      struct gen7_mfd_context *gen7_mfd_context)
652 {
653     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
654     struct object_surface *obj_surface;
655     GenAvcSurface *gen7_avc_surface;
656     VAPictureH264 *va_pic;
657     int i;
658
659     BEGIN_BCS_BATCH(batch, 71);
660     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
661
662     /* reference surfaces 0..15 */
663     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
664         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
665             gen7_mfd_context->reference_surface[i].obj_surface &&
666             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
667
668             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
669             gen7_avc_surface = obj_surface->private_data;
670             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
671                           I915_GEM_DOMAIN_INSTRUCTION, 0,
672                           0);
673             OUT_BCS_BATCH(batch, 0);
674         } else {
675             OUT_BCS_BATCH(batch, 0);
676             OUT_BCS_BATCH(batch, 0);
677         }
678     }
679
680     OUT_BCS_BATCH(batch, 0);
681
682     /* the current decoding frame/field */
683     va_pic = &pic_param->CurrPic;
684     obj_surface = decode_state->render_object;
685     assert(obj_surface->bo && obj_surface->private_data);
686     gen7_avc_surface = obj_surface->private_data;
687
688     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
689                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
690                   0);
691
692     OUT_BCS_BATCH(batch, 0);
693     OUT_BCS_BATCH(batch, 0);
694
695     /* POC List */
696     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
697         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
698
699         if (obj_surface) {
700             const VAPictureH264 * const va_pic = avc_find_picture(
701                                                      obj_surface->base.id, pic_param->ReferenceFrames,
702                                                      ARRAY_ELEMS(pic_param->ReferenceFrames));
703
704             assert(va_pic != NULL);
705             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
706             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
707         } else {
708             OUT_BCS_BATCH(batch, 0);
709             OUT_BCS_BATCH(batch, 0);
710         }
711     }
712
713     va_pic = &pic_param->CurrPic;
714     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
715     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
716
717     ADVANCE_BCS_BATCH(batch);
718 }
719
720 static void
721 gen75_mfd_avc_directmode_state(VADriverContextP ctx,
722                                struct decode_state *decode_state,
723                                VAPictureParameterBufferH264 *pic_param,
724                                VASliceParameterBufferH264 *slice_param,
725                                struct gen7_mfd_context *gen7_mfd_context)
726 {
727     struct i965_driver_data *i965 = i965_driver_data(ctx);
728     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
729     struct object_surface *obj_surface;
730     GenAvcSurface *gen7_avc_surface;
731     VAPictureH264 *va_pic;
732     int i;
733
734     if (IS_STEPPING_BPLUS(i965)) {
735         gen75_mfd_avc_directmode_state_bplus(ctx, decode_state, pic_param, slice_param,
736                                              gen7_mfd_context);
737
738         return;
739     }
740
741     BEGIN_BCS_BATCH(batch, 69);
742     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
743
744     /* reference surfaces 0..15 */
745     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
746         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
747             gen7_mfd_context->reference_surface[i].obj_surface &&
748             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
749
750             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
751             gen7_avc_surface = obj_surface->private_data;
752
753             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
754                           I915_GEM_DOMAIN_INSTRUCTION, 0,
755                           0);
756
757             if (gen7_avc_surface->dmv_bottom_flag == 1)
758                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
759                               I915_GEM_DOMAIN_INSTRUCTION, 0,
760                               0);
761             else
762                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
763                               I915_GEM_DOMAIN_INSTRUCTION, 0,
764                               0);
765         } else {
766             OUT_BCS_BATCH(batch, 0);
767             OUT_BCS_BATCH(batch, 0);
768         }
769     }
770
771     /* the current decoding frame/field */
772     va_pic = &pic_param->CurrPic;
773     obj_surface = decode_state->render_object;
774     assert(obj_surface->bo && obj_surface->private_data);
775     gen7_avc_surface = obj_surface->private_data;
776
777     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
778                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
779                   0);
780
781     if (gen7_avc_surface->dmv_bottom_flag == 1)
782         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
783                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
784                       0);
785     else
786         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
787                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
788                       0);
789
790     /* POC List */
791     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
792         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
793
794         if (obj_surface) {
795             const VAPictureH264 * const va_pic = avc_find_picture(
796                                                      obj_surface->base.id, pic_param->ReferenceFrames,
797                                                      ARRAY_ELEMS(pic_param->ReferenceFrames));
798
799             assert(va_pic != NULL);
800             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
801             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
802         } else {
803             OUT_BCS_BATCH(batch, 0);
804             OUT_BCS_BATCH(batch, 0);
805         }
806     }
807
808     va_pic = &pic_param->CurrPic;
809     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
810     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
811
812     ADVANCE_BCS_BATCH(batch);
813 }
814
815 static void
816 gen75_mfd_avc_phantom_slice_first(VADriverContextP ctx,
817                                   VAPictureParameterBufferH264 *pic_param,
818                                   VASliceParameterBufferH264 *next_slice_param,
819                                   struct gen7_mfd_context *gen7_mfd_context)
820 {
821     gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
822 }
823
824 static void
825 gen75_mfd_avc_slice_state(VADriverContextP ctx,
826                           VAPictureParameterBufferH264 *pic_param,
827                           VASliceParameterBufferH264 *slice_param,
828                           VASliceParameterBufferH264 *next_slice_param,
829                           struct gen7_mfd_context *gen7_mfd_context)
830 {
831     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
832     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
833     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
834     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
835     int num_ref_idx_l0, num_ref_idx_l1;
836     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
837                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
838     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
839     int slice_type;
840
841     if (slice_param->slice_type == SLICE_TYPE_I ||
842         slice_param->slice_type == SLICE_TYPE_SI) {
843         slice_type = SLICE_TYPE_I;
844     } else if (slice_param->slice_type == SLICE_TYPE_P ||
845                slice_param->slice_type == SLICE_TYPE_SP) {
846         slice_type = SLICE_TYPE_P;
847     } else {
848         assert(slice_param->slice_type == SLICE_TYPE_B);
849         slice_type = SLICE_TYPE_B;
850     }
851
852     if (slice_type == SLICE_TYPE_I) {
853         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
854         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
855         num_ref_idx_l0 = 0;
856         num_ref_idx_l1 = 0;
857     } else if (slice_type == SLICE_TYPE_P) {
858         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
859         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
860         num_ref_idx_l1 = 0;
861     } else {
862         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
863         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
864     }
865
866     first_mb_in_slice = slice_param->first_mb_in_slice;
867     slice_hor_pos = first_mb_in_slice % width_in_mbs;
868     slice_ver_pos = first_mb_in_slice / width_in_mbs;
869
870     if (mbaff_picture)
871         slice_ver_pos = slice_ver_pos << 1;
872
873     if (next_slice_param) {
874         first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
875         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
876         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
877
878         if (mbaff_picture)
879             next_slice_ver_pos = next_slice_ver_pos << 1;
880     } else {
881         next_slice_hor_pos = 0;
882         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
883     }
884
885     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
886     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
887     OUT_BCS_BATCH(batch, slice_type);
888     OUT_BCS_BATCH(batch,
889                   (num_ref_idx_l1 << 24) |
890                   (num_ref_idx_l0 << 16) |
891                   (slice_param->chroma_log2_weight_denom << 8) |
892                   (slice_param->luma_log2_weight_denom << 0));
893     OUT_BCS_BATCH(batch,
894                   (slice_param->direct_spatial_mv_pred_flag << 29) |
895                   (slice_param->disable_deblocking_filter_idc << 27) |
896                   (slice_param->cabac_init_idc << 24) |
897                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
898                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
899                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
900     OUT_BCS_BATCH(batch,
901                   (slice_ver_pos << 24) |
902                   (slice_hor_pos << 16) |
903                   (first_mb_in_slice << 0));
904     OUT_BCS_BATCH(batch,
905                   (next_slice_ver_pos << 16) |
906                   (next_slice_hor_pos << 0));
907     OUT_BCS_BATCH(batch,
908                   (next_slice_param == NULL) << 19); /* last slice flag */
909     OUT_BCS_BATCH(batch, 0);
910     OUT_BCS_BATCH(batch, 0);
911     OUT_BCS_BATCH(batch, 0);
912     OUT_BCS_BATCH(batch, 0);
913     ADVANCE_BCS_BATCH(batch);
914 }
915
916 static inline void
917 gen75_mfd_avc_ref_idx_state(VADriverContextP ctx,
918                             VAPictureParameterBufferH264 *pic_param,
919                             VASliceParameterBufferH264 *slice_param,
920                             struct gen7_mfd_context *gen7_mfd_context)
921 {
922     gen6_send_avc_ref_idx_state(
923         gen7_mfd_context->base.batch,
924         slice_param,
925         gen7_mfd_context->reference_surface
926     );
927 }
928
929 static void
930 gen75_mfd_avc_weightoffset_state(VADriverContextP ctx,
931                                  VAPictureParameterBufferH264 *pic_param,
932                                  VASliceParameterBufferH264 *slice_param,
933                                  struct gen7_mfd_context *gen7_mfd_context)
934 {
935     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
936     int i, j, num_weight_offset_table = 0;
937     short weightoffsets[32 * 6];
938
939     if ((slice_param->slice_type == SLICE_TYPE_P ||
940          slice_param->slice_type == SLICE_TYPE_SP) &&
941         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
942         num_weight_offset_table = 1;
943     }
944
945     if ((slice_param->slice_type == SLICE_TYPE_B) &&
946         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
947         num_weight_offset_table = 2;
948     }
949
950     for (i = 0; i < num_weight_offset_table; i++) {
951         BEGIN_BCS_BATCH(batch, 98);
952         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
953         OUT_BCS_BATCH(batch, i);
954
955         if (i == 0) {
956             for (j = 0; j < 32; j++) {
957                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
958                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
959                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
960                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
961                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
962                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
963             }
964         } else {
965             for (j = 0; j < 32; j++) {
966                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
967                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
968                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
969                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
970                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
971                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
972             }
973         }
974
975         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
976         ADVANCE_BCS_BATCH(batch);
977     }
978 }
979
980 static void
981 gen75_mfd_avc_bsd_object(VADriverContextP ctx,
982                          VAPictureParameterBufferH264 *pic_param,
983                          VASliceParameterBufferH264 *slice_param,
984                          dri_bo *slice_data_bo,
985                          VASliceParameterBufferH264 *next_slice_param,
986                          struct gen7_mfd_context *gen7_mfd_context)
987 {
988     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
989     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
990                                                             slice_param,
991                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
992
993     /* the input bitsteam format on GEN7 differs from GEN6 */
994     BEGIN_BCS_BATCH(batch, 6);
995     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
996     OUT_BCS_BATCH(batch,
997                   (slice_param->slice_data_size - slice_param->slice_data_offset));
998     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
999     OUT_BCS_BATCH(batch,
1000                   (0 << 31) |
1001                   (0 << 14) |
1002                   (0 << 12) |
1003                   (0 << 10) |
1004                   (0 << 8));
1005     OUT_BCS_BATCH(batch,
1006                   ((slice_data_bit_offset >> 3) << 16) |
1007                   (1 << 7)  |
1008                   (0 << 5)  |
1009                   (0 << 4)  |
1010                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
1011                   (slice_data_bit_offset & 0x7));
1012     OUT_BCS_BATCH(batch, 0);
1013     ADVANCE_BCS_BATCH(batch);
1014 }
1015
1016 static inline void
1017 gen75_mfd_avc_context_init(
1018     VADriverContextP         ctx,
1019     struct gen7_mfd_context *gen7_mfd_context
1020 )
1021 {
1022     /* Initialize flat scaling lists */
1023     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
1024 }
1025
1026 static void
1027 gen75_mfd_avc_decode_init(VADriverContextP ctx,
1028                           struct decode_state *decode_state,
1029                           struct gen7_mfd_context *gen7_mfd_context)
1030 {
1031     VAPictureParameterBufferH264 *pic_param;
1032     VASliceParameterBufferH264 *slice_param;
1033     struct i965_driver_data *i965 = i965_driver_data(ctx);
1034     struct object_surface *obj_surface;
1035     dri_bo *bo;
1036     int i, j, enable_avc_ildb = 0;
1037     unsigned int width_in_mbs, height_in_mbs;
1038
1039     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
1040         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1041         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1042
1043         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1044             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1045             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1046                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1047                    (slice_param->slice_type == SLICE_TYPE_P) ||
1048                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1049                    (slice_param->slice_type == SLICE_TYPE_B));
1050
1051             if (slice_param->disable_deblocking_filter_idc != 1) {
1052                 enable_avc_ildb = 1;
1053                 break;
1054             }
1055
1056             slice_param++;
1057         }
1058     }
1059
1060     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1061     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1062     gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
1063                                        gen7_mfd_context->reference_surface);
1064     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
1065     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
1066     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
1067     assert(height_in_mbs > 0 && height_in_mbs <= 256);
1068
1069     /* Current decoded picture */
1070     obj_surface = decode_state->render_object;
1071     if (pic_param->pic_fields.bits.reference_pic_flag)
1072         obj_surface->flags |= SURFACE_REFERENCED;
1073     else
1074         obj_surface->flags &= ~SURFACE_REFERENCED;
1075
1076     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
1077     gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface);
1078
1079     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1080     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1081     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1082     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
1083
1084     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1085     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1086     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1087     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
1088
1089     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1090     bo = dri_bo_alloc(i965->intel.bufmgr,
1091                       "intra row store",
1092                       width_in_mbs * 64,
1093                       0x1000);
1094     assert(bo);
1095     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1096     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1097
1098     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1099     bo = dri_bo_alloc(i965->intel.bufmgr,
1100                       "deblocking filter row store",
1101                       width_in_mbs * 64 * 4,
1102                       0x1000);
1103     assert(bo);
1104     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1105     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1106
1107     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1108     bo = dri_bo_alloc(i965->intel.bufmgr,
1109                       "bsd mpc row store",
1110                       width_in_mbs * 64 * 2,
1111                       0x1000);
1112     assert(bo);
1113     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1114     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1115
1116     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
1117     bo = dri_bo_alloc(i965->intel.bufmgr,
1118                       "mpr row store",
1119                       width_in_mbs * 64 * 2,
1120                       0x1000);
1121     assert(bo);
1122     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
1123     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
1124
1125     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1126 }
1127
1128 static void
1129 gen75_mfd_avc_decode_picture(VADriverContextP ctx,
1130                              struct decode_state *decode_state,
1131                              struct gen7_mfd_context *gen7_mfd_context)
1132 {
1133     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1134     VAPictureParameterBufferH264 *pic_param;
1135     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
1136     dri_bo *slice_data_bo;
1137     int i, j;
1138
1139     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1140     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1141     gen75_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
1142
1143     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1144     intel_batchbuffer_emit_mi_flush(batch);
1145     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1146     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1147     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1148     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1149     gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
1150     gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
1151     gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
1152
1153     for (j = 0; j < decode_state->num_slice_params; j++) {
1154         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1155         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1156         slice_data_bo = decode_state->slice_datas[j]->bo;
1157         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
1158
1159         if (j == decode_state->num_slice_params - 1)
1160             next_slice_group_param = NULL;
1161         else
1162             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
1163
1164         if (j == 0 && slice_param->first_mb_in_slice)
1165             gen75_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
1166
1167         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1168             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1169             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1170                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1171                    (slice_param->slice_type == SLICE_TYPE_P) ||
1172                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1173                    (slice_param->slice_type == SLICE_TYPE_B));
1174
1175             if (i < decode_state->slice_params[j]->num_elements - 1)
1176                 next_slice_param = slice_param + 1;
1177             else
1178                 next_slice_param = next_slice_group_param;
1179
1180             gen75_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
1181             gen75_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
1182             gen75_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
1183             gen75_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1184             gen75_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
1185             slice_param++;
1186         }
1187     }
1188
1189     intel_batchbuffer_end_atomic(batch);
1190     intel_batchbuffer_flush(batch);
1191 }
1192
1193 static void
1194 gen75_mfd_mpeg2_decode_init(VADriverContextP ctx,
1195                             struct decode_state *decode_state,
1196                             struct gen7_mfd_context *gen7_mfd_context)
1197 {
1198     VAPictureParameterBufferMPEG2 *pic_param;
1199     struct i965_driver_data *i965 = i965_driver_data(ctx);
1200     struct object_surface *obj_surface;
1201     dri_bo *bo;
1202     unsigned int width_in_mbs;
1203
1204     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1205     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1206     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1207
1208     mpeg2_set_reference_surfaces(
1209         ctx,
1210         gen7_mfd_context->reference_surface,
1211         decode_state,
1212         pic_param
1213     );
1214
1215     /* Current decoded picture */
1216     obj_surface = decode_state->render_object;
1217     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1218
1219     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1220     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1221     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1222     gen7_mfd_context->pre_deblocking_output.valid = 1;
1223
1224     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1225     bo = dri_bo_alloc(i965->intel.bufmgr,
1226                       "bsd mpc row store",
1227                       width_in_mbs * 96,
1228                       0x1000);
1229     assert(bo);
1230     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1231     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1232
1233     gen7_mfd_context->post_deblocking_output.valid = 0;
1234     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1235     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1236     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1237     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1238 }
1239
1240 static void
1241 gen75_mfd_mpeg2_pic_state(VADriverContextP ctx,
1242                           struct decode_state *decode_state,
1243                           struct gen7_mfd_context *gen7_mfd_context)
1244 {
1245     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1246     VAPictureParameterBufferMPEG2 *pic_param;
1247     unsigned int slice_concealment_disable_bit = 0;
1248
1249     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1250     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1251
1252     slice_concealment_disable_bit = 1;
1253
1254     BEGIN_BCS_BATCH(batch, 13);
1255     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1256     OUT_BCS_BATCH(batch,
1257                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1258                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1259                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1260                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1261                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1262                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1263                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1264                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1265                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1266                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1267                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1268                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1269     OUT_BCS_BATCH(batch,
1270                   pic_param->picture_coding_type << 9);
1271     OUT_BCS_BATCH(batch,
1272                   (slice_concealment_disable_bit << 31) |
1273                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1274                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1275     OUT_BCS_BATCH(batch, 0);
1276     OUT_BCS_BATCH(batch, 0);
1277     OUT_BCS_BATCH(batch, 0);
1278     OUT_BCS_BATCH(batch, 0);
1279     OUT_BCS_BATCH(batch, 0);
1280     OUT_BCS_BATCH(batch, 0);
1281     OUT_BCS_BATCH(batch, 0);
1282     OUT_BCS_BATCH(batch, 0);
1283     OUT_BCS_BATCH(batch, 0);
1284     ADVANCE_BCS_BATCH(batch);
1285 }
1286
1287 static void
1288 gen75_mfd_mpeg2_qm_state(VADriverContextP ctx,
1289                          struct decode_state *decode_state,
1290                          struct gen7_mfd_context *gen7_mfd_context)
1291 {
1292     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1293     int i, j;
1294
1295     /* Update internal QM state */
1296     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1297         VAIQMatrixBufferMPEG2 * const iq_matrix =
1298             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1299
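         /*
          * The matrices in VAIQMatrixBufferMPEG2 are in zig-zag scan order;
          * de-zig-zag them into raster order for MFX_QM_STATE and cache the
          * result so it is reused by frames that do not reload a matrix.
          */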
1300         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1301             iq_matrix->load_intra_quantiser_matrix) {
1302             gen_iq_matrix->load_intra_quantiser_matrix =
1303                 iq_matrix->load_intra_quantiser_matrix;
1304             if (iq_matrix->load_intra_quantiser_matrix) {
1305                 for (j = 0; j < 64; j++)
1306                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1307                         iq_matrix->intra_quantiser_matrix[j];
1308             }
1309         }
1310
1311         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1312             iq_matrix->load_non_intra_quantiser_matrix) {
1313             gen_iq_matrix->load_non_intra_quantiser_matrix =
1314                 iq_matrix->load_non_intra_quantiser_matrix;
1315             if (iq_matrix->load_non_intra_quantiser_matrix) {
1316                 for (j = 0; j < 64; j++)
1317                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1318                         iq_matrix->non_intra_quantiser_matrix[j];
1319             }
1320         }
1321     }
1322
1323     /* Commit QM state to HW */
1324     for (i = 0; i < 2; i++) {
1325         unsigned char *qm = NULL;
1326         int qm_type;
1327
1328         if (i == 0) {
1329             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1330                 qm = gen_iq_matrix->intra_quantiser_matrix;
1331                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1332             }
1333         } else {
1334             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1335                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1336                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1337             }
1338         }
1339
1340         if (!qm)
1341             continue;
1342
1343         gen75_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1344     }
1345 }
1346
1347 static void
1348 gen75_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1349                            VAPictureParameterBufferMPEG2 *pic_param,
1350                            VASliceParameterBufferMPEG2 *slice_param,
1351                            VASliceParameterBufferMPEG2 *next_slice_param,
1352                            struct gen7_mfd_context *gen7_mfd_context)
1353 {
1354     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1355     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1356     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1357
1358     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1359         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1360         is_field_pic = 1;
1361     is_field_pic_wa = is_field_pic &&
1362                       gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1363
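         /*
          * When the slice-vertical-position workaround is active for field
          * pictures, the positions supplied by the application are assumed to
          * be in frame macroblock rows and are therefore halved here.
          */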
1364     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1365     hpos0 = slice_param->slice_horizontal_position;
1366
1367     if (next_slice_param == NULL) {
1368         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1369         hpos1 = 0;
1370     } else {
1371         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1372         hpos1 = next_slice_param->slice_horizontal_position;
1373     }
1374
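         /* Number of macroblocks covered by this slice: from its start
          * position up to the start of the next slice, or to the end of the
          * picture for the last slice. */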
1375     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1376
1377     BEGIN_BCS_BATCH(batch, 5);
1378     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1379     OUT_BCS_BATCH(batch,
1380                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1381     OUT_BCS_BATCH(batch,
1382                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1383     OUT_BCS_BATCH(batch,
1384                   hpos0 << 24 |
1385                   vpos0 << 16 |
1386                   mb_count << 8 |
1387                   (next_slice_param == NULL) << 5 |
1388                   (next_slice_param == NULL) << 3 |
1389                   (slice_param->macroblock_offset & 0x7));
1390     OUT_BCS_BATCH(batch,
1391                   (slice_param->quantiser_scale_code << 24) |
1392                   (vpos1 << 8 | hpos1));
1393     ADVANCE_BCS_BATCH(batch);
1394 }
1395
1396 static void
1397 gen75_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1398                                struct decode_state *decode_state,
1399                                struct gen7_mfd_context *gen7_mfd_context)
1400 {
1401     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1402     VAPictureParameterBufferMPEG2 *pic_param;
1403     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1404     dri_bo *slice_data_bo;
1405     int i, j;
1406
1407     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1408     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1409
1410     gen75_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1411     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1412     intel_batchbuffer_emit_mi_flush(batch);
1413     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1414     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1415     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1416     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1417     gen75_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1418     gen75_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1419
1420     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1421         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1422             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1423
1424     for (j = 0; j < decode_state->num_slice_params; j++) {
1425         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1426         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1427         slice_data_bo = decode_state->slice_datas[j]->bo;
1428         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1429
1430         if (j == decode_state->num_slice_params - 1)
1431             next_slice_group_param = NULL;
1432         else
1433             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1434
1435         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1436             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1437
1438             if (i < decode_state->slice_params[j]->num_elements - 1)
1439                 next_slice_param = slice_param + 1;
1440             else
1441                 next_slice_param = next_slice_group_param;
1442
1443             gen75_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1444             slice_param++;
1445         }
1446     }
1447
1448     intel_batchbuffer_end_atomic(batch);
1449     intel_batchbuffer_flush(batch);
1450 }
1451
1452 static const int va_to_gen7_vc1_pic_type[5] = {
1453     GEN7_VC1_I_PICTURE,
1454     GEN7_VC1_P_PICTURE,
1455     GEN7_VC1_B_PICTURE,
1456     GEN7_VC1_BI_PICTURE,
1457     GEN7_VC1_P_PICTURE,
1458 };
1459
1460 static const int va_to_gen7_vc1_mv[4] = {
1461     1, /* 1-MV */
1462     2, /* 1-MV half-pel */
1463     3, /* 1-MV half-pel bilinear */
1464     0, /* Mixed MV */
1465 };
1466
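     /*
      * B-picture (BFRACTION) scale factors indexed by b_picture_fraction;
      * the selected value is used below to derive the reference distance and
      * is programmed into MFD_VC1_LONG_PIC_STATE.
      */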
1467 static const int b_picture_scale_factor[21] = {
1468     128, 85,  170, 64,  192,
1469     51,  102, 153, 204, 43,
1470     215, 37,  74,  111, 148,
1471     185, 222, 32,  96,  160,
1472     224,
1473 };
1474
1475 static const int va_to_gen7_vc1_condover[3] = {
1476     0,
1477     2,
1478     3
1479 };
1480
1481 static const int va_to_gen7_vc1_profile[4] = {
1482     GEN7_VC1_SIMPLE_PROFILE,
1483     GEN7_VC1_MAIN_PROFILE,
1484     GEN7_VC1_RESERVED_PROFILE,
1485     GEN7_VC1_ADVANCED_PROFILE
1486 };
1487
1488 static void
1489 gen75_mfd_free_vc1_surface(void **data)
1490 {
1491     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1492
1493     if (!gen7_vc1_surface)
1494         return;
1495
1496     dri_bo_unreference(gen7_vc1_surface->dmv);
1497     free(gen7_vc1_surface);
1498     *data = NULL;
1499 }
1500
1501 static void
1502 gen75_mfd_init_vc1_surface(VADriverContextP ctx,
1503                            VAPictureParameterBufferVC1 *pic_param,
1504                            struct object_surface *obj_surface)
1505 {
1506     struct i965_driver_data *i965 = i965_driver_data(ctx);
1507     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1508     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1509     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1510
1511     obj_surface->free_private_data = gen75_mfd_free_vc1_surface;
1512
1513     if (!gen7_vc1_surface) {
1514         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1515         assert(gen7_vc1_surface);
1516         assert((obj_surface->size & 0x3f) == 0);
1517         obj_surface->private_data = gen7_vc1_surface;
1518     }
1519
1520     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1521     gen7_vc1_surface->intensity_compensation = 0;
1522     gen7_vc1_surface->luma_scale = 0;
1523     gen7_vc1_surface->luma_shift = 0;
1524
1525     if (gen7_vc1_surface->dmv == NULL) {
1526         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1527                                              "direct mv w/r buffer",
1528                                              width_in_mbs * height_in_mbs * 64,
1529                                              0x1000);
1530     }
1531 }
1532
1533 static void
1534 gen75_mfd_vc1_decode_init(VADriverContextP ctx,
1535                           struct decode_state *decode_state,
1536                           struct gen7_mfd_context *gen7_mfd_context)
1537 {
1538     VAPictureParameterBufferVC1 *pic_param;
1539     struct i965_driver_data *i965 = i965_driver_data(ctx);
1540     struct object_surface *obj_surface;
1541     dri_bo *bo;
1542     int width_in_mbs;
1543     int picture_type;
1544     int intensity_compensation;
1545
1546     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1547     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1548     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1549     picture_type = pic_param->picture_fields.bits.picture_type;
1550     intensity_compensation = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1551
1552     intel_update_vc1_frame_store_index(ctx,
1553                                        decode_state,
1554                                        pic_param,
1555                                        gen7_mfd_context->reference_surface);
1556
1557     /* Forward reference picture */
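         /*
          * If this P picture applies intensity compensation, record the luma
          * scale/shift on the forward reference surface so that
          * MFX_VC1_PRED_PIPE_STATE can program them when that surface is used
          * as a reference.
          */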
1558     obj_surface = decode_state->reference_objects[0];
1559     if (pic_param->forward_reference_picture != VA_INVALID_ID &&
1560         obj_surface &&
1561         obj_surface->private_data) {
1562         if (picture_type == 1 && intensity_compensation) { /* P picture */
1563             struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1564
1565             gen7_vc1_surface->intensity_compensation = intensity_compensation;
1566             gen7_vc1_surface->luma_scale = pic_param->luma_scale;
1567             gen7_vc1_surface->luma_shift = pic_param->luma_shift;
1568         }
1569     }
1570
1571     /* Current decoded picture */
1572     obj_surface = decode_state->render_object;
1573     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1574     gen75_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1575
1576     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1577     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1578     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1579     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1580
1581     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1582     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1583     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1584     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1585
1586     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1587     bo = dri_bo_alloc(i965->intel.bufmgr,
1588                       "intra row store",
1589                       width_in_mbs * 64,
1590                       0x1000);
1591     assert(bo);
1592     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1593     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1594
1595     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1596     bo = dri_bo_alloc(i965->intel.bufmgr,
1597                       "deblocking filter row store",
1598                       width_in_mbs * 7 * 64,
1599                       0x1000);
1600     assert(bo);
1601     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1602     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1603
1604     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1605     bo = dri_bo_alloc(i965->intel.bufmgr,
1606                       "bsd mpc row store",
1607                       width_in_mbs * 96,
1608                       0x1000);
1609     assert(bo);
1610     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1611     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1612
1613     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1614
1615     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1616     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1617
1618     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1619         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1620         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1621         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1622         int src_w, src_h;
1623         uint8_t *src = NULL, *dst = NULL;
1624
1625         assert(decode_state->bit_plane->buffer);
1626         src = decode_state->bit_plane->buffer;
1627
1628         bo = dri_bo_alloc(i965->intel.bufmgr,
1629                           "VC-1 Bitplane",
1630                           bitplane_width * height_in_mbs,
1631                           0x1000);
1632         assert(bo);
1633         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1634
1635         dri_bo_map(bo, True);
1636         assert(bo->virtual);
1637         dst = bo->virtual;
1638
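             /*
              * Repack the VC-1 bitplane: the VA buffer carries one 4-bit value
              * per macroblock, two macroblocks per byte.  The loop below
              * rewrites them row by row into bitplane_width bytes per
              * macroblock row as read by the hardware; for skipped pictures
              * bit 1 is additionally set in every macroblock value.
              */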
1639         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1640             for (src_w = 0; src_w < width_in_mbs; src_w++) {
1641                 int src_index, dst_index;
1642                 int src_shift;
1643                 uint8_t src_value;
1644
1645                 src_index = (src_h * width_in_mbs + src_w) / 2;
1646                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1647                 src_value = ((src[src_index] >> src_shift) & 0xf);
1648
1649                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1650                     src_value |= 0x2;
1651                 }
1652
1653                 dst_index = src_w / 2;
1654                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1655             }
1656
1657             if (src_w & 1)
1658                 dst[src_w / 2] >>= 4;
1659
1660             dst += bitplane_width;
1661         }
1662
1663         dri_bo_unmap(bo);
1664     } else
1665         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1666 }
1667
1668 static void
1669 gen75_mfd_vc1_pic_state(VADriverContextP ctx,
1670                         struct decode_state *decode_state,
1671                         struct gen7_mfd_context *gen7_mfd_context)
1672 {
1673     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1674     VAPictureParameterBufferVC1 *pic_param;
1675     struct object_surface *obj_surface;
1676     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1677     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1678     int unified_mv_mode;
1679     int ref_field_pic_polarity = 0;
1680     int scale_factor = 0;
1681     int trans_ac_y = 0;
1682     int dmv_surface_valid = 0;
1683     int brfd = 0;
1684     int fcm = 0;
1685     int picture_type;
1686     int profile;
1687     int overlap;
1688     int interpolation_mode = 0;
1689
1690     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1691     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1692
1693     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1694     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1695     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1696     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1697     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1698     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1699     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1700     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1701
1702     if (dquant == 0) {
1703         alt_pquant_config = 0;
1704         alt_pquant_edge_mask = 0;
1705     } else if (dquant == 2) {
1706         alt_pquant_config = 1;
1707         alt_pquant_edge_mask = 0xf;
1708     } else {
1709         assert(dquant == 1);
1710         if (dquantfrm == 0) {
1711             alt_pquant_config = 0;
1712             alt_pquant_edge_mask = 0;
1713             alt_pq = 0;
1714         } else {
1715             assert(dquantfrm == 1);
1716             alt_pquant_config = 1;
1717
1718             switch (dqprofile) {
1719             case 3:
1720                 if (dqbilevel == 0) {
1721                     alt_pquant_config = 2;
1722                     alt_pquant_edge_mask = 0;
1723                 } else {
1724                     assert(dqbilevel == 1);
1725                     alt_pquant_config = 3;
1726                     alt_pquant_edge_mask = 0;
1727                 }
1728                 break;
1729
1730             case 0:
1731                 alt_pquant_edge_mask = 0xf;
1732                 break;
1733
1734             case 1:
1735                 if (dqdbedge == 3)
1736                     alt_pquant_edge_mask = 0x9;
1737                 else
1738                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1739
1740                 break;
1741
1742             case 2:
1743                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1744                 break;
1745
1746             default:
1747                 assert(0);
1748             }
1749         }
1750     }
1751
1752     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1753         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1754         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1755     } else {
1756         assert(pic_param->mv_fields.bits.mv_mode < 4);
1757         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1758     }
1759
1760     if (pic_param->sequence_fields.bits.interlace == 1 &&
1761         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1762         /* FIXME: calculate reference field picture polarity */
1763         assert(0);
1764         ref_field_pic_polarity = 0;
1765     }
1766
1767     if (pic_param->b_picture_fraction < 21)
1768         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1769
1770     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1771
1772     if (profile == GEN7_VC1_ADVANCED_PROFILE &&
1773         picture_type == GEN7_VC1_I_PICTURE)
1774         picture_type = GEN7_VC1_BI_PICTURE;
1775
1776     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1777         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1778     else {
1779         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1780
1781         /*
1782          * 8.3.6.2.1 Transform Type Selection
1783          * If variable-sized transform coding is not enabled,
1784          * then the 8x8 transform shall be used for all blocks.
1785          * This is also an MFX_VC1_PIC_STATE requirement.
1786          */
1787         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1788             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1789             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1790         }
1791     }
1792
1793     if (picture_type == GEN7_VC1_B_PICTURE) {
1794         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1795
1796         obj_surface = decode_state->reference_objects[1];
1797
1798         if (obj_surface)
1799             gen7_vc1_surface = obj_surface->private_data;
1800
1801         if (!gen7_vc1_surface ||
1802             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1803              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1804             dmv_surface_valid = 0;
1805         else
1806             dmv_surface_valid = 1;
1807     }
1808
1809     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1810
1811     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1812         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1813     else {
1814         if (pic_param->picture_fields.bits.top_field_first)
1815             fcm = 2;
1816         else
1817             fcm = 3;
1818     }
1819
1820     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1821         brfd = pic_param->reference_fields.bits.reference_distance;
1822         brfd = (scale_factor * brfd) >> 8;
1823         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1824
1825         if (brfd < 0)
1826             brfd = 0;
1827     }
1828
1829     overlap = pic_param->sequence_fields.bits.overlap;
1830
1831     if (overlap) {
1832         overlap = 0;
1833         if (profile != GEN7_VC1_ADVANCED_PROFILE) {
1834             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1835                 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1836                 overlap = 1;
1837             }
1838         } else {
1839             if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1840                 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1841                 overlap = 1;
1842             }
1843             if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1844                 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
1845                 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1846                     overlap = 1;
1847                 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1848                            va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1849                     overlap = 1;
1850                 }
1851             }
1852         }
1853     }
1854
1855     assert(pic_param->conditional_overlap_flag < 3);
1856     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1857
1858     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1859         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1860          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1861         interpolation_mode = 9; /* Half-pel bilinear */
1862     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1863              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1864               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1865         interpolation_mode = 1; /* Half-pel bicubic */
1866     else
1867         interpolation_mode = 0; /* Quarter-pel bicubic */
1868
1869     BEGIN_BCS_BATCH(batch, 6);
1870     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1871     OUT_BCS_BATCH(batch,
1872                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1873                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1874     OUT_BCS_BATCH(batch,
1875                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1876                   dmv_surface_valid << 15 |
1877                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1878                   pic_param->rounding_control << 13 |
1879                   pic_param->sequence_fields.bits.syncmarker << 12 |
1880                   interpolation_mode << 8 |
1881                   0 << 7 | /* FIXME: scale up or down ??? */
1882                   pic_param->range_reduction_frame << 6 |
1883                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1884                   overlap << 4 |
1885                   !pic_param->picture_fields.bits.is_first_field << 3 |
1886                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1887     OUT_BCS_BATCH(batch,
1888                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1889                   picture_type << 26 |
1890                   fcm << 24 |
1891                   alt_pq << 16 |
1892                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1893                   scale_factor << 0);
1894     OUT_BCS_BATCH(batch,
1895                   unified_mv_mode << 28 |
1896                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1897                   pic_param->fast_uvmc_flag << 26 |
1898                   ref_field_pic_polarity << 25 |
1899                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1900                   pic_param->reference_fields.bits.reference_distance << 20 |
1901                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1902                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1903                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1904                   alt_pquant_edge_mask << 4 |
1905                   alt_pquant_config << 2 |
1906                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1907                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1908     OUT_BCS_BATCH(batch,
1909                   !!pic_param->bitplane_present.value << 31 |
1910                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1911                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1912                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1913                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1914                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1915                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1916                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1917                   pic_param->mv_fields.bits.mv_table << 20 |
1918                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1919                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1920                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1921                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1922                   pic_param->mb_mode_table << 8 |
1923                   trans_ac_y << 6 |
1924                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1925                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1926                   pic_param->cbp_table << 0);
1927     ADVANCE_BCS_BATCH(batch);
1928 }
1929
1930 static void
1931 gen75_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1932                               struct decode_state *decode_state,
1933                               struct gen7_mfd_context *gen7_mfd_context)
1934 {
1935     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1936     VAPictureParameterBufferVC1 *pic_param;
1937     int picture_type;
1938     int intensitycomp_single_fwd = 0;
1939     int luma_scale1 = 0;
1940     int luma_shift1 = 0;
1941
1942     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1943     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1944     picture_type = pic_param->picture_fields.bits.picture_type;
1945
1946     if (gen7_mfd_context->reference_surface[0].surface_id != VA_INVALID_ID) {
1947         if (picture_type == 1 || picture_type == 2) { /* P/B picture */
1948             struct gen7_vc1_surface *gen7_vc1_surface = gen7_mfd_context->reference_surface[0].obj_surface->private_data;
1949             if (gen7_vc1_surface) {
1950                 intensitycomp_single_fwd = gen7_vc1_surface->intensity_compensation;
1951                 luma_scale1 = gen7_vc1_surface->luma_scale;
1952                 luma_shift1 = gen7_vc1_surface->luma_shift;
1953             }
1954         }
1955     }
1956
1957     BEGIN_BCS_BATCH(batch, 6);
1958     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1959     OUT_BCS_BATCH(batch,
1960                   0 << 14 | /* FIXME: double ??? */
1961                   0 << 12 |
1962                   intensitycomp_single_fwd << 10 |
1963                   0 << 8 |
1964                   0 << 4 | /* FIXME: interlace mode */
1965                   0);
1966     OUT_BCS_BATCH(batch,
1967                   luma_shift1 << 16 |
1968                   luma_scale1 << 0);
1969     OUT_BCS_BATCH(batch, 0);
1970     OUT_BCS_BATCH(batch, 0);
1971     OUT_BCS_BATCH(batch, 0);
1972     ADVANCE_BCS_BATCH(batch);
1973 }
1974
1975 static void
1976 gen75_mfd_vc1_directmode_state_bplus(VADriverContextP ctx,
1977                                      struct decode_state *decode_state,
1978                                      struct gen7_mfd_context *gen7_mfd_context)
1979 {
1980     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1981     struct object_surface *obj_surface;
1982     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1983
1984     obj_surface = decode_state->render_object;
1985
1986     if (obj_surface && obj_surface->private_data) {
1987         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1988     }
1989
1990     obj_surface = decode_state->reference_objects[1];
1991
1992     if (obj_surface && obj_surface->private_data) {
1993         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1994     }
1995
1996     BEGIN_BCS_BATCH(batch, 7);
1997     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1998
1999     if (dmv_write_buffer)
2000         OUT_BCS_RELOC(batch, dmv_write_buffer,
2001                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2002                       0);
2003     else
2004         OUT_BCS_BATCH(batch, 0);
2005
2006     OUT_BCS_BATCH(batch, 0);
2007     OUT_BCS_BATCH(batch, 0);
2008
2009     if (dmv_read_buffer)
2010         OUT_BCS_RELOC(batch, dmv_read_buffer,
2011                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2012                       0);
2013     else
2014         OUT_BCS_BATCH(batch, 0);
2015     OUT_BCS_BATCH(batch, 0);
2016     OUT_BCS_BATCH(batch, 0);
2017
2018     ADVANCE_BCS_BATCH(batch);
2019 }
2020
2021 static void
2022 gen75_mfd_vc1_directmode_state(VADriverContextP ctx,
2023                                struct decode_state *decode_state,
2024                                struct gen7_mfd_context *gen7_mfd_context)
2025 {
2026     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2027     struct i965_driver_data *i965 = i965_driver_data(ctx);
2028     struct object_surface *obj_surface;
2029     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
2030
2031     if (IS_STEPPING_BPLUS(i965)) {
2032         gen75_mfd_vc1_directmode_state_bplus(ctx, decode_state, gen7_mfd_context);
2033         return;
2034     }
2035
2036     obj_surface = decode_state->render_object;
2037
2038     if (obj_surface && obj_surface->private_data) {
2039         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2040     }
2041
2042     obj_surface = decode_state->reference_objects[1];
2043
2044     if (obj_surface && obj_surface->private_data) {
2045         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2046     }
2047
2048     BEGIN_BCS_BATCH(batch, 3);
2049     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
2050
2051     if (dmv_write_buffer)
2052         OUT_BCS_RELOC(batch, dmv_write_buffer,
2053                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2054                       0);
2055     else
2056         OUT_BCS_BATCH(batch, 0);
2057
2058     if (dmv_read_buffer)
2059         OUT_BCS_RELOC(batch, dmv_read_buffer,
2060                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2061                       0);
2062     else
2063         OUT_BCS_BATCH(batch, 0);
2064
2065     ADVANCE_BCS_BATCH(batch);
2066 }
2067
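     /*
      * Advanced-profile slice data may contain start-code emulation prevention
      * bytes (00 00 03).  Scan the slice header portion and adjust the
      * macroblock bit offset so it accounts for those extra bytes in the
      * buffer handed to the hardware.
      */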
2068 static int
2069 gen75_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
2070 {
2071     int out_slice_data_bit_offset;
2072     int slice_header_size = in_slice_data_bit_offset / 8;
2073     int i, j;
2074
2075     if (profile != 3)
2076         out_slice_data_bit_offset = in_slice_data_bit_offset;
2077     else {
2078         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
2079             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
2080                 if (i < slice_header_size - 1)
2081                     i++, j += 2;
2082                 else {
2083                     buf[j + 2] = buf[j + 1];
2084                     j++;
2085                 }
2086             }
2087         }
2088
2089         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
2090     }
2091
2092     return out_slice_data_bit_offset;
2093 }
2094
2095 static void
2096 gen75_mfd_vc1_bsd_object(VADriverContextP ctx,
2097                          VAPictureParameterBufferVC1 *pic_param,
2098                          VASliceParameterBufferVC1 *slice_param,
2099                          VASliceParameterBufferVC1 *next_slice_param,
2100                          dri_bo *slice_data_bo,
2101                          struct gen7_mfd_context *gen7_mfd_context)
2102 {
2103     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2104     int next_slice_start_vert_pos;
2105     int macroblock_offset;
2106     uint8_t *slice_data = NULL;
2107
2108     dri_bo_map(slice_data_bo, True);
2109     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
2110     macroblock_offset = gen75_mfd_vc1_get_macroblock_bit_offset(slice_data,
2111                                                                 slice_param->macroblock_offset,
2112                                                                 pic_param->sequence_fields.bits.profile);
2113     dri_bo_unmap(slice_data_bo);
2114
2115     if (next_slice_param)
2116         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
2117     else
2118         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
2119
2120     BEGIN_BCS_BATCH(batch, 5);
2121     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
2122     OUT_BCS_BATCH(batch,
2123                   slice_param->slice_data_size - (macroblock_offset >> 3));
2124     OUT_BCS_BATCH(batch,
2125                   slice_param->slice_data_offset + (macroblock_offset >> 3));
2126     OUT_BCS_BATCH(batch,
2127                   slice_param->slice_vertical_position << 16 |
2128                   next_slice_start_vert_pos << 0);
2129     OUT_BCS_BATCH(batch,
2130                   (macroblock_offset & 0x7));
2131     ADVANCE_BCS_BATCH(batch);
2132 }
2133
2134 static void
2135 gen75_mfd_vc1_decode_picture(VADriverContextP ctx,
2136                              struct decode_state *decode_state,
2137                              struct gen7_mfd_context *gen7_mfd_context)
2138 {
2139     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2140     VAPictureParameterBufferVC1 *pic_param;
2141     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
2142     dri_bo *slice_data_bo;
2143     int i, j;
2144
2145     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2146     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2147
2148     gen75_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
2149     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2150     intel_batchbuffer_emit_mi_flush(batch);
2151     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2152     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2153     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2154     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2155     gen75_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
2156     gen75_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
2157     gen75_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
2158
2159     for (j = 0; j < decode_state->num_slice_params; j++) {
2160         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2161         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
2162         slice_data_bo = decode_state->slice_datas[j]->bo;
2163         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
2164
2165         if (j == decode_state->num_slice_params - 1)
2166             next_slice_group_param = NULL;
2167         else
2168             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
2169
2170         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2171             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2172
2173             if (i < decode_state->slice_params[j]->num_elements - 1)
2174                 next_slice_param = slice_param + 1;
2175             else
2176                 next_slice_param = next_slice_group_param;
2177
2178             gen75_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2179             slice_param++;
2180         }
2181     }
2182
2183     intel_batchbuffer_end_atomic(batch);
2184     intel_batchbuffer_flush(batch);
2185 }
2186
2187 static void
2188 gen75_mfd_jpeg_decode_init(VADriverContextP ctx,
2189                            struct decode_state *decode_state,
2190                            struct gen7_mfd_context *gen7_mfd_context)
2191 {
2192     struct object_surface *obj_surface;
2193     VAPictureParameterBufferJPEGBaseline *pic_param;
2194     int subsampling = SUBSAMPLE_YUV420;
2195     int fourcc = VA_FOURCC_IMC3;
2196
2197     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2198
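         /*
          * Pick the render-target fourcc and subsampling from the per-component
          * sampling factors: planar IMC3/422H/444P/411P/422V for 3-component
          * images, Y800 for grayscale.
          */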
2199     if (pic_param->num_components == 1) {
2200         subsampling = SUBSAMPLE_YUV400;
2201         fourcc = VA_FOURCC_Y800;
2202     } else if (pic_param->num_components == 3) {
2203         int h1 = pic_param->components[0].h_sampling_factor;
2204         int h2 = pic_param->components[1].h_sampling_factor;
2205         int h3 = pic_param->components[2].h_sampling_factor;
2206         int v1 = pic_param->components[0].v_sampling_factor;
2207         int v2 = pic_param->components[1].v_sampling_factor;
2208         int v3 = pic_param->components[2].v_sampling_factor;
2209
2210         if (h1 == 2 * h2 && h2 == h3 &&
2211             v1 == 2 * v2 && v2 == v3) {
2212             subsampling = SUBSAMPLE_YUV420;
2213             fourcc = VA_FOURCC_IMC3;
2214         } else if (h1 == 2 * h2  && h2 == h3 &&
2215                    v1 == v2 && v2 == v3) {
2216             subsampling = SUBSAMPLE_YUV422H;
2217             fourcc = VA_FOURCC_422H;
2218         } else if (h1 == h2 && h2 == h3 &&
2219                    v1 == v2  && v2 == v3) {
2220             subsampling = SUBSAMPLE_YUV444;
2221             fourcc = VA_FOURCC_444P;
2222         } else if (h1 == 4 * h2 && h2 ==  h3 &&
2223                    v1 == v2 && v2 == v3) {
2224             subsampling = SUBSAMPLE_YUV411;
2225             fourcc = VA_FOURCC_411P;
2226         } else if (h1 == h2 && h2 == h3 &&
2227                    v1 == 2 * v2 && v2 == v3) {
2228             subsampling = SUBSAMPLE_YUV422V;
2229             fourcc = VA_FOURCC_422V;
2230         } else
2231             assert(0);
2232     } else {
2233         assert(0);
2234     }
2235
2236     /* Current decoded picture */
2237     obj_surface = decode_state->render_object;
2238     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
2239
2240     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2241     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2242     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2243     gen7_mfd_context->pre_deblocking_output.valid = 1;
2244
2245     gen7_mfd_context->post_deblocking_output.bo = NULL;
2246     gen7_mfd_context->post_deblocking_output.valid = 0;
2247
2248     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2249     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
2250
2251     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2252     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
2253
2254     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2255     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
2256
2257     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2258     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
2259
2260     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2261     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2262 }
2263
2264 static const int va_to_gen7_jpeg_rotation[4] = {
2265     GEN7_JPEG_ROTATION_0,
2266     GEN7_JPEG_ROTATION_90,
2267     GEN7_JPEG_ROTATION_180,
2268     GEN7_JPEG_ROTATION_270
2269 };
2270
2271 static void
2272 gen75_mfd_jpeg_pic_state(VADriverContextP ctx,
2273                          struct decode_state *decode_state,
2274                          struct gen7_mfd_context *gen7_mfd_context)
2275 {
2276     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2277     VAPictureParameterBufferJPEGBaseline *pic_param;
2278     int chroma_type = GEN7_YUV420;
2279     int frame_width_in_blks;
2280     int frame_height_in_blks;
2281
2282     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2283     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2284
2285     if (pic_param->num_components == 1)
2286         chroma_type = GEN7_YUV400;
2287     else if (pic_param->num_components == 3) {
2288         int h1 = pic_param->components[0].h_sampling_factor;
2289         int h2 = pic_param->components[1].h_sampling_factor;
2290         int h3 = pic_param->components[2].h_sampling_factor;
2291         int v1 = pic_param->components[0].v_sampling_factor;
2292         int v2 = pic_param->components[1].v_sampling_factor;
2293         int v3 = pic_param->components[2].v_sampling_factor;
2294
2295         if (h1 == 2 * h2 && h2 == h3 &&
2296             v1 == 2 * v2 && v2 == v3)
2297             chroma_type = GEN7_YUV420;
2298         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2299                  v1 == 1 && v2 == 1 && v3 == 1)
2300             chroma_type = GEN7_YUV422H_2Y;
2301         else if (h1 == h2 && h2 == h3 &&
2302                  v1 == v2 && v2 == v3)
2303             chroma_type = GEN7_YUV444;
2304         else if (h1 == 4 * h2 && h2 == h3 &&
2305                  v1 == v2 && v2 == v3)
2306             chroma_type = GEN7_YUV411;
2307         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2308                  v1 == 2 && v2 == 1 && v3 == 1)
2309             chroma_type = GEN7_YUV422V_2Y;
2310         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2311                  v1 == 2 && v2 == 2 && v3 == 2)
2312             chroma_type = GEN7_YUV422H_4Y;
2313         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2314                  v1 == 2 && v2 == 1 && v3 == 1)
2315             chroma_type = GEN7_YUV422V_4Y;
2316         else
2317             assert(0);
2318     }
2319
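         /*
          * FrameWidthInBlks/FrameHeightInBlks are in 8x8 luma block units,
          * rounded up according to the chroma type selected above.
          */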
2320     if (chroma_type == GEN7_YUV400 ||
2321         chroma_type == GEN7_YUV444 ||
2322         chroma_type == GEN7_YUV422V_2Y) {
2323         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2324         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2325     } else if (chroma_type == GEN7_YUV411) {
2326         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2327         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2328     } else {
2329         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2330         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2331     }
2332
2333     BEGIN_BCS_BATCH(batch, 3);
2334     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2335     OUT_BCS_BATCH(batch,
2336                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2337                   (chroma_type << 0));
2338     OUT_BCS_BATCH(batch,
2339                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2340                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2341     ADVANCE_BCS_BATCH(batch);
2342 }
2343
2344 static const int va_to_gen7_jpeg_hufftable[2] = {
2345     MFX_HUFFTABLE_ID_Y,
2346     MFX_HUFFTABLE_ID_UV
2347 };
2348
2349 static void
2350 gen75_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2351                                 struct decode_state *decode_state,
2352                                 struct gen7_mfd_context *gen7_mfd_context,
2353                                 int num_tables)
2354 {
2355     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2356     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2357     int index;
2358
2359     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2360         return;
2361
2362     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2363
2364     for (index = 0; index < num_tables; index++) {
2365         int id = va_to_gen7_jpeg_hufftable[index];
2366
2367         if (!huffman_table->load_huffman_table[index])
2368             continue;
2369
2370         BEGIN_BCS_BATCH(batch, 53);
2371         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2372         OUT_BCS_BATCH(batch, id);
2373         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2374         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2375         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2376         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2377         ADVANCE_BCS_BATCH(batch);
2378     }
2379 }
2380
2381 static const int va_to_gen7_jpeg_qm[5] = {
2382     -1,
2383     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2384     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2385     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2386     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2387 };
2388
2389 static void
2390 gen75_mfd_jpeg_qm_state(VADriverContextP ctx,
2391                         struct decode_state *decode_state,
2392                         struct gen7_mfd_context *gen7_mfd_context)
2393 {
2394     VAPictureParameterBufferJPEGBaseline *pic_param;
2395     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2396     int index;
2397
2398     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2399         return;
2400
2401     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2402     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2403
2404     assert(pic_param->num_components <= 3);
2405
2406     for (index = 0; index < pic_param->num_components; index++) {
2407         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2408         int qm_type;
2409         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2410         unsigned char raster_qm[64];
2411         int j;
2412
2413         if (id > 4 || id < 1)
2414             continue;
2415
2416         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2417             continue;
2418
2419         qm_type = va_to_gen7_jpeg_qm[id];
2420
2421         for (j = 0; j < 64; j++)
2422             raster_qm[zigzag_direct[j]] = qm[j];
2423
2424         gen75_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2425     }
2426 }
2427
2428 static void
2429 gen75_mfd_jpeg_bsd_object(VADriverContextP ctx,
2430                           VAPictureParameterBufferJPEGBaseline *pic_param,
2431                           VASliceParameterBufferJPEGBaseline *slice_param,
2432                           VASliceParameterBufferJPEGBaseline *next_slice_param,
2433                           dri_bo *slice_data_bo,
2434                           struct gen7_mfd_context *gen7_mfd_context)
2435 {
2436     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2437     int scan_component_mask = 0;
2438     int i;
2439
2440     assert(slice_param->num_components > 0);
2441     assert(slice_param->num_components < 4);
2442     assert(slice_param->num_components <= pic_param->num_components);
2443
2444     for (i = 0; i < slice_param->num_components; i++) {
2445         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2446         case 1:
2447             scan_component_mask |= (1 << 0);
2448             break;
2449         case 2:
2450             scan_component_mask |= (1 << 1);
2451             break;
2452         case 3:
2453             scan_component_mask |= (1 << 2);
2454             break;
2455         default:
2456             assert(0);
2457             break;
2458         }
2459     }
2460
2461     BEGIN_BCS_BATCH(batch, 6);
2462     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2463     OUT_BCS_BATCH(batch,
2464                   slice_param->slice_data_size);
2465     OUT_BCS_BATCH(batch,
2466                   slice_param->slice_data_offset);
2467     OUT_BCS_BATCH(batch,
2468                   slice_param->slice_horizontal_position << 16 |
2469                   slice_param->slice_vertical_position << 0);
2470     OUT_BCS_BATCH(batch,
2471                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2472                   (scan_component_mask << 27) |                 /* scan components */
2473                   (0 << 26) |   /* disable interrupt allowed */
2474                   (slice_param->num_mcus << 0));                /* MCU count */
2475     OUT_BCS_BATCH(batch,
2476                   (slice_param->restart_interval << 0));    /* RestartInterval */
2477     ADVANCE_BCS_BATCH(batch);
2478 }
2479
2480 /*
      * Workaround for JPEG decoding on Ivybridge: run a dummy VLD decode of
      * the small built-in 16x16 AVC clip below so the MFX pipeline is in a
      * known state before it is programmed for JPEG.
      */
2481
2482 static struct {
2483     int width;
2484     int height;
2485     unsigned char data[32];
2486     int data_size;
2487     int data_bit_offset;
2488     int qp;
2489 } gen7_jpeg_wa_clip = {
2490     16,
2491     16,
2492     {
2493         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2494         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2495     },
2496     14,
2497     40,
2498     28,
2499 };
2500
2501 static void
2502 gen75_jpeg_wa_init(VADriverContextP ctx,
2503                    struct gen7_mfd_context *gen7_mfd_context)
2504 {
2505     struct i965_driver_data *i965 = i965_driver_data(ctx);
2506     VAStatus status;
2507     struct object_surface *obj_surface;
2508
2509     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2510         i965_DestroySurfaces(ctx,
2511                              &gen7_mfd_context->jpeg_wa_surface_id,
2512                              1);
2513
2514     status = i965_CreateSurfaces(ctx,
2515                                  gen7_jpeg_wa_clip.width,
2516                                  gen7_jpeg_wa_clip.height,
2517                                  VA_RT_FORMAT_YUV420,
2518                                  1,
2519                                  &gen7_mfd_context->jpeg_wa_surface_id);
2520     assert(status == VA_STATUS_SUCCESS);
2521
2522     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2523     assert(obj_surface);
2524     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2525     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2526
2527     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2528         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2529                                                                "JPEG WA data",
2530                                                                0x1000,
2531                                                                0x1000);
2532         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2533                        0,
2534                        gen7_jpeg_wa_clip.data_size,
2535                        gen7_jpeg_wa_clip.data);
2536     }
2537 }
2538
2539 static void
2540 gen75_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2541                                struct gen7_mfd_context *gen7_mfd_context)
2542 {
2543     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2544
2545     BEGIN_BCS_BATCH(batch, 5);
2546     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2547     OUT_BCS_BATCH(batch,
2548                   (MFX_LONG_MODE << 17) | /* currently only the long format is supported */
2549                   (MFD_MODE_VLD << 15) | /* VLD mode */
2550                   (0 << 10) | /* disable Stream-Out */
2551                   (0 << 9)  | /* Post Deblocking Output */
2552                   (1 << 8)  | /* Pre Deblocking Output */
2553                   (0 << 5)  | /* not in stitch mode */
2554                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2555                   (MFX_FORMAT_AVC << 0));
2556     OUT_BCS_BATCH(batch,
2557                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2558                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2559                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2560                   (0 << 1)  |
2561                   (0 << 0));
2562     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2563     OUT_BCS_BATCH(batch, 0); /* reserved */
2564     ADVANCE_BCS_BATCH(batch);
2565 }
2566
2567 static void
2568 gen75_jpeg_wa_surface_state(VADriverContextP ctx,
2569                             struct gen7_mfd_context *gen7_mfd_context)
2570 {
2571     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2572     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2573
2574     BEGIN_BCS_BATCH(batch, 6);
2575     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2576     OUT_BCS_BATCH(batch, 0);
2577     OUT_BCS_BATCH(batch,
2578                   ((obj_surface->orig_width - 1) << 18) |
2579                   ((obj_surface->orig_height - 1) << 4));
2580     OUT_BCS_BATCH(batch,
2581                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2582                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2583                   (0 << 22) | /* surface object control state, ignored */
2584                   ((obj_surface->width - 1) << 3) | /* pitch */
2585                   (0 << 2)  | /* must be 0 */
2586                   (1 << 1)  | /* must be tiled */
2587                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2588     OUT_BCS_BATCH(batch,
2589                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2590                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2591     OUT_BCS_BATCH(batch,
2592                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2593                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codecs, non-zero for JPEG */
2594     ADVANCE_BCS_BATCH(batch);
2595 }
2596
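/*
 * Later (B+) steppings use longer MFX state layouts, so each state emitter
 * below comes in two flavours selected by IS_STEPPING_BPLUS():
 * MFX_PIPE_BUF_ADDR_STATE is 61 vs. 25 dwords, MFX_BSP_BUF_BASE_ADDR_STATE
 * 10 vs. 4, and MFX_AVC_DIRECTMODE_STATE 71 vs. 69, while
 * MFX_IND_OBJ_BASE_ADDR_STATE stays at 11 dwords in both variants.
 */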
2597 static void
2598 gen75_jpeg_wa_pipe_buf_addr_state_bplus(VADriverContextP ctx,
2599                                         struct gen7_mfd_context *gen7_mfd_context)
2600 {
2601     struct i965_driver_data *i965 = i965_driver_data(ctx);
2602     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2603     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2604     dri_bo *intra_bo;
2605     int i;
2606
2607     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2608                             "intra row store",
2609                             128 * 64,
2610                             0x1000);
2611
2612     BEGIN_BCS_BATCH(batch, 61);
2613     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2614     OUT_BCS_RELOC(batch,
2615                   obj_surface->bo,
2616                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2617                   0);
2618     OUT_BCS_BATCH(batch, 0);
2619     OUT_BCS_BATCH(batch, 0);
2620
2621
2622     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2623     OUT_BCS_BATCH(batch, 0);
2624     OUT_BCS_BATCH(batch, 0);
2625
2626     /* uncompressed-video & stream out 7-12 */
2627     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2628     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2629     OUT_BCS_BATCH(batch, 0);
2630     OUT_BCS_BATCH(batch, 0);
2631     OUT_BCS_BATCH(batch, 0);
2632     OUT_BCS_BATCH(batch, 0);
2633
2634     /* DW 13-15: intra row store scratch buffer */
2635     OUT_BCS_RELOC(batch,
2636                   intra_bo,
2637                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2638                   0);
2639     OUT_BCS_BATCH(batch, 0);
2640     OUT_BCS_BATCH(batch, 0);
2641
2642     /* DW 16-18: deblocking filter row store scratch buffer */
2643     OUT_BCS_BATCH(batch, 0);
2644     OUT_BCS_BATCH(batch, 0);
2645     OUT_BCS_BATCH(batch, 0);
2646
2647     /* DW 19-50: reference picture addresses (all zero for the workaround) */
2648     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2649         OUT_BCS_BATCH(batch, 0);
2650         OUT_BCS_BATCH(batch, 0);
2651     }
2652     OUT_BCS_BATCH(batch, 0);
2653
2654     /* DW 52-54: macroblock status buffer address */
2655     OUT_BCS_BATCH(batch, 0);
2656     OUT_BCS_BATCH(batch, 0);
2657     OUT_BCS_BATCH(batch, 0);
2658     /* DW 55-60: ILDB & second ILDB streamout addresses */
2659     OUT_BCS_BATCH(batch, 0);
2660     OUT_BCS_BATCH(batch, 0);
2661     OUT_BCS_BATCH(batch, 0);
2662     OUT_BCS_BATCH(batch, 0);
2663     OUT_BCS_BATCH(batch, 0);
2664     OUT_BCS_BATCH(batch, 0);
2665
2666     ADVANCE_BCS_BATCH(batch);
2667
2668     dri_bo_unreference(intra_bo);
2669 }
2670
2671 static void
2672 gen75_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2673                                   struct gen7_mfd_context *gen7_mfd_context)
2674 {
2675     struct i965_driver_data *i965 = i965_driver_data(ctx);
2676     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2677     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2678     dri_bo *intra_bo;
2679     int i;
2680
2681     if (IS_STEPPING_BPLUS(i965)) {
2682         gen75_jpeg_wa_pipe_buf_addr_state_bplus(ctx, gen7_mfd_context);
2683         return;
2684     }
2685
2686     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2687                             "intra row store",
2688                             128 * 64,
2689                             0x1000);
2690
2691     BEGIN_BCS_BATCH(batch, 25);
2692     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
2693     OUT_BCS_RELOC(batch,
2694                   obj_surface->bo,
2695                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2696                   0);
2697
2698     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2699
2700     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2701     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2702
2703     OUT_BCS_RELOC(batch,
2704                   intra_bo,
2705                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2706                   0);
2707
2708     OUT_BCS_BATCH(batch, 0);
2709
2710     /* DW 7..22 */
2711     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2712         OUT_BCS_BATCH(batch, 0);
2713     }
2714
2715     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
2716     OUT_BCS_BATCH(batch, 0);
2717     ADVANCE_BCS_BATCH(batch);
2718
2719     dri_bo_unreference(intra_bo);
2720 }
2721
2722 static void
2723 gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
2724                                             struct gen7_mfd_context *gen7_mfd_context)
2725 {
2726     struct i965_driver_data *i965 = i965_driver_data(ctx);
2727     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2728     dri_bo *bsd_mpc_bo, *mpr_bo;
2729
2730     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2731                               "bsd mpc row store",
2732                               11520, /* 1.5 * 120 * 64 */
2733                               0x1000);
2734
2735     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2736                           "mpr row store",
2737                           7680, /* 1.0 * 120 * 64 */
2738                           0x1000);
2739
2740     BEGIN_BCS_BATCH(batch, 10);
2741     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2742
2743     OUT_BCS_RELOC(batch,
2744                   bsd_mpc_bo,
2745                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2746                   0);
2747
2748     OUT_BCS_BATCH(batch, 0);
2749     OUT_BCS_BATCH(batch, 0);
2750
2751     OUT_BCS_RELOC(batch,
2752                   mpr_bo,
2753                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2754                   0);
2755     OUT_BCS_BATCH(batch, 0);
2756     OUT_BCS_BATCH(batch, 0);
2757
2758     OUT_BCS_BATCH(batch, 0);
2759     OUT_BCS_BATCH(batch, 0);
2760     OUT_BCS_BATCH(batch, 0);
2761
2762     ADVANCE_BCS_BATCH(batch);
2763
2764     dri_bo_unreference(bsd_mpc_bo);
2765     dri_bo_unreference(mpr_bo);
2766 }
2767
2768 static void
2769 gen75_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2770                                       struct gen7_mfd_context *gen7_mfd_context)
2771 {
2772     struct i965_driver_data *i965 = i965_driver_data(ctx);
2773     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2774     dri_bo *bsd_mpc_bo, *mpr_bo;
2775
2776     if (IS_STEPPING_BPLUS(i965)) {
2777         gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(ctx, gen7_mfd_context);
2778         return;
2779     }
2780
2781     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2782                               "bsd mpc row store",
2783                               11520, /* 1.5 * 120 * 64 */
2784                               0x1000);
2785
2786     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2787                           "mpr row store",
2788                           7680, /* 1.0 * 120 * 64 */
2789                           0x1000);
2790
2791     BEGIN_BCS_BATCH(batch, 4);
2792     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
2793
2794     OUT_BCS_RELOC(batch,
2795                   bsd_mpc_bo,
2796                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2797                   0);
2798
2799     OUT_BCS_RELOC(batch,
2800                   mpr_bo,
2801                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2802                   0);
2803     OUT_BCS_BATCH(batch, 0);
2804
2805     ADVANCE_BCS_BATCH(batch);
2806
2807     dri_bo_unreference(bsd_mpc_bo);
2808     dri_bo_unreference(mpr_bo);
2809 }
2810
2811 static void
2812 gen75_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2813                            struct gen7_mfd_context *gen7_mfd_context)
2814 {
2815     /* Intentionally empty: no AVC QM state is programmed for the workaround clip. */
2816 }
2817
2818 static void
2819 gen75_jpeg_wa_avc_img_state(VADriverContextP ctx,
2820                             struct gen7_mfd_context *gen7_mfd_context)
2821 {
2822     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2823     int img_struct = 0;
2824     int mbaff_frame_flag = 0;
2825     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2826
2827     BEGIN_BCS_BATCH(batch, 16);
2828     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2829     OUT_BCS_BATCH(batch,
2830                   (width_in_mbs * height_in_mbs - 1));
2831     OUT_BCS_BATCH(batch,
2832                   ((height_in_mbs - 1) << 16) |
2833                   ((width_in_mbs - 1) << 0));
2834     OUT_BCS_BATCH(batch,
2835                   (0 << 24) |
2836                   (0 << 16) |
2837                   (0 << 14) |
2838                   (0 << 13) |
2839                   (0 << 12) | /* differs from GEN6 */
2840                   (0 << 10) |
2841                   (img_struct << 8));
2842     OUT_BCS_BATCH(batch,
2843                   (1 << 10) | /* 4:2:0 */
2844                   (1 << 7) |  /* CABAC */
2845                   (0 << 6) |
2846                   (0 << 5) |
2847                   (0 << 4) |
2848                   (0 << 3) |
2849                   (1 << 2) |
2850                   (mbaff_frame_flag << 1) |
2851                   (0 << 0));
2852     OUT_BCS_BATCH(batch, 0);
2853     OUT_BCS_BATCH(batch, 0);
2854     OUT_BCS_BATCH(batch, 0);
2855     OUT_BCS_BATCH(batch, 0);
2856     OUT_BCS_BATCH(batch, 0);
2857     OUT_BCS_BATCH(batch, 0);
2858     OUT_BCS_BATCH(batch, 0);
2859     OUT_BCS_BATCH(batch, 0);
2860     OUT_BCS_BATCH(batch, 0);
2861     OUT_BCS_BATCH(batch, 0);
2862     OUT_BCS_BATCH(batch, 0);
2863     ADVANCE_BCS_BATCH(batch);
2864 }
2865
2866 static void
2867 gen75_jpeg_wa_avc_directmode_state_bplus(VADriverContextP ctx,
2868                                          struct gen7_mfd_context *gen7_mfd_context)
2869 {
2870     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2871     int i;
2872
2873     BEGIN_BCS_BATCH(batch, 71);
2874     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2875
2876     /* reference surfaces 0..15 */
2877     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2878         OUT_BCS_BATCH(batch, 0); /* top */
2879         OUT_BCS_BATCH(batch, 0); /* bottom */
2880     }
2881
2882     OUT_BCS_BATCH(batch, 0);
2883
2884     /* the current decoding frame/field */
2885     OUT_BCS_BATCH(batch, 0); /* top */
2886     OUT_BCS_BATCH(batch, 0);
2887     OUT_BCS_BATCH(batch, 0);
2888
2889     /* POC List */
2890     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2891         OUT_BCS_BATCH(batch, 0);
2892         OUT_BCS_BATCH(batch, 0);
2893     }
2894
2895     OUT_BCS_BATCH(batch, 0);
2896     OUT_BCS_BATCH(batch, 0);
2897
2898     ADVANCE_BCS_BATCH(batch);
2899 }
2900
2901 static void
2902 gen75_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2903                                    struct gen7_mfd_context *gen7_mfd_context)
2904 {
2905     struct i965_driver_data *i965 = i965_driver_data(ctx);
2906     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2907     int i;
2908
2909     if (IS_STEPPING_BPLUS(i965)) {
2910         gen75_jpeg_wa_avc_directmode_state_bplus(ctx, gen7_mfd_context);
2911         return;
2912     }
2913
2914     BEGIN_BCS_BATCH(batch, 69);
2915     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
2916
2917     /* reference surfaces 0..15 */
2918     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2919         OUT_BCS_BATCH(batch, 0); /* top */
2920         OUT_BCS_BATCH(batch, 0); /* bottom */
2921     }
2922
2923     /* the current decoding frame/field */
2924     OUT_BCS_BATCH(batch, 0); /* top */
2925     OUT_BCS_BATCH(batch, 0); /* bottom */
2926
2927     /* POC List */
2928     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2929         OUT_BCS_BATCH(batch, 0);
2930         OUT_BCS_BATCH(batch, 0);
2931     }
2932
2933     OUT_BCS_BATCH(batch, 0);
2934     OUT_BCS_BATCH(batch, 0);
2935
2936     ADVANCE_BCS_BATCH(batch);
2937 }
2938
2939 static void
2940 gen75_jpeg_wa_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
2941                                             struct gen7_mfd_context *gen7_mfd_context)
2942 {
2943     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2944
2945     BEGIN_BCS_BATCH(batch, 11);
2946     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2947     OUT_BCS_RELOC(batch,
2948                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2949                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2950                   0);
2951     OUT_BCS_BATCH(batch, 0x80000000); /* must be set, up to 2G */
2952     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2953     OUT_BCS_BATCH(batch, 0);
2954     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2955     OUT_BCS_BATCH(batch, 0);
2956     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2957     OUT_BCS_BATCH(batch, 0);
2958     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2959     OUT_BCS_BATCH(batch, 0);
2960     ADVANCE_BCS_BATCH(batch);
2961 }
2962
2963 static void
2964 gen75_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2965                                       struct gen7_mfd_context *gen7_mfd_context)
2966 {
2967     struct i965_driver_data *i965 = i965_driver_data(ctx);
2968     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2969
2970     if (IS_STEPPING_BPLUS(i965)) {
2971         gen75_jpeg_wa_ind_obj_base_addr_state_bplus(ctx, gen7_mfd_context);
2972         return;
2973     }
2974
2975     BEGIN_BCS_BATCH(batch, 11);
2976     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2977     OUT_BCS_RELOC(batch,
2978                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2979                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2980                   0);
2981     OUT_BCS_BATCH(batch, 0x80000000); /* must be set, up to 2G */
2982     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2983     OUT_BCS_BATCH(batch, 0);
2984     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2985     OUT_BCS_BATCH(batch, 0);
2986     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2987     OUT_BCS_BATCH(batch, 0);
2988     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2989     OUT_BCS_BATCH(batch, 0);
2990     ADVANCE_BCS_BATCH(batch);
2991 }
2992
2993 static void
2994 gen75_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2995                              struct gen7_mfd_context *gen7_mfd_context)
2996 {
2997     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2998
2999     /* the input bitstream format on GEN7 differs from GEN6 */
3000     BEGIN_BCS_BATCH(batch, 6);
3001     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
3002     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
3003     OUT_BCS_BATCH(batch, 0);
3004     OUT_BCS_BATCH(batch,
3005                   (0 << 31) |
3006                   (0 << 14) |
3007                   (0 << 12) |
3008                   (0 << 10) |
3009                   (0 << 8));
3010     OUT_BCS_BATCH(batch,
3011                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
3012                   (0 << 5)  |
3013                   (0 << 4)  |
3014                   (1 << 3) | /* LastSlice Flag */
3015                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
3016     OUT_BCS_BATCH(batch, 0);
3017     ADVANCE_BCS_BATCH(batch);
3018 }
3019
3020 static void
3021 gen75_jpeg_wa_avc_slice_state(VADriverContextP ctx,
3022                               struct gen7_mfd_context *gen7_mfd_context)
3023 {
3024     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3025     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
3026     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
3027     int first_mb_in_slice = 0;
3028     int slice_type = SLICE_TYPE_I;
3029
3030     BEGIN_BCS_BATCH(batch, 11);
3031     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
3032     OUT_BCS_BATCH(batch, slice_type);
3033     OUT_BCS_BATCH(batch,
3034                   (num_ref_idx_l1 << 24) |
3035                   (num_ref_idx_l0 << 16) |
3036                   (0 << 8) |
3037                   (0 << 0));
3038     OUT_BCS_BATCH(batch,
3039                   (0 << 29) |
3040                   (1 << 27) |   /* disable Deblocking */
3041                   (0 << 24) |
3042                   (gen7_jpeg_wa_clip.qp << 16) |
3043                   (0 << 8) |
3044                   (0 << 0));
3045     OUT_BCS_BATCH(batch,
3046                   (slice_ver_pos << 24) |
3047                   (slice_hor_pos << 16) |
3048                   (first_mb_in_slice << 0));
3049     OUT_BCS_BATCH(batch,
3050                   (next_slice_ver_pos << 16) |
3051                   (next_slice_hor_pos << 0));
3052     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
3053     OUT_BCS_BATCH(batch, 0);
3054     OUT_BCS_BATCH(batch, 0);
3055     OUT_BCS_BATCH(batch, 0);
3056     OUT_BCS_BATCH(batch, 0);
3057     ADVANCE_BCS_BATCH(batch);
3058 }
3059
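/*
 * Emits one complete dummy AVC decode (pipe mode select through BSD object)
 * for the 16x16 clip above.  gen75_mfd_jpeg_decode_picture() runs this
 * sequence, followed by an MI_FLUSH, before programming the real JPEG
 * pipeline for every picture.
 */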
3060 static void
3061 gen75_mfd_jpeg_wa(VADriverContextP ctx,
3062                   struct gen7_mfd_context *gen7_mfd_context)
3063 {
3064     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3065     gen75_jpeg_wa_init(ctx, gen7_mfd_context);
3066     intel_batchbuffer_emit_mi_flush(batch);
3067     gen75_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
3068     gen75_jpeg_wa_surface_state(ctx, gen7_mfd_context);
3069     gen75_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
3070     gen75_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
3071     gen75_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
3072     gen75_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
3073     gen75_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
3074
3075     gen75_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
3076     gen75_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
3077     gen75_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
3078 }
3079
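/*
 * JPEG picture decode: after the AVC workaround, the slice parameters are
 * walked twice.  The first pass only finds the largest DC/AC Huffman table
 * selector so that gen75_mfd_jpeg_huff_table_state() programs the right
 * number of tables; the second pass reprograms the indirect object base for
 * each slice data bo and emits one gen75_mfd_jpeg_bsd_object() per slice.
 */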
3080 void
3081 gen75_mfd_jpeg_decode_picture(VADriverContextP ctx,
3082                               struct decode_state *decode_state,
3083                               struct gen7_mfd_context *gen7_mfd_context)
3084 {
3085     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3086     VAPictureParameterBufferJPEGBaseline *pic_param;
3087     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
3088     dri_bo *slice_data_bo;
3089     int i, j, max_selector = 0;
3090
3091     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3092     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
3093
3094     /* Currently only Baseline DCT is supported */
3095     gen75_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
3096     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3097     gen75_mfd_jpeg_wa(ctx, gen7_mfd_context);
3098     intel_batchbuffer_emit_mi_flush(batch);
3099     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3100     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3101     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3102     gen75_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
3103     gen75_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
3104
3105     for (j = 0; j < decode_state->num_slice_params; j++) {
3106         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3107         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3108         slice_data_bo = decode_state->slice_datas[j]->bo;
3109         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3110
3111         if (j == decode_state->num_slice_params - 1)
3112             next_slice_group_param = NULL;
3113         else
3114             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3115
3116         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3117             int component;
3118
3119             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3120
3121             if (i < decode_state->slice_params[j]->num_elements - 1)
3122                 next_slice_param = slice_param + 1;
3123             else
3124                 next_slice_param = next_slice_group_param;
3125
3126             for (component = 0; component < slice_param->num_components; component++) {
3127                 if (max_selector < slice_param->components[component].dc_table_selector)
3128                     max_selector = slice_param->components[component].dc_table_selector;
3129
3130                 if (max_selector < slice_param->components[component].ac_table_selector)
3131                     max_selector = slice_param->components[component].ac_table_selector;
3132             }
3133
3134             slice_param++;
3135         }
3136     }
3137
3138     assert(max_selector < 2);
3139     gen75_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
3140
3141     for (j = 0; j < decode_state->num_slice_params; j++) {
3142         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3143         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3144         slice_data_bo = decode_state->slice_datas[j]->bo;
3145         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3146
3147         if (j == decode_state->num_slice_params - 1)
3148             next_slice_group_param = NULL;
3149         else
3150             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3151
3152         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3153             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3154
3155             if (i < decode_state->slice_params[j]->num_elements - 1)
3156                 next_slice_param = slice_param + 1;
3157             else
3158                 next_slice_param = next_slice_group_param;
3159
3160             gen75_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
3161             slice_param++;
3162         }
3163     }
3164
3165     intel_batchbuffer_end_atomic(batch);
3166     intel_batchbuffer_flush(batch);
3167 }
3168
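/*
 * Entry point installed as base.run by gen75_dec_hw_context_init(): validates
 * the input with intel_decoder_sanity_check_input() and then dispatches to
 * the per-codec picture decoders by profile.
 */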
3169 static VAStatus
3170 gen75_mfd_decode_picture(VADriverContextP ctx,
3171                          VAProfile profile,
3172                          union codec_state *codec_state,
3173                          struct hw_context *hw_context)
3175 {
3176     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3177     struct decode_state *decode_state = &codec_state->decode;
3178     VAStatus vaStatus;
3179
3180     assert(gen7_mfd_context);
3181
3182     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3183
3184     if (vaStatus != VA_STATUS_SUCCESS)
3185         goto out;
3186
3187     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3188
3189     switch (profile) {
3190     case VAProfileMPEG2Simple:
3191     case VAProfileMPEG2Main:
3192         gen75_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3193         break;
3194
3195     case VAProfileH264ConstrainedBaseline:
3196     case VAProfileH264Main:
3197     case VAProfileH264High:
3198     case VAProfileH264StereoHigh:
3199     case VAProfileH264MultiviewHigh:
3200         gen75_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3201         break;
3202
3203     case VAProfileVC1Simple:
3204     case VAProfileVC1Main:
3205     case VAProfileVC1Advanced:
3206         gen75_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3207         break;
3208
3209     case VAProfileJPEGBaseline:
3210         gen75_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3211         break;
3212
3213     default:
3214         assert(0);
3215         break;
3216     }
3217
3218     vaStatus = VA_STATUS_SUCCESS;
3219
3220 out:
3221     return vaStatus;
3222 }
3223
3224 static void
3225 gen75_mfd_context_destroy(void *hw_context)
3226 {
3227     VADriverContextP ctx;
3228     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3229
3230     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3231
3232     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3233     gen7_mfd_context->post_deblocking_output.bo = NULL;
3234
3235     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3236     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3237
3238     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3239     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3240
3241     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3242     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3243
3244     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3245     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3246
3247     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3248     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3249
3250     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3251     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3252
3253     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3254
3255     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3256         i965_DestroySurfaces(ctx,
3257                              &gen7_mfd_context->jpeg_wa_surface_id,
3258                              1);
3259         gen7_mfd_context->jpeg_wa_surface_object = NULL;
3260     }
3261
3262     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3263     free(gen7_mfd_context);
3264 }
3265
3266 static void gen75_mfd_mpeg2_context_init(VADriverContextP ctx,
3267                                          struct gen7_mfd_context *gen7_mfd_context)
3268 {
3269     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3270     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3271     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3272     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3273 }
3274
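/*
 * Decoder context constructor.  A hedged usage sketch, inferred only from the
 * base.run/base.destroy assignments below (how callers actually wire this up
 * lives outside this file; "profile" and "codec_state" are caller-side values):
 *
 *     struct hw_context *hwc = gen75_dec_hw_context_init(ctx, obj_config);
 *     VAStatus status = hwc->run(ctx, profile, &codec_state, hwc);
 *     ...
 *     hwc->destroy(hwc);
 */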
3275 struct hw_context *
3276 gen75_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3277 {
3278     struct intel_driver_data *intel = intel_driver_data(ctx);
3279     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3280     int i;
3281
3282     assert(gen7_mfd_context);
3283     gen7_mfd_context->base.destroy = gen75_mfd_context_destroy;
3284     gen7_mfd_context->base.run = gen75_mfd_decode_picture;
3285     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3286
3287     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3288         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3289         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3290         gen7_mfd_context->reference_surface[i].obj_surface = NULL;
3291     }
3292
3293     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3294     gen7_mfd_context->jpeg_wa_surface_object = NULL;
3295
3296     switch (obj_config->profile) {
3297     case VAProfileMPEG2Simple:
3298     case VAProfileMPEG2Main:
3299         gen75_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3300         break;
3301
3302     case VAProfileH264ConstrainedBaseline:
3303     case VAProfileH264Main:
3304     case VAProfileH264High:
3305     case VAProfileH264StereoHigh:
3306     case VAProfileH264MultiviewHigh:
3307         gen75_mfd_avc_context_init(ctx, gen7_mfd_context);
3308         break;
3309     default:
3310         break;
3311     }
3312
3313     gen7_mfd_context->driver_context = ctx;
3314     return (struct hw_context *)gen7_mfd_context;
3315 }