genX_mfd: remove mapping of I frame to BI frame
[android-x86/hardware-intel-common-vaapi.git] / src / gen75_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui  <yakui.zhao@intel.com>
27  *
28  */
29 #include "sysdeps.h"
30
31 #include <va/va_dec_jpeg.h>
32
33 #include "intel_batchbuffer.h"
34 #include "intel_driver.h"
35 #include "i965_defines.h"
36 #include "i965_drv_video.h"
37 #include "i965_decoder_utils.h"
38 #include "gen7_mfd.h"
39 #include "intel_media.h"
40
41 #define B0_STEP_REV     2
42 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
43
44 static const uint32_t zigzag_direct[64] = {
45     0,   1,  8, 16,  9,  2,  3, 10,
46     17, 24, 32, 25, 18, 11,  4,  5,
47     12, 19, 26, 33, 40, 48, 41, 34,
48     27, 20, 13,  6,  7, 14, 21, 28,
49     35, 42, 49, 56, 57, 50, 43, 36,
50     29, 22, 15, 23, 30, 37, 44, 51,
51     58, 59, 52, 45, 38, 31, 39, 46,
52     53, 60, 61, 54, 47, 55, 62, 63
53 };
54
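/*
 * Attach a GenAvcSurface to the decoded picture on first use and allocate
 * its direct-MV scratch buffers: one for the top field/frame, and a second
 * one for the bottom field when field decoding is used without 8x8 inference.
 */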
55 static void
56 gen75_mfd_init_avc_surface(VADriverContextP ctx,
57                            VAPictureParameterBufferH264 *pic_param,
58                            struct object_surface *obj_surface)
59 {
60     struct i965_driver_data *i965 = i965_driver_data(ctx);
61     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
62     int width_in_mbs, height_in_mbs;
63
64     obj_surface->free_private_data = gen_free_avc_surface;
65     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
66     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
67
68     if (!gen7_avc_surface) {
69         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
70         assert(gen7_avc_surface);
71         gen7_avc_surface->base.frame_store_id = -1;
72         assert((obj_surface->size & 0x3f) == 0);
73         obj_surface->private_data = gen7_avc_surface;
74     }
75
76     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
77                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
78
79     if (gen7_avc_surface->dmv_top == NULL) {
80         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
81                                                  "direct mv w/r buffer",
82                                                  width_in_mbs * height_in_mbs * 128,
83                                                  0x1000);
84         assert(gen7_avc_surface->dmv_top);
85     }
86
87     if (gen7_avc_surface->dmv_bottom_flag &&
88         gen7_avc_surface->dmv_bottom == NULL) {
89         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
90                                                     "direct mv w/r buffer",
91                                                     width_in_mbs * height_in_mbs * 128,
92                                                     0x1000);
93         assert(gen7_avc_surface->dmv_bottom);
94     }
95 }
96
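/*
 * MFX_PIPE_MODE_SELECT: put the MFX engine in long-format VLD decode mode
 * for the selected standard and enable the pre- or post-deblocking output
 * according to the context flags.
 */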
97 static void
98 gen75_mfd_pipe_mode_select(VADriverContextP ctx,
99                            struct decode_state *decode_state,
100                            int standard_select,
101                            struct gen7_mfd_context *gen7_mfd_context)
102 {
103     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
104
105     assert(standard_select == MFX_FORMAT_MPEG2 ||
106            standard_select == MFX_FORMAT_AVC ||
107            standard_select == MFX_FORMAT_VC1 ||
108            standard_select == MFX_FORMAT_JPEG);
109
110     BEGIN_BCS_BATCH(batch, 5);
111     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
112     OUT_BCS_BATCH(batch,
113                   (MFX_LONG_MODE << 17) | /* only the long format is currently supported */
114                   (MFD_MODE_VLD << 15) | /* VLD mode */
115                   (0 << 10) | /* disable Stream-Out */
116                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
117                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
118                   (0 << 5)  | /* not in stitch mode */
119                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
120                   (standard_select << 0));
121     OUT_BCS_BATCH(batch,
122                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
123                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
124                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
125                   (0 << 1)  |
126                   (0 << 0));
127     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
128     OUT_BCS_BATCH(batch, 0); /* reserved */
129     ADVANCE_BCS_BATCH(batch);
130 }
131
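/*
 * MFX_SURFACE_STATE for the render target: planar 4:2:0 (or monochrome for
 * Y800), Y-major tiled, with the Cb offset taken from the surface and the
 * separate Cr offset programmed only for JPEG.
 */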
132 static void
133 gen75_mfd_surface_state(VADriverContextP ctx,
134                         struct decode_state *decode_state,
135                         int standard_select,
136                         struct gen7_mfd_context *gen7_mfd_context)
137 {
138     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
139     struct object_surface *obj_surface = decode_state->render_object;
140     unsigned int y_cb_offset;
141     unsigned int y_cr_offset;
142     unsigned int surface_format;
143
144     assert(obj_surface);
145
146     y_cb_offset = obj_surface->y_cb_offset;
147     y_cr_offset = obj_surface->y_cr_offset;
148
149     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
150                      MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
151
152     BEGIN_BCS_BATCH(batch, 6);
153     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
154     OUT_BCS_BATCH(batch, 0);
155     OUT_BCS_BATCH(batch,
156                   ((obj_surface->orig_height - 1) << 18) |
157                   ((obj_surface->orig_width - 1) << 4));
158     OUT_BCS_BATCH(batch,
159                   (surface_format << 28) | /* 420 planar YUV surface */
160                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
161                   (0 << 22) | /* surface object control state, ignored */
162                   ((obj_surface->width - 1) << 3) | /* pitch */
163                   (0 << 2)  | /* must be 0 */
164                   (1 << 1)  | /* must be tiled */
165                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
166     OUT_BCS_BATCH(batch,
167                   (0 << 16) | /* X offset for U(Cb), must be 0 */
168                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
169     OUT_BCS_BATCH(batch,
170                   (0 << 16) | /* X offset for V(Cr), must be 0 */
171                   ((standard_select == MFX_FORMAT_JPEG ? y_cr_offset : 0) << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
172     ADVANCE_BCS_BATCH(batch);
173 }
174
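/*
 * MFX_PIPE_BUF_ADDR_STATE, B+ stepping layout (61 DWs): pre/post-deblocking
 * outputs, intra and deblocking-filter row-store scratch buffers, and the
 * 16 reference picture buffers.
 */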
175 static void
176 gen75_mfd_pipe_buf_addr_state_bplus(VADriverContextP ctx,
177                                     struct decode_state *decode_state,
178                                     int standard_select,
179                                     struct gen7_mfd_context *gen7_mfd_context)
180 {
181     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
182     int i;
183
184     BEGIN_BCS_BATCH(batch, 61);
185     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
186     /* Pre-deblock 1-3 */
187     if (gen7_mfd_context->pre_deblocking_output.valid)
188         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
189                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
190                       0);
191     else
192         OUT_BCS_BATCH(batch, 0);
193
194     OUT_BCS_BATCH(batch, 0);
195     OUT_BCS_BATCH(batch, 0);
196     /* Post-deblocking 4-6 */
197     if (gen7_mfd_context->post_deblocking_output.valid)
198         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
199                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
200                       0);
201     else
202         OUT_BCS_BATCH(batch, 0);
203
204     OUT_BCS_BATCH(batch, 0);
205     OUT_BCS_BATCH(batch, 0);
206
207     /* uncompressed-video & stream out 7-12 */
208     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
209     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
210     OUT_BCS_BATCH(batch, 0);
211     OUT_BCS_BATCH(batch, 0);
212     OUT_BCS_BATCH(batch, 0);
213     OUT_BCS_BATCH(batch, 0);
214
215     /* intra row-store scratch 13-15 */
216     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
217         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
218                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
219                       0);
220     else
221         OUT_BCS_BATCH(batch, 0);
222
223     OUT_BCS_BATCH(batch, 0);
224     OUT_BCS_BATCH(batch, 0);
225     /* deblocking-filter-row-store 16-18 */
226     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
227         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
228                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
229                       0);
230     else
231         OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     OUT_BCS_BATCH(batch, 0);
234
235     /* DW 19..50 */
236     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
237         struct object_surface *obj_surface;
238
239         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
240             gen7_mfd_context->reference_surface[i].obj_surface &&
241             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
242             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
243
244             OUT_BCS_RELOC(batch, obj_surface->bo,
245                           I915_GEM_DOMAIN_INSTRUCTION, 0,
246                           0);
247         } else {
248             OUT_BCS_BATCH(batch, 0);
249         }
250         OUT_BCS_BATCH(batch, 0);
251     }
252     /* reference property 51 */
253     OUT_BCS_BATCH(batch, 0);
254
255     /* Macroblock status & ILDB 52-57 */
256     OUT_BCS_BATCH(batch, 0);
257     OUT_BCS_BATCH(batch, 0);
258     OUT_BCS_BATCH(batch, 0);
259     OUT_BCS_BATCH(batch, 0);
260     OUT_BCS_BATCH(batch, 0);
261     OUT_BCS_BATCH(batch, 0);
262
263     /* the second Macroblock status 58-60 */
264     OUT_BCS_BATCH(batch, 0);
265     OUT_BCS_BATCH(batch, 0);
266     OUT_BCS_BATCH(batch, 0);
267     ADVANCE_BCS_BATCH(batch);
268 }
269
270 static void
271 gen75_mfd_pipe_buf_addr_state(VADriverContextP ctx,
272                               struct decode_state *decode_state,
273                               int standard_select,
274                               struct gen7_mfd_context *gen7_mfd_context)
275 {
276     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
277     struct i965_driver_data *i965 = i965_driver_data(ctx);
278     int i;
279
280     if (IS_STEPPING_BPLUS(i965)) {
281         gen75_mfd_pipe_buf_addr_state_bplus(ctx, decode_state,
282                                             standard_select, gen7_mfd_context);
283         return;
284     }
285
286     BEGIN_BCS_BATCH(batch, 25);
287     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
288     if (gen7_mfd_context->pre_deblocking_output.valid)
289         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
290                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
291                       0);
292     else
293         OUT_BCS_BATCH(batch, 0);
294
295     if (gen7_mfd_context->post_deblocking_output.valid)
296         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
297                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
298                       0);
299     else
300         OUT_BCS_BATCH(batch, 0);
301
302     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
303     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
304
305     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
306         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
307                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
308                       0);
309     else
310         OUT_BCS_BATCH(batch, 0);
311
312     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
313         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
314                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
315                       0);
316     else
317         OUT_BCS_BATCH(batch, 0);
318
319     /* DW 7..22 */
320     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
321         struct object_surface *obj_surface;
322
323         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
324             gen7_mfd_context->reference_surface[i].obj_surface &&
325             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
326             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
327
328             OUT_BCS_RELOC(batch, obj_surface->bo,
329                           I915_GEM_DOMAIN_INSTRUCTION, 0,
330                           0);
331         } else {
332             OUT_BCS_BATCH(batch, 0);
333         }
334     }
335
336     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
337     OUT_BCS_BATCH(batch, 0);   /* ignore DW24 for decoding */
338     ADVANCE_BCS_BATCH(batch);
339 }
340
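/*
 * MFX_IND_OBJ_BASE_ADDR_STATE, B+ stepping layout: point the indirect
 * bitstream object at the slice data BO; the MV, IT and PAK-BSE objects
 * are left unset since they are not used for VLD decoding.
 */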
341 static void
342 gen75_mfd_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
343                                         dri_bo *slice_data_bo,
344                                         int standard_select,
345                                         struct gen7_mfd_context *gen7_mfd_context)
346 {
347     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
348
349     BEGIN_BCS_BATCH(batch, 26);
350     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
351     /* MFX In BS 1-5 */
352     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
353     OUT_BCS_BATCH(batch, 0);
354     OUT_BCS_BATCH(batch, 0);
355     /* Upper bound 4-5 */
356     OUT_BCS_BATCH(batch, 0x80000000); /* must be set, up to 2G */
357     OUT_BCS_BATCH(batch, 0);
358
359     /* MFX indirect MV 6-10 */
360     OUT_BCS_BATCH(batch, 0);
361     OUT_BCS_BATCH(batch, 0);
362     OUT_BCS_BATCH(batch, 0);
363     OUT_BCS_BATCH(batch, 0);
364     OUT_BCS_BATCH(batch, 0);
365
366     /* MFX IT_COFF 11-15 */
367     OUT_BCS_BATCH(batch, 0);
368     OUT_BCS_BATCH(batch, 0);
369     OUT_BCS_BATCH(batch, 0);
370     OUT_BCS_BATCH(batch, 0);
371     OUT_BCS_BATCH(batch, 0);
372
373     /* MFX IT_DBLK 16-20 */
374     OUT_BCS_BATCH(batch, 0);
375     OUT_BCS_BATCH(batch, 0);
376     OUT_BCS_BATCH(batch, 0);
377     OUT_BCS_BATCH(batch, 0);
378     OUT_BCS_BATCH(batch, 0);
379
380     /* MFX PAK_BSE object for encoder 21-25 */
381     OUT_BCS_BATCH(batch, 0);
382     OUT_BCS_BATCH(batch, 0);
383     OUT_BCS_BATCH(batch, 0);
384     OUT_BCS_BATCH(batch, 0);
385     OUT_BCS_BATCH(batch, 0);
386
387     ADVANCE_BCS_BATCH(batch);
388 }
389
390 static void
391 gen75_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
392                                   dri_bo *slice_data_bo,
393                                   int standard_select,
394                                   struct gen7_mfd_context *gen7_mfd_context)
395 {
396     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
397     struct i965_driver_data *i965 = i965_driver_data(ctx);
398
399     if (IS_STEPPING_BPLUS(i965)) {
400         gen75_mfd_ind_obj_base_addr_state_bplus(ctx, slice_data_bo,
401                                                 standard_select, gen7_mfd_context);
402         return;
403     }
404
405     BEGIN_BCS_BATCH(batch, 11);
406     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
407     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
408     OUT_BCS_BATCH(batch, 0x80000000); /* must be set, up to 2G */
409     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
410     OUT_BCS_BATCH(batch, 0);
411     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
412     OUT_BCS_BATCH(batch, 0);
413     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
414     OUT_BCS_BATCH(batch, 0);
415     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
416     OUT_BCS_BATCH(batch, 0);
417     ADVANCE_BCS_BATCH(batch);
418 }
419
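/*
 * MFX_BSP_BUF_BASE_ADDR_STATE, B+ stepping layout: BSD/MPC and MPR row-store
 * scratch buffers plus the bitplane read buffer, each emitted only when valid.
 */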
420 static void
421 gen75_mfd_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
422                                         struct decode_state *decode_state,
423                                         int standard_select,
424                                         struct gen7_mfd_context *gen7_mfd_context)
425 {
426     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
427
428     BEGIN_BCS_BATCH(batch, 10);
429     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
430
431     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
432         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
433                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
434                       0);
435     else
436         OUT_BCS_BATCH(batch, 0);
437
438     OUT_BCS_BATCH(batch, 0);
439     OUT_BCS_BATCH(batch, 0);
440     /* MPR Row Store Scratch buffer 4-6 */
441     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
442         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
443                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
444                       0);
445     else
446         OUT_BCS_BATCH(batch, 0);
447     OUT_BCS_BATCH(batch, 0);
448     OUT_BCS_BATCH(batch, 0);
449
450     /* Bitplane 7-9 */
451     if (gen7_mfd_context->bitplane_read_buffer.valid)
452         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
453                       I915_GEM_DOMAIN_INSTRUCTION, 0,
454                       0);
455     else
456         OUT_BCS_BATCH(batch, 0);
457     OUT_BCS_BATCH(batch, 0);
458     OUT_BCS_BATCH(batch, 0);
459
460     ADVANCE_BCS_BATCH(batch);
461 }
462
463 static void
464 gen75_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
465                                   struct decode_state *decode_state,
466                                   int standard_select,
467                                   struct gen7_mfd_context *gen7_mfd_context)
468 {
469     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
470     struct i965_driver_data *i965 = i965_driver_data(ctx);
471
472     if (IS_STEPPING_BPLUS(i965)) {
473         gen75_mfd_bsp_buf_base_addr_state_bplus(ctx, decode_state,
474                                                 standard_select, gen7_mfd_context);
475         return;
476     }
477
478     BEGIN_BCS_BATCH(batch, 4);
479     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
480
481     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
482         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
483                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
484                       0);
485     else
486         OUT_BCS_BATCH(batch, 0);
487
488     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
489         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
490                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
491                       0);
492     else
493         OUT_BCS_BATCH(batch, 0);
494
495     if (gen7_mfd_context->bitplane_read_buffer.valid)
496         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
497                       I915_GEM_DOMAIN_INSTRUCTION, 0,
498                       0);
499     else
500         OUT_BCS_BATCH(batch, 0);
501
502     ADVANCE_BCS_BATCH(batch);
503 }
504
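/*
 * Load one quantizer matrix via MFX_QM_STATE; the packet always carries
 * 16 DWs of matrix data, so the caller passes at most 64 bytes.
 */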
505 static void
506 gen75_mfd_qm_state(VADriverContextP ctx,
507                    int qm_type,
508                    unsigned char *qm,
509                    int qm_length,
510                    struct gen7_mfd_context *gen7_mfd_context)
511 {
512     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
513     unsigned int qm_buffer[16];
514
515     assert(qm_length <= 16 * 4);
516     memcpy(qm_buffer, qm, qm_length);
517
518     BEGIN_BCS_BATCH(batch, 18);
519     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
520     OUT_BCS_BATCH(batch, qm_type << 0);
521     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
522     ADVANCE_BCS_BATCH(batch);
523 }
524
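/*
 * MFX_AVC_IMG_STATE: per-picture AVC parameters such as frame/field
 * structure, MBAFF, chroma format, entropy coding mode and the chroma QP
 * index offsets.
 */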
525 static void
526 gen75_mfd_avc_img_state(VADriverContextP ctx,
527                         struct decode_state *decode_state,
528                         struct gen7_mfd_context *gen7_mfd_context)
529 {
530     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
531     int img_struct;
532     int mbaff_frame_flag;
533     unsigned int width_in_mbs, height_in_mbs;
534     VAPictureParameterBufferH264 *pic_param;
535
536     assert(decode_state->pic_param && decode_state->pic_param->buffer);
537     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
538
539     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
540
541     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
542         img_struct = 1;
543     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
544         img_struct = 3;
545     else
546         img_struct = 0;
547
548     if ((img_struct & 0x1) == 0x1) {
549         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
550     } else {
551         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
552     }
553
554     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
555         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
556         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
557     } else {
558         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
559     }
560
561     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
562                         !pic_param->pic_fields.bits.field_pic_flag);
563
564     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
565     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
566
567     /* MFX unit doesn't support 4:2:2 and 4:4:4 pictures */
568     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
569            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
570     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
571
572     BEGIN_BCS_BATCH(batch, 17);
573     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
574     OUT_BCS_BATCH(batch,
575                   (width_in_mbs * height_in_mbs - 1));
576     OUT_BCS_BATCH(batch,
577                   ((height_in_mbs - 1) << 16) |
578                   ((width_in_mbs - 1) << 0));
579     OUT_BCS_BATCH(batch,
580                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
581                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
582                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
583                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
584                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
585                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
586                   (img_struct << 8));
587     OUT_BCS_BATCH(batch,
588                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
589                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
590                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
591                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
592                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
593                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
594                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
595                   (mbaff_frame_flag << 1) |
596                   (pic_param->pic_fields.bits.field_pic_flag << 0));
597     OUT_BCS_BATCH(batch, 0);
598     OUT_BCS_BATCH(batch, 0);
599     OUT_BCS_BATCH(batch, 0);
600     OUT_BCS_BATCH(batch, 0);
601     OUT_BCS_BATCH(batch, 0);
602     OUT_BCS_BATCH(batch, 0);
603     OUT_BCS_BATCH(batch, 0);
604     OUT_BCS_BATCH(batch, 0);
605     OUT_BCS_BATCH(batch, 0);
606     OUT_BCS_BATCH(batch, 0);
607     OUT_BCS_BATCH(batch, 0);
608     OUT_BCS_BATCH(batch, 0);
609     ADVANCE_BCS_BATCH(batch);
610 }
611
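/*
 * Program the AVC scaling lists: the six 4x4 matrices always, the two 8x8
 * matrices only when transform_8x8_mode_flag is set. Falls back to the
 * context's default matrices when no IQ matrix buffer is supplied.
 */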
612 static void
613 gen75_mfd_avc_qm_state(VADriverContextP ctx,
614                        struct decode_state *decode_state,
615                        struct gen7_mfd_context *gen7_mfd_context)
616 {
617     VAIQMatrixBufferH264 *iq_matrix;
618     VAPictureParameterBufferH264 *pic_param;
619
620     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
621         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
622     else
623         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
624
625     assert(decode_state->pic_param && decode_state->pic_param->buffer);
626     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
627
628     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
629     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
630
631     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
632         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
633         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
634     }
635 }
636
637 static inline void
638 gen75_mfd_avc_picid_state(VADriverContextP ctx,
639                           struct decode_state *decode_state,
640                           struct gen7_mfd_context *gen7_mfd_context)
641 {
642     gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
643                                gen7_mfd_context->reference_surface);
644 }
645
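/*
 * MFX_AVC_DIRECTMODE_STATE, B+ stepping layout: direct-MV buffers for the
 * reference surfaces and the current picture, followed by the POC list.
 */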
646 static void
647 gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx,
648                                      struct decode_state *decode_state,
649                                      VAPictureParameterBufferH264 *pic_param,
650                                      VASliceParameterBufferH264 *slice_param,
651                                      struct gen7_mfd_context *gen7_mfd_context)
652 {
653     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
654     struct object_surface *obj_surface;
655     GenAvcSurface *gen7_avc_surface;
656     VAPictureH264 *va_pic;
657     int i;
658
659     BEGIN_BCS_BATCH(batch, 71);
660     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
661
662     /* reference surfaces 0..15 */
663     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
664         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
665             gen7_mfd_context->reference_surface[i].obj_surface &&
666             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
667
668             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
669             gen7_avc_surface = obj_surface->private_data;
670             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
671                           I915_GEM_DOMAIN_INSTRUCTION, 0,
672                           0);
673             OUT_BCS_BATCH(batch, 0);
674         } else {
675             OUT_BCS_BATCH(batch, 0);
676             OUT_BCS_BATCH(batch, 0);
677         }
678     }
679
680     OUT_BCS_BATCH(batch, 0);
681
682     /* the current decoding frame/field */
683     va_pic = &pic_param->CurrPic;
684     obj_surface = decode_state->render_object;
685     assert(obj_surface->bo && obj_surface->private_data);
686     gen7_avc_surface = obj_surface->private_data;
687
688     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
689                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
690                   0);
691
692     OUT_BCS_BATCH(batch, 0);
693     OUT_BCS_BATCH(batch, 0);
694
695     /* POC List */
696     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
697         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
698
699         if (obj_surface) {
700             const VAPictureH264 * const va_pic = avc_find_picture(
701                                                      obj_surface->base.id, pic_param->ReferenceFrames,
702                                                      ARRAY_ELEMS(pic_param->ReferenceFrames));
703
704             assert(va_pic != NULL);
705             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
706             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
707         } else {
708             OUT_BCS_BATCH(batch, 0);
709             OUT_BCS_BATCH(batch, 0);
710         }
711     }
712
713     va_pic = &pic_param->CurrPic;
714     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
715     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
716
717     ADVANCE_BCS_BATCH(batch);
718 }
719
720 static void
721 gen75_mfd_avc_directmode_state(VADriverContextP ctx,
722                                struct decode_state *decode_state,
723                                VAPictureParameterBufferH264 *pic_param,
724                                VASliceParameterBufferH264 *slice_param,
725                                struct gen7_mfd_context *gen7_mfd_context)
726 {
727     struct i965_driver_data *i965 = i965_driver_data(ctx);
728     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
729     struct object_surface *obj_surface;
730     GenAvcSurface *gen7_avc_surface;
731     VAPictureH264 *va_pic;
732     int i;
733
734     if (IS_STEPPING_BPLUS(i965)) {
735         gen75_mfd_avc_directmode_state_bplus(ctx, decode_state, pic_param, slice_param,
736                                              gen7_mfd_context);
737
738         return;
739     }
740
741     BEGIN_BCS_BATCH(batch, 69);
742     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
743
744     /* reference surfaces 0..15 */
745     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
746         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
747             gen7_mfd_context->reference_surface[i].obj_surface &&
748             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
749
750             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
751             gen7_avc_surface = obj_surface->private_data;
752
753             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
754                           I915_GEM_DOMAIN_INSTRUCTION, 0,
755                           0);
756
757             if (gen7_avc_surface->dmv_bottom_flag == 1)
758                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
759                               I915_GEM_DOMAIN_INSTRUCTION, 0,
760                               0);
761             else
762                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
763                               I915_GEM_DOMAIN_INSTRUCTION, 0,
764                               0);
765         } else {
766             OUT_BCS_BATCH(batch, 0);
767             OUT_BCS_BATCH(batch, 0);
768         }
769     }
770
771     /* the current decoding frame/field */
772     va_pic = &pic_param->CurrPic;
773     obj_surface = decode_state->render_object;
774     assert(obj_surface->bo && obj_surface->private_data);
775     gen7_avc_surface = obj_surface->private_data;
776
777     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
778                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
779                   0);
780
781     if (gen7_avc_surface->dmv_bottom_flag == 1)
782         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
783                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
784                       0);
785     else
786         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
787                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
788                       0);
789
790     /* POC List */
791     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
792         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
793
794         if (obj_surface) {
795             const VAPictureH264 * const va_pic = avc_find_picture(
796                                                      obj_surface->base.id, pic_param->ReferenceFrames,
797                                                      ARRAY_ELEMS(pic_param->ReferenceFrames));
798
799             assert(va_pic != NULL);
800             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
801             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
802         } else {
803             OUT_BCS_BATCH(batch, 0);
804             OUT_BCS_BATCH(batch, 0);
805         }
806     }
807
808     va_pic = &pic_param->CurrPic;
809     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
810     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
811
812     ADVANCE_BCS_BATCH(batch);
813 }
814
815 static void
816 gen75_mfd_avc_phantom_slice_first(VADriverContextP ctx,
817                                   VAPictureParameterBufferH264 *pic_param,
818                                   VASliceParameterBufferH264 *next_slice_param,
819                                   struct gen7_mfd_context *gen7_mfd_context)
820 {
821     gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
822 }
823
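/*
 * MFX_AVC_SLICE_STATE: slice type, active reference counts, QP and
 * deblocking controls, and the macroblock positions of this slice and the
 * next one (or the end of the picture for the last slice).
 */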
824 static void
825 gen75_mfd_avc_slice_state(VADriverContextP ctx,
826                           VAPictureParameterBufferH264 *pic_param,
827                           VASliceParameterBufferH264 *slice_param,
828                           VASliceParameterBufferH264 *next_slice_param,
829                           struct gen7_mfd_context *gen7_mfd_context)
830 {
831     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
832     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
833     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
834     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
835     int num_ref_idx_l0, num_ref_idx_l1;
836     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
837                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
838     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
839     int slice_type;
840
841     if (slice_param->slice_type == SLICE_TYPE_I ||
842         slice_param->slice_type == SLICE_TYPE_SI) {
843         slice_type = SLICE_TYPE_I;
844     } else if (slice_param->slice_type == SLICE_TYPE_P ||
845                slice_param->slice_type == SLICE_TYPE_SP) {
846         slice_type = SLICE_TYPE_P;
847     } else {
848         assert(slice_param->slice_type == SLICE_TYPE_B);
849         slice_type = SLICE_TYPE_B;
850     }
851
852     if (slice_type == SLICE_TYPE_I) {
853         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
854         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
855         num_ref_idx_l0 = 0;
856         num_ref_idx_l1 = 0;
857     } else if (slice_type == SLICE_TYPE_P) {
858         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
859         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
860         num_ref_idx_l1 = 0;
861     } else {
862         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
863         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
864     }
865
866     first_mb_in_slice = slice_param->first_mb_in_slice;
867     slice_hor_pos = first_mb_in_slice % width_in_mbs;
868     slice_ver_pos = first_mb_in_slice / width_in_mbs;
869
870     if (mbaff_picture)
871         slice_ver_pos = slice_ver_pos << 1;
872
873     if (next_slice_param) {
874         first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
875         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
876         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
877
878         if (mbaff_picture)
879             next_slice_ver_pos = next_slice_ver_pos << 1;
880     } else {
881         next_slice_hor_pos = 0;
882         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
883     }
884
885     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
886     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
887     OUT_BCS_BATCH(batch, slice_type);
888     OUT_BCS_BATCH(batch,
889                   (num_ref_idx_l1 << 24) |
890                   (num_ref_idx_l0 << 16) |
891                   (slice_param->chroma_log2_weight_denom << 8) |
892                   (slice_param->luma_log2_weight_denom << 0));
893     OUT_BCS_BATCH(batch,
894                   (slice_param->direct_spatial_mv_pred_flag << 29) |
895                   (slice_param->disable_deblocking_filter_idc << 27) |
896                   (slice_param->cabac_init_idc << 24) |
897                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
898                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
899                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
900     OUT_BCS_BATCH(batch,
901                   (slice_ver_pos << 24) |
902                   (slice_hor_pos << 16) |
903                   (first_mb_in_slice << 0));
904     OUT_BCS_BATCH(batch,
905                   (next_slice_ver_pos << 16) |
906                   (next_slice_hor_pos << 0));
907     OUT_BCS_BATCH(batch,
908                   (next_slice_param == NULL) << 19); /* last slice flag */
909     OUT_BCS_BATCH(batch, 0);
910     OUT_BCS_BATCH(batch, 0);
911     OUT_BCS_BATCH(batch, 0);
912     OUT_BCS_BATCH(batch, 0);
913     ADVANCE_BCS_BATCH(batch);
914 }
915
916 static inline void
917 gen75_mfd_avc_ref_idx_state(VADriverContextP ctx,
918                             VAPictureParameterBufferH264 *pic_param,
919                             VASliceParameterBufferH264 *slice_param,
920                             struct gen7_mfd_context *gen7_mfd_context)
921 {
922     gen6_send_avc_ref_idx_state(
923         gen7_mfd_context->base.batch,
924         slice_param,
925         gen7_mfd_context->reference_surface
926     );
927 }
928
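/*
 * MFX_AVC_WEIGHTOFFSET_STATE: send the explicit weight/offset tables, L0
 * for weighted P/SP slices and both L0 and L1 for B slices with
 * weighted_bipred_idc == 1.
 */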
929 static void
930 gen75_mfd_avc_weightoffset_state(VADriverContextP ctx,
931                                  VAPictureParameterBufferH264 *pic_param,
932                                  VASliceParameterBufferH264 *slice_param,
933                                  struct gen7_mfd_context *gen7_mfd_context)
934 {
935     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
936     int i, j, num_weight_offset_table = 0;
937     short weightoffsets[32 * 6];
938
939     if ((slice_param->slice_type == SLICE_TYPE_P ||
940          slice_param->slice_type == SLICE_TYPE_SP) &&
941         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
942         num_weight_offset_table = 1;
943     }
944
945     if ((slice_param->slice_type == SLICE_TYPE_B) &&
946         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
947         num_weight_offset_table = 2;
948     }
949
950     for (i = 0; i < num_weight_offset_table; i++) {
951         BEGIN_BCS_BATCH(batch, 98);
952         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
953         OUT_BCS_BATCH(batch, i);
954
955         if (i == 0) {
956             for (j = 0; j < 32; j++) {
957                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
958                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
959                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
960                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
961                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
962                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
963             }
964         } else {
965             for (j = 0; j < 32; j++) {
966                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
967                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
968                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
969                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
970                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
971                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
972             }
973         }
974
975         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
976         ADVANCE_BCS_BATCH(batch);
977     }
978 }
979
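/*
 * MFD_AVC_BSD_OBJECT: kick the bitstream decoder for one slice, using the
 * bit offset of the first macroblock derived from the slice header.
 */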
980 static void
981 gen75_mfd_avc_bsd_object(VADriverContextP ctx,
982                          VAPictureParameterBufferH264 *pic_param,
983                          VASliceParameterBufferH264 *slice_param,
984                          dri_bo *slice_data_bo,
985                          VASliceParameterBufferH264 *next_slice_param,
986                          struct gen7_mfd_context *gen7_mfd_context)
987 {
988     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
989     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
990                                                             slice_param,
991                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
992
993     /* the input bitstream format on GEN7 differs from GEN6 */
994     BEGIN_BCS_BATCH(batch, 6);
995     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
996     OUT_BCS_BATCH(batch,
997                   (slice_param->slice_data_size - slice_param->slice_data_offset));
998     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
999     OUT_BCS_BATCH(batch,
1000                   (0 << 31) |
1001                   (0 << 14) |
1002                   (0 << 12) |
1003                   (0 << 10) |
1004                   (0 << 8));
1005     OUT_BCS_BATCH(batch,
1006                   ((slice_data_bit_offset >> 3) << 16) |
1007                   (1 << 7)  |
1008                   (0 << 5)  |
1009                   (0 << 4)  |
1010                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
1011                   (slice_data_bit_offset & 0x7));
1012     OUT_BCS_BATCH(batch, 0);
1013     ADVANCE_BCS_BATCH(batch);
1014 }
1015
1016 static inline void
1017 gen75_mfd_avc_context_init(
1018     VADriverContextP         ctx,
1019     struct gen7_mfd_context *gen7_mfd_context
1020 )
1021 {
1022     /* Initialize flat scaling lists */
1023     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
1024 }
1025
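/*
 * Per-picture AVC setup: scan the slices to decide whether in-loop
 * deblocking is needed, update the frame store indexes, prepare the decoded
 * surface and its direct-MV buffers, and (re)allocate the row-store scratch
 * buffers used by the MFX pipeline.
 */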
1026 static void
1027 gen75_mfd_avc_decode_init(VADriverContextP ctx,
1028                           struct decode_state *decode_state,
1029                           struct gen7_mfd_context *gen7_mfd_context)
1030 {
1031     VAPictureParameterBufferH264 *pic_param;
1032     VASliceParameterBufferH264 *slice_param;
1033     struct i965_driver_data *i965 = i965_driver_data(ctx);
1034     struct object_surface *obj_surface;
1035     dri_bo *bo;
1036     int i, j, enable_avc_ildb = 0;
1037     unsigned int width_in_mbs, height_in_mbs;
1038
1039     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
1040         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1041         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1042
1043         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1044             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1045             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1046                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1047                    (slice_param->slice_type == SLICE_TYPE_P) ||
1048                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1049                    (slice_param->slice_type == SLICE_TYPE_B));
1050
1051             if (slice_param->disable_deblocking_filter_idc != 1) {
1052                 enable_avc_ildb = 1;
1053                 break;
1054             }
1055
1056             slice_param++;
1057         }
1058     }
1059
1060     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1061     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1062     gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
1063                                        gen7_mfd_context->reference_surface);
1064     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
1065     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
1066     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
1067     assert(height_in_mbs > 0 && height_in_mbs <= 256);
1068
1069     /* Current decoded picture */
1070     obj_surface = decode_state->render_object;
1071     if (pic_param->pic_fields.bits.reference_pic_flag)
1072         obj_surface->flags |= SURFACE_REFERENCED;
1073     else
1074         obj_surface->flags &= ~SURFACE_REFERENCED;
1075
1076     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
1077     gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface);
1078
1079     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1080     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1081     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1082     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
1083
1084     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1085     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1086     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1087     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
1088
1089     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1090     bo = dri_bo_alloc(i965->intel.bufmgr,
1091                       "intra row store",
1092                       width_in_mbs * 64,
1093                       0x1000);
1094     assert(bo);
1095     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1096     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1097
1098     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1099     bo = dri_bo_alloc(i965->intel.bufmgr,
1100                       "deblocking filter row store",
1101                       width_in_mbs * 64 * 4,
1102                       0x1000);
1103     assert(bo);
1104     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1105     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1106
1107     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1108     bo = dri_bo_alloc(i965->intel.bufmgr,
1109                       "bsd mpc row store",
1110                       width_in_mbs * 64 * 2,
1111                       0x1000);
1112     assert(bo);
1113     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1114     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1115
1116     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
1117     bo = dri_bo_alloc(i965->intel.bufmgr,
1118                       "mpr row store",
1119                       width_in_mbs * 64 * 2,
1120                       0x1000);
1121     assert(bo);
1122     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
1123     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
1124
1125     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1126 }
1127
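/*
 * Top-level AVC decode: emit the per-picture MFX states, then per-slice
 * direct-mode, ref-idx, weight/offset, slice state and BSD object commands
 * for every slice in every slice parameter buffer.
 */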
1128 static void
1129 gen75_mfd_avc_decode_picture(VADriverContextP ctx,
1130                              struct decode_state *decode_state,
1131                              struct gen7_mfd_context *gen7_mfd_context)
1132 {
1133     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1134     VAPictureParameterBufferH264 *pic_param;
1135     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
1136     dri_bo *slice_data_bo;
1137     int i, j;
1138
1139     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1140     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1141     gen75_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
1142
1143     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1144     intel_batchbuffer_emit_mi_flush(batch);
1145     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1146     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1147     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1148     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1149     gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
1150     gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
1151     gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
1152
1153     for (j = 0; j < decode_state->num_slice_params; j++) {
1154         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1155         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1156         slice_data_bo = decode_state->slice_datas[j]->bo;
1157         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
1158
1159         if (j == decode_state->num_slice_params - 1)
1160             next_slice_group_param = NULL;
1161         else
1162             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
1163
1164         if (j == 0 && slice_param->first_mb_in_slice)
1165             gen75_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
1166
1167         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1168             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1169             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1170                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1171                    (slice_param->slice_type == SLICE_TYPE_P) ||
1172                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1173                    (slice_param->slice_type == SLICE_TYPE_B));
1174
1175             if (i < decode_state->slice_params[j]->num_elements - 1)
1176                 next_slice_param = slice_param + 1;
1177             else
1178                 next_slice_param = next_slice_group_param;
1179
1180             gen75_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
1181             gen75_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
1182             gen75_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
1183             gen75_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1184             gen75_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
1185             slice_param++;
1186         }
1187     }
1188
1189     intel_batchbuffer_end_atomic(batch);
1190     intel_batchbuffer_flush(batch);
1191 }
1192
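/*
 * Per-picture MPEG-2 setup: bind the reference surfaces, make sure the
 * decoded surface has an NV12 BO, and allocate the BSD/MPC row-store
 * scratch buffer.
 */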
1193 static void
1194 gen75_mfd_mpeg2_decode_init(VADriverContextP ctx,
1195                             struct decode_state *decode_state,
1196                             struct gen7_mfd_context *gen7_mfd_context)
1197 {
1198     VAPictureParameterBufferMPEG2 *pic_param;
1199     struct i965_driver_data *i965 = i965_driver_data(ctx);
1200     struct object_surface *obj_surface;
1201     dri_bo *bo;
1202     unsigned int width_in_mbs;
1203
1204     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1205     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1206     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1207
1208     mpeg2_set_reference_surfaces(
1209         ctx,
1210         gen7_mfd_context->reference_surface,
1211         decode_state,
1212         pic_param
1213     );
1214
1215     /* Current decoded picture */
1216     obj_surface = decode_state->render_object;
1217     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1218
1219     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1220     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1221     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1222     gen7_mfd_context->pre_deblocking_output.valid = 1;
1223
1224     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1225     bo = dri_bo_alloc(i965->intel.bufmgr,
1226                       "bsd mpc row store",
1227                       width_in_mbs * 96,
1228                       0x1000);
1229     assert(bo);
1230     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1231     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1232
1233     gen7_mfd_context->post_deblocking_output.valid = 0;
1234     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1235     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1236     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1237     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1238 }
1239
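/*
 * MFX_MPEG2_PIC_STATE: f_codes, picture coding extension flags, picture
 * coding type and the coded size in macroblocks.
 */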
1240 static void
1241 gen75_mfd_mpeg2_pic_state(VADriverContextP ctx,
1242                           struct decode_state *decode_state,
1243                           struct gen7_mfd_context *gen7_mfd_context)
1244 {
1245     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1246     VAPictureParameterBufferMPEG2 *pic_param;
1247     unsigned int slice_concealment_disable_bit = 0;
1248
1249     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1250     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1251
1252     slice_concealment_disable_bit = 1;
1253
1254     BEGIN_BCS_BATCH(batch, 13);
1255     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1256     OUT_BCS_BATCH(batch,
1257                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1258                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1259                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1260                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1261                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1262                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1263                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1264                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1265                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1266                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1267                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1268                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1269     OUT_BCS_BATCH(batch,
1270                   pic_param->picture_coding_type << 9);
1271     OUT_BCS_BATCH(batch,
1272                   (slice_concealment_disable_bit << 31) |
1273                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1274                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1275     OUT_BCS_BATCH(batch, 0);
1276     OUT_BCS_BATCH(batch, 0);
1277     OUT_BCS_BATCH(batch, 0);
1278     OUT_BCS_BATCH(batch, 0);
1279     OUT_BCS_BATCH(batch, 0);
1280     OUT_BCS_BATCH(batch, 0);
1281     OUT_BCS_BATCH(batch, 0);
1282     OUT_BCS_BATCH(batch, 0);
1283     OUT_BCS_BATCH(batch, 0);
1284     ADVANCE_BCS_BATCH(batch);
1285 }
1286
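/*
 * Caches the intra/non-intra quantiser matrices (converted from
 * zig-zag to raster order) and loads the ones marked valid into the
 * hardware through MFX_QM_STATE.
 */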
1287 static void
1288 gen75_mfd_mpeg2_qm_state(VADriverContextP ctx,
1289                          struct decode_state *decode_state,
1290                          struct gen7_mfd_context *gen7_mfd_context)
1291 {
1292     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1293     int i, j;
1294
1295     /* Update internal QM state */
1296     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1297         VAIQMatrixBufferMPEG2 * const iq_matrix =
1298             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1299
1300         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1301             iq_matrix->load_intra_quantiser_matrix) {
1302             gen_iq_matrix->load_intra_quantiser_matrix =
1303                 iq_matrix->load_intra_quantiser_matrix;
1304             if (iq_matrix->load_intra_quantiser_matrix) {
1305                 for (j = 0; j < 64; j++)
1306                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1307                         iq_matrix->intra_quantiser_matrix[j];
1308             }
1309         }
1310
1311         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1312             iq_matrix->load_non_intra_quantiser_matrix) {
1313             gen_iq_matrix->load_non_intra_quantiser_matrix =
1314                 iq_matrix->load_non_intra_quantiser_matrix;
1315             if (iq_matrix->load_non_intra_quantiser_matrix) {
1316                 for (j = 0; j < 64; j++)
1317                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1318                         iq_matrix->non_intra_quantiser_matrix[j];
1319             }
1320         }
1321     }
1322
1323     /* Commit QM state to HW */
1324     for (i = 0; i < 2; i++) {
1325         unsigned char *qm = NULL;
1326         int qm_type;
1327
1328         if (i == 0) {
1329             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1330                 qm = gen_iq_matrix->intra_quantiser_matrix;
1331                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1332             }
1333         } else {
1334             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1335                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1336                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1337             }
1338         }
1339
1340         if (!qm)
1341             continue;
1342
1343         gen75_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1344     }
1345 }
1346
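/*
 * Emits one MFD_MPEG2_BSD_OBJECT per slice.  The macroblock count is
 * derived from the start of the next slice (or the bottom of the
 * picture for the last slice); vertical positions are halved when the
 * field-picture slice_vertical_position workaround is active.
 */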
1347 static void
1348 gen75_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1349                            VAPictureParameterBufferMPEG2 *pic_param,
1350                            VASliceParameterBufferMPEG2 *slice_param,
1351                            VASliceParameterBufferMPEG2 *next_slice_param,
1352                            struct gen7_mfd_context *gen7_mfd_context)
1353 {
1354     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1355     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1356     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1357
1358     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1359         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1360         is_field_pic = 1;
1361     is_field_pic_wa = is_field_pic &&
1362                       gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1363
1364     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1365     hpos0 = slice_param->slice_horizontal_position;
1366
1367     if (next_slice_param == NULL) {
1368         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1369         hpos1 = 0;
1370     } else {
1371         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1372         hpos1 = next_slice_param->slice_horizontal_position;
1373     }
1374
1375     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1376
1377     BEGIN_BCS_BATCH(batch, 5);
1378     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1379     OUT_BCS_BATCH(batch,
1380                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1381     OUT_BCS_BATCH(batch,
1382                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1383     OUT_BCS_BATCH(batch,
1384                   hpos0 << 24 |
1385                   vpos0 << 16 |
1386                   mb_count << 8 |
1387                   (next_slice_param == NULL) << 5 |
1388                   (next_slice_param == NULL) << 3 |
1389                   (slice_param->macroblock_offset & 0x7));
1390     OUT_BCS_BATCH(batch,
1391                   (slice_param->quantiser_scale_code << 24) |
1392                   (vpos1 << 8 | hpos1));
1393     ADVANCE_BCS_BATCH(batch);
1394 }
1395
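/*
 * Top-level MPEG-2 picture decode: programs the common MFX pipeline
 * state plus the MPEG-2 picture and QM state, then walks every slice
 * parameter buffer and emits a BSD object for each slice.
 */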
1396 static void
1397 gen75_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1398                                struct decode_state *decode_state,
1399                                struct gen7_mfd_context *gen7_mfd_context)
1400 {
1401     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1402     VAPictureParameterBufferMPEG2 *pic_param;
1403     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1404     dri_bo *slice_data_bo;
1405     int i, j;
1406
1407     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1408     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1409
1410     gen75_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1411     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1412     intel_batchbuffer_emit_mi_flush(batch);
1413     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1414     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1415     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1416     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1417     gen75_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1418     gen75_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1419
1420     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1421         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1422             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1423
1424     for (j = 0; j < decode_state->num_slice_params; j++) {
1425         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1426         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1427         slice_data_bo = decode_state->slice_datas[j]->bo;
1428         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1429
1430         if (j == decode_state->num_slice_params - 1)
1431             next_slice_group_param = NULL;
1432         else
1433             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1434
1435         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1436             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1437
1438             if (i < decode_state->slice_params[j]->num_elements - 1)
1439                 next_slice_param = slice_param + 1;
1440             else
1441                 next_slice_param = next_slice_group_param;
1442
1443             gen75_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1444             slice_param++;
1445         }
1446     }
1447
1448     intel_batchbuffer_end_atomic(batch);
1449     intel_batchbuffer_flush(batch);
1450 }
1451
1452 static const int va_to_gen7_vc1_mv[4] = {
1453     1, /* 1-MV */
1454     2, /* 1-MV half-pel */
1455     3, /* 1-MV half-pel bilinear */
1456     0, /* Mixed MV */
1457 };
1458
1459 static const int b_picture_scale_factor[21] = {
1460     128, 85,  170, 64,  192,
1461     51,  102, 153, 204, 43,
1462     215, 37,  74,  111, 148,
1463     185, 222, 32,  96,  160,
1464     224,
1465 };
1466
1467 static const int va_to_gen7_vc1_condover[3] = {
1468     0,
1469     2,
1470     3
1471 };
1472
1473 static const int va_to_gen7_vc1_profile[4] = {
1474     GEN7_VC1_SIMPLE_PROFILE,
1475     GEN7_VC1_MAIN_PROFILE,
1476     GEN7_VC1_RESERVED_PROFILE,
1477     GEN7_VC1_ADVANCED_PROFILE
1478 };
1479
1480 static void
1481 gen75_mfd_free_vc1_surface(void **data)
1482 {
1483     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1484
1485     if (!gen7_vc1_surface)
1486         return;
1487
1488     dri_bo_unreference(gen7_vc1_surface->dmv);
1489     free(gen7_vc1_surface);
1490     *data = NULL;
1491 }
1492
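/*
 * Attaches a gen7_vc1_surface to the render target: records the
 * picture type, resets the intensity-compensation fields and allocates
 * the direct-MV buffer on first use.
 */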
1493 static void
1494 gen75_mfd_init_vc1_surface(VADriverContextP ctx,
1495                            VAPictureParameterBufferVC1 *pic_param,
1496                            struct object_surface *obj_surface)
1497 {
1498     struct i965_driver_data *i965 = i965_driver_data(ctx);
1499     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1500     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1501
1502     obj_surface->free_private_data = gen75_mfd_free_vc1_surface;
1503
1504     if (!gen7_vc1_surface) {
1505         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1506         assert(gen7_vc1_surface);
1507         assert((obj_surface->size & 0x3f) == 0);
1508         obj_surface->private_data = gen7_vc1_surface;
1509     }
1510
1511     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1512     gen7_vc1_surface->intensity_compensation = 0;
1513     gen7_vc1_surface->luma_scale = 0;
1514     gen7_vc1_surface->luma_shift = 0;
1515
1516     if (gen7_vc1_surface->dmv == NULL) {
1517         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1518                                              "direct mv w/r buffer",
1519                                              128 * height_in_mbs * 64,
1520                                              0x1000);
1521     }
1522 }
1523
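/*
 * Per-picture setup for VC-1 decode: updates the frame-store indexes,
 * caches intensity compensation on the forward reference, selects the
 * pre-/post-deblocking output according to the loop-filter flag,
 * allocates the row-store scratch buffers and, when present, repacks
 * the VA bit planes into the two-macroblocks-per-byte layout read by
 * the bitplane buffer.
 */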
1524 static void
1525 gen75_mfd_vc1_decode_init(VADriverContextP ctx,
1526                           struct decode_state *decode_state,
1527                           struct gen7_mfd_context *gen7_mfd_context)
1528 {
1529     VAPictureParameterBufferVC1 *pic_param;
1530     struct i965_driver_data *i965 = i965_driver_data(ctx);
1531     struct object_surface *obj_surface;
1532     dri_bo *bo;
1533     int width_in_mbs;
1534     int picture_type;
1535     int intensity_compensation;
1536
1537     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1538     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1539     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1540     picture_type = pic_param->picture_fields.bits.picture_type;
1541     intensity_compensation = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1542
1543     intel_update_vc1_frame_store_index(ctx,
1544                                        decode_state,
1545                                        pic_param,
1546                                        gen7_mfd_context->reference_surface);
1547
1548     /* Forward reference picture */
1549     obj_surface = decode_state->reference_objects[0];
1550     if (pic_param->forward_reference_picture != VA_INVALID_ID &&
1551         obj_surface &&
1552         obj_surface->private_data) {
1553         if (picture_type == 1 && intensity_compensation) { /* P picture */
1554             struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1555
1556             gen7_vc1_surface->intensity_compensation = intensity_compensation;
1557             gen7_vc1_surface->luma_scale = pic_param->luma_scale;
1558             gen7_vc1_surface->luma_shift = pic_param->luma_shift;
1559         }
1560     }
1561
1562     /* Current decoded picture */
1563     obj_surface = decode_state->render_object;
1564     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1565     gen75_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1566
1567     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1568     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1569     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1570     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1571
1572     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1573     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1574     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1575     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1576
1577     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1578     bo = dri_bo_alloc(i965->intel.bufmgr,
1579                       "intra row store",
1580                       width_in_mbs * 64,
1581                       0x1000);
1582     assert(bo);
1583     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1584     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1585
1586     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1587     bo = dri_bo_alloc(i965->intel.bufmgr,
1588                       "deblocking filter row store",
1589                       width_in_mbs * 7 * 64,
1590                       0x1000);
1591     assert(bo);
1592     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1593     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1594
1595     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1596     bo = dri_bo_alloc(i965->intel.bufmgr,
1597                       "bsd mpc row store",
1598                       width_in_mbs * 96,
1599                       0x1000);
1600     assert(bo);
1601     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1602     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1603
1604     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1605
1606     if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
1607         gen7_mfd_context->bitplane_read_buffer.valid = 1;
1608     else
1609         gen7_mfd_context->bitplane_read_buffer.valid = !!(pic_param->bitplane_present.value & 0x7f);
1610     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1611
1612     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1613         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1614         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1615         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1616         int src_w, src_h;
1617         uint8_t *src = NULL, *dst = NULL;
1618
1619         bo = dri_bo_alloc(i965->intel.bufmgr,
1620                           "VC-1 Bitplane",
1621                           bitplane_width * height_in_mbs,
1622                           0x1000);
1623         assert(bo);
1624         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1625
1626         dri_bo_map(bo, True);
1627         assert(bo->virtual);
1628         dst = bo->virtual;
1629
1630         if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1631             for (src_h = 0; src_h < height_in_mbs; src_h++) {
1632                 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1633                     int dst_index;
1634                     uint8_t src_value = 0x2;
1635
1636                     dst_index = src_w / 2;
1637                     dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1638                 }
1639
1640                 if (src_w & 1)
1641                     dst[src_w / 2] >>= 4;
1642
1643                 dst += bitplane_width;
1644             }
1645         } else {
1646             assert(decode_state->bit_plane->buffer);
1647             src = decode_state->bit_plane->buffer;
1648
1649             for (src_h = 0; src_h < height_in_mbs; src_h++) {
1650                 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1651                     int src_index, dst_index;
1652                     int src_shift;
1653                     uint8_t src_value;
1654
1655                     src_index = (src_h * width_in_mbs + src_w) / 2;
1656                     src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1657                     src_value = ((src[src_index] >> src_shift) & 0xf);
1658
1659                     dst_index = src_w / 2;
1660                     dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1661                 }
1662
1663                 if (src_w & 1)
1664                     dst[src_w / 2] >>= 4;
1665
1666                 dst += bitplane_width;
1667             }
1668         }
1669
1670         dri_bo_unmap(bo);
1671     } else
1672         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1673 }
1674
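/*
 * Builds MFD_VC1_LONG_PIC_STATE.  Most fields are copied straight from
 * the VA picture parameters; the alternate PQUANT configuration, the
 * unified MV mode, the B-fraction scale factor, overlap smoothing and
 * the interpolation mode are derived first.
 */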
1675 static void
1676 gen75_mfd_vc1_pic_state(VADriverContextP ctx,
1677                         struct decode_state *decode_state,
1678                         struct gen7_mfd_context *gen7_mfd_context)
1679 {
1680     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1681     VAPictureParameterBufferVC1 *pic_param;
1682     struct object_surface *obj_surface;
1683     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1684     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1685     int unified_mv_mode;
1686     int ref_field_pic_polarity = 0;
1687     int scale_factor = 0;
1688     int trans_ac_y = 0;
1689     int dmv_surface_valid = 0;
1690     int brfd = 0;
1691     int fcm = 0;
1692     int picture_type;
1693     int ptype;
1694     int profile;
1695     int overlap = 0;
1696     int interpolation_mode = 0;
1697
1698     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1699     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1700
1701     picture_type = pic_param->picture_fields.bits.picture_type;
1702
1703     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1704     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1705     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1706     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1707     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1708     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1709     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1710     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1711
1712     if (dquant == 0) {
1713         alt_pquant_config = 0;
1714         alt_pquant_edge_mask = 0;
1715     } else if (dquant == 2) {
1716         alt_pquant_config = 1;
1717         alt_pquant_edge_mask = 0xf;
1718     } else {
1719         assert(dquant == 1);
1720         if (dquantfrm == 0) {
1721             alt_pquant_config = 0;
1722             alt_pquant_edge_mask = 0;
1723             alt_pq = 0;
1724         } else {
1725             assert(dquantfrm == 1);
1726             alt_pquant_config = 1;
1727
1728             switch (dqprofile) {
1729             case 3:
1730                 if (dqbilevel == 0) {
1731                     alt_pquant_config = 2;
1732                     alt_pquant_edge_mask = 0;
1733                 } else {
1734                     assert(dqbilevel == 1);
1735                     alt_pquant_config = 3;
1736                     alt_pquant_edge_mask = 0;
1737                 }
1738                 break;
1739
1740             case 0:
1741                 alt_pquant_edge_mask = 0xf;
1742                 break;
1743
1744             case 1:
1745                 if (dqdbedge == 3)
1746                     alt_pquant_edge_mask = 0x9;
1747                 else
1748                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1749
1750                 break;
1751
1752             case 2:
1753                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1754                 break;
1755
1756             default:
1757                 assert(0);
1758             }
1759         }
1760     }
1761
1762     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1763         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1764         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1765     } else {
1766         assert(pic_param->mv_fields.bits.mv_mode < 4);
1767         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1768     }
1769
1770     if (pic_param->sequence_fields.bits.interlace == 1 &&
1771         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1772         /* FIXME: calculate reference field picture polarity */
1773         assert(0);
1774         ref_field_pic_polarity = 0;
1775     }
1776
1777     if (pic_param->b_picture_fraction < 21)
1778         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1779
1780     if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
1781         ptype = GEN7_VC1_P_PICTURE;
1782     else
1783         ptype = pic_param->picture_fields.bits.picture_type;
1784
1785     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I/BI picture */
1786         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1787     else {
1788         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1789
1790         /*
1791          * 8.3.6.2.1 Transform Type Selection
1792          * If variable-sized transform coding is not enabled,
1793          * then the 8x8 transform shall be used for all blocks.
1794          * it is also MFX_VC1_PIC_STATE requirement.
1795          * This is also an MFX_VC1_PIC_STATE requirement.
1796         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1797             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1798             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1799         }
1800     }
1801
1802     if (picture_type == GEN7_VC1_B_PICTURE) {
1803         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1804
1805         obj_surface = decode_state->reference_objects[1];
1806
1807         if (obj_surface)
1808             gen7_vc1_surface = obj_surface->private_data;
1809
1810         if (!gen7_vc1_surface ||
1811             (gen7_vc1_surface->picture_type == GEN7_VC1_I_PICTURE ||
1812              gen7_vc1_surface->picture_type == GEN7_VC1_BI_PICTURE))
1813             dmv_surface_valid = 0;
1814         else
1815             dmv_surface_valid = 1;
1816     }
1817
1818     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1819
1820     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1821         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1822     else {
1823         if (pic_param->picture_fields.bits.top_field_first)
1824             fcm = 2;
1825         else
1826             fcm = 3;
1827     }
1828
1829     if (picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1830         brfd = pic_param->reference_fields.bits.reference_distance;
1831         brfd = (scale_factor * brfd) >> 8;
1832         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1833
1834         if (brfd < 0)
1835             brfd = 0;
1836     }
1837
1838     if (pic_param->sequence_fields.bits.overlap) {
1839         if (profile == GEN7_VC1_ADVANCED_PROFILE) {
1840             if (picture_type == GEN7_VC1_P_PICTURE &&
1841                 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1842                 overlap = 1;
1843             }
1844             if (picture_type == GEN7_VC1_I_PICTURE ||
1845                 picture_type == GEN7_VC1_BI_PICTURE) {
1846                 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1847                     overlap = 1;
1848                 } else if (pic_param->conditional_overlap_flag == 1 || /* all block boundaries */
1849                            pic_param->conditional_overlap_flag == 2) { /* coded by OVERFLAGSMB bitplane */
1850                     overlap = 1;
1851                 }
1852             }
1853         } else {
1854             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1855                 picture_type != GEN7_VC1_B_PICTURE) {
1856                 overlap = 1;
1857             }
1858         }
1859     }
1860
1861     assert(pic_param->conditional_overlap_flag < 3);
1862     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1863
1864     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1865         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1866          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1867         interpolation_mode = 9; /* Half-pel bilinear */
1868     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1869              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1870               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1871         interpolation_mode = 1; /* Half-pel bicubic */
1872     else
1873         interpolation_mode = 0; /* Quarter-pel bicubic */
1874
1875     BEGIN_BCS_BATCH(batch, 6);
1876     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1877     OUT_BCS_BATCH(batch,
1878                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1879                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1880     OUT_BCS_BATCH(batch,
1881                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1882                   dmv_surface_valid << 15 |
1883                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1884                   pic_param->rounding_control << 13 |
1885                   pic_param->sequence_fields.bits.syncmarker << 12 |
1886                   interpolation_mode << 8 |
1887                   0 << 7 | /* FIXME: scale up or down ??? */
1888                   pic_param->range_reduction_frame << 6 |
1889                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1890                   overlap << 4 |
1891                   !pic_param->picture_fields.bits.is_first_field << 3 |
1892                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1893     OUT_BCS_BATCH(batch,
1894                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1895                   ptype << 26 |
1896                   fcm << 24 |
1897                   alt_pq << 16 |
1898                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1899                   scale_factor << 0);
1900     OUT_BCS_BATCH(batch,
1901                   unified_mv_mode << 28 |
1902                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1903                   pic_param->fast_uvmc_flag << 26 |
1904                   ref_field_pic_polarity << 25 |
1905                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1906                   pic_param->reference_fields.bits.reference_distance << 20 |
1907                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1908                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1909                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1910                   alt_pquant_edge_mask << 4 |
1911                   alt_pquant_config << 2 |
1912                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1913                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1914     OUT_BCS_BATCH(batch,
1915                   !!(pic_param->bitplane_present.value & 0x7f) << 31 |
1916                   pic_param->raw_coding.flags.forward_mb << 30 |
1917                   pic_param->raw_coding.flags.mv_type_mb << 29 |
1918                   pic_param->raw_coding.flags.skip_mb << 28 |
1919                   pic_param->raw_coding.flags.direct_mb << 27 |
1920                   pic_param->raw_coding.flags.overflags << 26 |
1921                   pic_param->raw_coding.flags.ac_pred << 25 |
1922                   pic_param->raw_coding.flags.field_tx << 24 |
1923                   pic_param->mv_fields.bits.mv_table << 20 |
1924                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1925                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1926                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1927                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1928                   pic_param->mb_mode_table << 8 |
1929                   trans_ac_y << 6 |
1930                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1931                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1932                   pic_param->cbp_table << 0);
1933     ADVANCE_BCS_BATCH(batch);
1934 }
1935
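/*
 * Emits MFX_VC1_PRED_PIPE_STATE, forwarding the intensity-compensation
 * luma scale/shift cached on the forward reference surface.
 */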
1936 static void
1937 gen75_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1938                               struct decode_state *decode_state,
1939                               struct gen7_mfd_context *gen7_mfd_context)
1940 {
1941     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1942     VAPictureParameterBufferVC1 *pic_param;
1943     int picture_type;
1944     int intensitycomp_single_fwd = 0;
1945     int luma_scale1 = 0;
1946     int luma_shift1 = 0;
1947
1948     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1949     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1950     picture_type = pic_param->picture_fields.bits.picture_type;
1951
1952     if (gen7_mfd_context->reference_surface[0].surface_id != VA_INVALID_ID) {
1953         if (picture_type == 1 || picture_type == 2) { /* P/B picture */
1954             struct gen7_vc1_surface *gen7_vc1_surface = gen7_mfd_context->reference_surface[0].obj_surface->private_data;
1955             if (gen7_vc1_surface) {
1956                 intensitycomp_single_fwd = gen7_vc1_surface->intensity_compensation;
1957                 luma_scale1 = gen7_vc1_surface->luma_scale;
1958                 luma_shift1 = gen7_vc1_surface->luma_shift;
1959             }
1960         }
1961     }
1962
1963     BEGIN_BCS_BATCH(batch, 6);
1964     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1965     OUT_BCS_BATCH(batch,
1966                   0 << 14 | /* FIXME: double ??? */
1967                   0 << 12 |
1968                   intensitycomp_single_fwd << 10 |
1969                   0 << 8 |
1970                   0 << 4 | /* FIXME: interlace mode */
1971                   0);
1972     OUT_BCS_BATCH(batch,
1973                   luma_shift1 << 16 |
1974                   luma_scale1 << 0);
1975     OUT_BCS_BATCH(batch, 0);
1976     OUT_BCS_BATCH(batch, 0);
1977     OUT_BCS_BATCH(batch, 0);
1978     ADVANCE_BCS_BATCH(batch);
1979 }
1980
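/*
 * MFX_VC1_DIRECTMODE_STATE: points the hardware at the direct-MV write
 * buffer of the current picture and the direct-MV read buffer of the
 * backward reference.  The B+ stepping variant below uses the longer
 * 7-dword form of the command; earlier steppings use the 3-dword form.
 */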
1981 static void
1982 gen75_mfd_vc1_directmode_state_bplus(VADriverContextP ctx,
1983                                      struct decode_state *decode_state,
1984                                      struct gen7_mfd_context *gen7_mfd_context)
1985 {
1986     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1987     struct object_surface *obj_surface;
1988     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1989
1990     obj_surface = decode_state->render_object;
1991
1992     if (obj_surface && obj_surface->private_data) {
1993         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1994     }
1995
1996     obj_surface = decode_state->reference_objects[1];
1997
1998     if (obj_surface && obj_surface->private_data) {
1999         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2000     }
2001
2002     BEGIN_BCS_BATCH(batch, 7);
2003     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
2004
2005     if (dmv_write_buffer)
2006         OUT_BCS_RELOC(batch, dmv_write_buffer,
2007                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2008                       0);
2009     else
2010         OUT_BCS_BATCH(batch, 0);
2011
2012     OUT_BCS_BATCH(batch, 0);
2013     OUT_BCS_BATCH(batch, 0);
2014
2015     if (dmv_read_buffer)
2016         OUT_BCS_RELOC(batch, dmv_read_buffer,
2017                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2018                       0);
2019     else
2020         OUT_BCS_BATCH(batch, 0);
2021     OUT_BCS_BATCH(batch, 0);
2022     OUT_BCS_BATCH(batch, 0);
2023
2024     ADVANCE_BCS_BATCH(batch);
2025 }
2026
2027 static void
2028 gen75_mfd_vc1_directmode_state(VADriverContextP ctx,
2029                                struct decode_state *decode_state,
2030                                struct gen7_mfd_context *gen7_mfd_context)
2031 {
2032     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2033     struct i965_driver_data *i965 = i965_driver_data(ctx);
2034     struct object_surface *obj_surface;
2035     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
2036
2037     if (IS_STEPPING_BPLUS(i965)) {
2038         gen75_mfd_vc1_directmode_state_bplus(ctx, decode_state, gen7_mfd_context);
2039         return;
2040     }
2041
2042     obj_surface = decode_state->render_object;
2043
2044     if (obj_surface && obj_surface->private_data) {
2045         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2046     }
2047
2048     obj_surface = decode_state->reference_objects[1];
2049
2050     if (obj_surface && obj_surface->private_data) {
2051         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2052     }
2053
2054     BEGIN_BCS_BATCH(batch, 3);
2055     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
2056
2057     if (dmv_write_buffer)
2058         OUT_BCS_RELOC(batch, dmv_write_buffer,
2059                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2060                       0);
2061     else
2062         OUT_BCS_BATCH(batch, 0);
2063
2064     if (dmv_read_buffer)
2065         OUT_BCS_RELOC(batch, dmv_read_buffer,
2066                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2067                       0);
2068     else
2069         OUT_BCS_BATCH(batch, 0);
2070
2071     ADVANCE_BCS_BATCH(batch);
2072 }
2073
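/*
 * For the advanced profile the slice header may contain emulation
 * prevention bytes (0x00 0x00 0x03); rescan the header and adjust the
 * bit offset so the BSD object points at the first real macroblock.
 */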
2074 static int
2075 gen75_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
2076 {
2077     int out_slice_data_bit_offset;
2078     int slice_header_size = in_slice_data_bit_offset / 8;
2079     int i, j;
2080
2081     if (profile != 3)
2082         out_slice_data_bit_offset = in_slice_data_bit_offset;
2083     else {
2084         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
2085             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
2086                 if (i < slice_header_size - 1)
2087                     i++, j += 2;
2088                 else {
2089                     buf[j + 2] = buf[j + 1];
2090                     j++;
2091                 }
2092             }
2093         }
2094
2095         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
2096     }
2097
2098     return out_slice_data_bit_offset;
2099 }
2100
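/*
 * Emits one MFD_VC1_BSD_OBJECT per slice.  The macroblock bit offset is
 * recomputed from the mapped slice data (see above) before the slice
 * data offset/size and vertical extent are programmed.
 */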
2101 static void
2102 gen75_mfd_vc1_bsd_object(VADriverContextP ctx,
2103                          VAPictureParameterBufferVC1 *pic_param,
2104                          VASliceParameterBufferVC1 *slice_param,
2105                          VASliceParameterBufferVC1 *next_slice_param,
2106                          dri_bo *slice_data_bo,
2107                          struct gen7_mfd_context *gen7_mfd_context)
2108 {
2109     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2110     int next_slice_start_vert_pos;
2111     int macroblock_offset;
2112     uint8_t *slice_data = NULL;
2113
2114     dri_bo_map(slice_data_bo, True);
2115     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
2116     macroblock_offset = gen75_mfd_vc1_get_macroblock_bit_offset(slice_data,
2117                                                                 slice_param->macroblock_offset,
2118                                                                 pic_param->sequence_fields.bits.profile);
2119     dri_bo_unmap(slice_data_bo);
2120
2121     if (next_slice_param)
2122         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
2123     else
2124         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
2125
2126     BEGIN_BCS_BATCH(batch, 5);
2127     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
2128     OUT_BCS_BATCH(batch,
2129                   slice_param->slice_data_size - (macroblock_offset >> 3));
2130     OUT_BCS_BATCH(batch,
2131                   slice_param->slice_data_offset + (macroblock_offset >> 3));
2132     OUT_BCS_BATCH(batch,
2133                   slice_param->slice_vertical_position << 16 |
2134                   next_slice_start_vert_pos << 0);
2135     OUT_BCS_BATCH(batch,
2136                   (macroblock_offset & 0x7));
2137     ADVANCE_BCS_BATCH(batch);
2138 }
2139
2140 static void
2141 gen75_mfd_vc1_decode_picture(VADriverContextP ctx,
2142                              struct decode_state *decode_state,
2143                              struct gen7_mfd_context *gen7_mfd_context)
2144 {
2145     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2146     VAPictureParameterBufferVC1 *pic_param;
2147     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
2148     dri_bo *slice_data_bo;
2149     int i, j;
2150
2151     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2152     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2153
2154     gen75_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
2155     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2156     intel_batchbuffer_emit_mi_flush(batch);
2157     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2158     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2159     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2160     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2161     gen75_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
2162     gen75_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
2163     gen75_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
2164
2165     for (j = 0; j < decode_state->num_slice_params; j++) {
2166         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2167         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
2168         slice_data_bo = decode_state->slice_datas[j]->bo;
2169         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
2170
2171         if (j == decode_state->num_slice_params - 1)
2172             next_slice_group_param = NULL;
2173         else
2174             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
2175
2176         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2177             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2178
2179             if (i < decode_state->slice_params[j]->num_elements - 1)
2180                 next_slice_param = slice_param + 1;
2181             else
2182                 next_slice_param = next_slice_group_param;
2183
2184             gen75_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2185             slice_param++;
2186         }
2187     }
2188
2189     intel_batchbuffer_end_atomic(batch);
2190     intel_batchbuffer_flush(batch);
2191 }
2192
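/*
 * Per-picture setup for JPEG baseline decode: derives the render
 * target fourcc/subsampling from the component sampling factors and
 * routes the output to the pre-deblocking buffer; no scratch buffers
 * are needed.
 */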
2193 static void
2194 gen75_mfd_jpeg_decode_init(VADriverContextP ctx,
2195                            struct decode_state *decode_state,
2196                            struct gen7_mfd_context *gen7_mfd_context)
2197 {
2198     struct object_surface *obj_surface;
2199     VAPictureParameterBufferJPEGBaseline *pic_param;
2200     int subsampling = SUBSAMPLE_YUV420;
2201     int fourcc = VA_FOURCC_IMC3;
2202
2203     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2204
2205     if (pic_param->num_components == 1) {
2206         subsampling = SUBSAMPLE_YUV400;
2207         fourcc = VA_FOURCC_Y800;
2208     } else if (pic_param->num_components == 3) {
2209         int h1 = pic_param->components[0].h_sampling_factor;
2210         int h2 = pic_param->components[1].h_sampling_factor;
2211         int h3 = pic_param->components[2].h_sampling_factor;
2212         int v1 = pic_param->components[0].v_sampling_factor;
2213         int v2 = pic_param->components[1].v_sampling_factor;
2214         int v3 = pic_param->components[2].v_sampling_factor;
2215
2216         if (h1 == 2 * h2 && h2 == h3 &&
2217             v1 == 2 * v2 && v2 == v3) {
2218             subsampling = SUBSAMPLE_YUV420;
2219             fourcc = VA_FOURCC_IMC3;
2220         } else if (h1 == 2 * h2  && h2 == h3 &&
2221                    v1 == v2 && v2 == v3) {
2222             subsampling = SUBSAMPLE_YUV422H;
2223             fourcc = VA_FOURCC_422H;
2224         } else if (h1 == h2 && h2 == h3 &&
2225                    v1 == v2  && v2 == v3) {
2226             subsampling = SUBSAMPLE_YUV444;
2227             fourcc = VA_FOURCC_444P;
2228         } else if (h1 == 4 * h2 && h2 ==  h3 &&
2229                    v1 == v2 && v2 == v3) {
2230             subsampling = SUBSAMPLE_YUV411;
2231             fourcc = VA_FOURCC_411P;
2232         } else if (h1 == h2 && h2 == h3 &&
2233                    v1 == 2 * v2 && v2 == v3) {
2234             subsampling = SUBSAMPLE_YUV422V;
2235             fourcc = VA_FOURCC_422V;
2236         } else
2237             assert(0);
2238     } else {
2239         assert(0);
2240     }
2241
2242     /* Current decoded picture */
2243     obj_surface = decode_state->render_object;
2244     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
2245
2246     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2247     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2248     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2249     gen7_mfd_context->pre_deblocking_output.valid = 1;
2250
2251     gen7_mfd_context->post_deblocking_output.bo = NULL;
2252     gen7_mfd_context->post_deblocking_output.valid = 0;
2253
2254     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2255     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
2256
2257     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2258     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
2259
2260     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2261     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
2262
2263     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2264     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
2265
2266     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2267     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2268 }
2269
2270 static const int va_to_gen7_jpeg_rotation[4] = {
2271     GEN7_JPEG_ROTATION_0,
2272     GEN7_JPEG_ROTATION_90,
2273     GEN7_JPEG_ROTATION_180,
2274     GEN7_JPEG_ROTATION_270
2275 };
2276
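/*
 * Emits MFX_JPEG_PIC_STATE with the chroma type derived from the
 * sampling factors and the frame size converted to 8x8 block units
 * (scaled for the 4:1:1 and 4:2:x layouts).
 */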
2277 static void
2278 gen75_mfd_jpeg_pic_state(VADriverContextP ctx,
2279                          struct decode_state *decode_state,
2280                          struct gen7_mfd_context *gen7_mfd_context)
2281 {
2282     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2283     VAPictureParameterBufferJPEGBaseline *pic_param;
2284     int chroma_type = GEN7_YUV420;
2285     int frame_width_in_blks;
2286     int frame_height_in_blks;
2287
2288     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2289     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2290
2291     if (pic_param->num_components == 1)
2292         chroma_type = GEN7_YUV400;
2293     else if (pic_param->num_components == 3) {
2294         int h1 = pic_param->components[0].h_sampling_factor;
2295         int h2 = pic_param->components[1].h_sampling_factor;
2296         int h3 = pic_param->components[2].h_sampling_factor;
2297         int v1 = pic_param->components[0].v_sampling_factor;
2298         int v2 = pic_param->components[1].v_sampling_factor;
2299         int v3 = pic_param->components[2].v_sampling_factor;
2300
2301         if (h1 == 2 * h2 && h2 == h3 &&
2302             v1 == 2 * v2 && v2 == v3)
2303             chroma_type = GEN7_YUV420;
2304         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2305                  v1 == 1 && v2 == 1 && v3 == 1)
2306             chroma_type = GEN7_YUV422H_2Y;
2307         else if (h1 == h2 && h2 == h3 &&
2308                  v1 == v2 && v2 == v3)
2309             chroma_type = GEN7_YUV444;
2310         else if (h1 == 4 * h2 && h2 == h3 &&
2311                  v1 == v2 && v2 == v3)
2312             chroma_type = GEN7_YUV411;
2313         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2314                  v1 == 2 && v2 == 1 && v3 == 1)
2315             chroma_type = GEN7_YUV422V_2Y;
2316         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2317                  v1 == 2 && v2 == 2 && v3 == 2)
2318             chroma_type = GEN7_YUV422H_4Y;
2319         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2320                  v1 == 2 && v2 == 1 && v3 == 1)
2321             chroma_type = GEN7_YUV422V_4Y;
2322         else
2323             assert(0);
2324     }
2325
2326     if (chroma_type == GEN7_YUV400 ||
2327         chroma_type == GEN7_YUV444 ||
2328         chroma_type == GEN7_YUV422V_2Y) {
2329         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2330         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2331     } else if (chroma_type == GEN7_YUV411) {
2332         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2333         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2334     } else {
2335         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2336         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2337     }
2338
2339     BEGIN_BCS_BATCH(batch, 3);
2340     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2341     OUT_BCS_BATCH(batch,
2342                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2343                   (chroma_type << 0));
2344     OUT_BCS_BATCH(batch,
2345                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2346                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2347     ADVANCE_BCS_BATCH(batch);
2348 }
2349
2350 static const int va_to_gen7_jpeg_hufftable[2] = {
2351     MFX_HUFFTABLE_ID_Y,
2352     MFX_HUFFTABLE_ID_UV
2353 };
2354
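/*
 * Loads the luma/chroma Huffman tables supplied by the application via
 * MFX_JPEG_HUFF_TABLE_STATE; tables whose load flag is clear are left
 * untouched.
 */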
2355 static void
2356 gen75_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2357                                 struct decode_state *decode_state,
2358                                 struct gen7_mfd_context *gen7_mfd_context,
2359                                 int num_tables)
2360 {
2361     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2362     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2363     int index;
2364
2365     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2366         return;
2367
2368     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2369
2370     for (index = 0; index < num_tables; index++) {
2371         int id = va_to_gen7_jpeg_hufftable[index];
2372
2373         if (!huffman_table->load_huffman_table[index])
2374             continue;
2375
2376         BEGIN_BCS_BATCH(batch, 53);
2377         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2378         OUT_BCS_BATCH(batch, id);
2379         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2380         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2381         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2382         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2383         ADVANCE_BCS_BATCH(batch);
2384     }
2385 }
2386
2387 static const int va_to_gen7_jpeg_qm[5] = {
2388     -1,
2389     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2390     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2391     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2392     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2393 };
2394
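/*
 * Converts each loaded JPEG quantiser table from zig-zag to raster
 * order and programs it with MFX_QM_STATE, mapping component ids to
 * the Y/Cb/Cr matrix slots.
 */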
2395 static void
2396 gen75_mfd_jpeg_qm_state(VADriverContextP ctx,
2397                         struct decode_state *decode_state,
2398                         struct gen7_mfd_context *gen7_mfd_context)
2399 {
2400     VAPictureParameterBufferJPEGBaseline *pic_param;
2401     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2402     int index;
2403
2404     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2405         return;
2406
2407     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2408     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2409
2410     assert(pic_param->num_components <= 3);
2411
2412     for (index = 0; index < pic_param->num_components; index++) {
2413         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2414         int qm_type;
2415         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2416         unsigned char raster_qm[64];
2417         int j;
2418
2419         if (id > 4 || id < 1)
2420             continue;
2421
2422         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2423             continue;
2424
2425         qm_type = va_to_gen7_jpeg_qm[id];
2426
2427         for (j = 0; j < 64; j++)
2428             raster_qm[zigzag_direct[j]] = qm[j];
2429
2430         gen75_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2431     }
2432 }
2433
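/*
 * Emits MFD_JPEG_BSD_OBJECT for one scan: the scan component mask,
 * data offset/size, MCU position and count, and the restart interval.
 */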
2434 static void
2435 gen75_mfd_jpeg_bsd_object(VADriverContextP ctx,
2436                           VAPictureParameterBufferJPEGBaseline *pic_param,
2437                           VASliceParameterBufferJPEGBaseline *slice_param,
2438                           VASliceParameterBufferJPEGBaseline *next_slice_param,
2439                           dri_bo *slice_data_bo,
2440                           struct gen7_mfd_context *gen7_mfd_context)
2441 {
2442     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2443     int scan_component_mask = 0;
2444     int i;
2445
2446     assert(slice_param->num_components > 0);
2447     assert(slice_param->num_components < 4);
2448     assert(slice_param->num_components <= pic_param->num_components);
2449
2450     for (i = 0; i < slice_param->num_components; i++) {
2451         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2452         case 1:
2453             scan_component_mask |= (1 << 0);
2454             break;
2455         case 2:
2456             scan_component_mask |= (1 << 1);
2457             break;
2458         case 3:
2459             scan_component_mask |= (1 << 2);
2460             break;
2461         default:
2462             assert(0);
2463             break;
2464         }
2465     }
2466
2467     BEGIN_BCS_BATCH(batch, 6);
2468     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2469     OUT_BCS_BATCH(batch,
2470                   slice_param->slice_data_size);
2471     OUT_BCS_BATCH(batch,
2472                   slice_param->slice_data_offset);
2473     OUT_BCS_BATCH(batch,
2474                   slice_param->slice_horizontal_position << 16 |
2475                   slice_param->slice_vertical_position << 0);
2476     OUT_BCS_BATCH(batch,
2477                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2478                   (scan_component_mask << 27) |                 /* scan components */
2479                   (0 << 26) |   /* disable interrupt allowed */
2480                   (slice_param->num_mcus << 0));                /* MCU count */
2481     OUT_BCS_BATCH(batch,
2482                   (slice_param->restart_interval << 0));    /* RestartInterval */
2483     ADVANCE_BCS_BATCH(batch);
2484 }
2485
2486 /* Workaround for JPEG decoding on Ivybridge */
2487
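/*
 * Pre-encoded 16x16 clip consumed by the gen75_jpeg_wa_* helpers
 * below; it is pushed through the AVC VLD path, presumably to bring
 * the MFX engine into a known state before the real JPEG workload.
 */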
2488 static struct {
2489     int width;
2490     int height;
2491     unsigned char data[32];
2492     int data_size;
2493     int data_bit_offset;
2494     int qp;
2495 } gen7_jpeg_wa_clip = {
2496     16,
2497     16,
2498     {
2499         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2500         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2501     },
2502     14,
2503     40,
2504     28,
2505 };
2506
2507 static void
2508 gen75_jpeg_wa_init(VADriverContextP ctx,
2509                    struct gen7_mfd_context *gen7_mfd_context)
2510 {
2511     struct i965_driver_data *i965 = i965_driver_data(ctx);
2512     VAStatus status;
2513     struct object_surface *obj_surface;
2514
2515     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2516         i965_DestroySurfaces(ctx,
2517                              &gen7_mfd_context->jpeg_wa_surface_id,
2518                              1);
2519
2520     status = i965_CreateSurfaces(ctx,
2521                                  gen7_jpeg_wa_clip.width,
2522                                  gen7_jpeg_wa_clip.height,
2523                                  VA_RT_FORMAT_YUV420,
2524                                  1,
2525                                  &gen7_mfd_context->jpeg_wa_surface_id);
2526     assert(status == VA_STATUS_SUCCESS);
2527
2528     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2529     assert(obj_surface);
2530     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2531     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2532
2533     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2534         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2535                                                                "JPEG WA data",
2536                                                                0x1000,
2537                                                                0x1000);
2538         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2539                        0,
2540                        gen7_jpeg_wa_clip.data_size,
2541                        gen7_jpeg_wa_clip.data);
2542     }
2543 }
2544
2545 static void
2546 gen75_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2547                                struct gen7_mfd_context *gen7_mfd_context)
2548 {
2549     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2550
2551     BEGIN_BCS_BATCH(batch, 5);
2552     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2553     OUT_BCS_BATCH(batch,
2554                   (MFX_LONG_MODE << 17) | /* currently only the long format is supported */
2555                   (MFD_MODE_VLD << 15) | /* VLD mode */
2556                   (0 << 10) | /* disable Stream-Out */
2557                   (0 << 9)  | /* Post Deblocking Output */
2558                   (1 << 8)  | /* Pre Deblocking Output */
2559                   (0 << 5)  | /* not in stitch mode */
2560                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2561                   (MFX_FORMAT_AVC << 0));
2562     OUT_BCS_BATCH(batch,
2563                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2564                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2565                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2566                   (0 << 1)  |
2567                   (0 << 0));
2568     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2569     OUT_BCS_BATCH(batch, 0); /* reserved */
2570     ADVANCE_BCS_BATCH(batch);
2571 }
2572
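/* MFX_SURFACE_STATE describing the private 16x16 NV12 workaround surface. */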
2573 static void
2574 gen75_jpeg_wa_surface_state(VADriverContextP ctx,
2575                             struct gen7_mfd_context *gen7_mfd_context)
2576 {
2577     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2578     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2579
2580     BEGIN_BCS_BATCH(batch, 6);
2581     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2582     OUT_BCS_BATCH(batch, 0);
2583     OUT_BCS_BATCH(batch,
2584                   ((obj_surface->orig_width - 1) << 18) |
2585                   ((obj_surface->orig_height - 1) << 4));
2586     OUT_BCS_BATCH(batch,
2587                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2588                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2589                   (0 << 22) | /* surface object control state, ignored */
2590                   ((obj_surface->width - 1) << 3) | /* pitch */
2591                   (0 << 2)  | /* must be 0 */
2592                   (1 << 1)  | /* must be tiled */
2593                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2594     OUT_BCS_BATCH(batch,
2595                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2596                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2597     OUT_BCS_BATCH(batch,
2598                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2599                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2600     ADVANCE_BCS_BATCH(batch);
2601 }
2602
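/*
 * MFX_PIPE_BUF_ADDR_STATE, B+ stepping layout (61 dwords).  Only the
 * decoded (pre-deblocking) output -- the workaround surface -- and a
 * throw-away intra row-store buffer are programmed; every other address
 * stays zero because the dummy clip is a single I slice with no
 * reference pictures.
 */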
2603 static void
2604 gen75_jpeg_wa_pipe_buf_addr_state_bplus(VADriverContextP ctx,
2605                                         struct gen7_mfd_context *gen7_mfd_context)
2606 {
2607     struct i965_driver_data *i965 = i965_driver_data(ctx);
2608     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2609     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2610     dri_bo *intra_bo;
2611     int i;
2612
2613     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2614                             "intra row store",
2615                             128 * 64,
2616                             0x1000);
2617
2618     BEGIN_BCS_BATCH(batch, 61);
2619     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2620     OUT_BCS_RELOC(batch,
2621                   obj_surface->bo,
2622                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2623                   0);
2624     OUT_BCS_BATCH(batch, 0);
2625     OUT_BCS_BATCH(batch, 0);
2626
2627
2628     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2629     OUT_BCS_BATCH(batch, 0);
2630     OUT_BCS_BATCH(batch, 0);
2631
2632     /* uncompressed-video & stream out 7-12 */
2633     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2634     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2635     OUT_BCS_BATCH(batch, 0);
2636     OUT_BCS_BATCH(batch, 0);
2637     OUT_BCS_BATCH(batch, 0);
2638     OUT_BCS_BATCH(batch, 0);
2639
2640     /* DW 13-15: intra row store scratch buffer */
2641     OUT_BCS_RELOC(batch,
2642                   intra_bo,
2643                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2644                   0);
2645     OUT_BCS_BATCH(batch, 0);
2646     OUT_BCS_BATCH(batch, 0);
2647
2648     /* DW 16-18: deblocking filter */
2649     OUT_BCS_BATCH(batch, 0);
2650     OUT_BCS_BATCH(batch, 0);
2651     OUT_BCS_BATCH(batch, 0);
2652
2653     /* DW 19..50 */
2654     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2655         OUT_BCS_BATCH(batch, 0);
2656         OUT_BCS_BATCH(batch, 0);
2657     }
2658     OUT_BCS_BATCH(batch, 0);
2659
2660     /* DW 52-54: MB status buffer address */
2661     OUT_BCS_BATCH(batch, 0);
2662     OUT_BCS_BATCH(batch, 0);
2663     OUT_BCS_BATCH(batch, 0);
2664     /* DW 56-60: ILDB & second ILDB addresses */
2665     OUT_BCS_BATCH(batch, 0);
2666     OUT_BCS_BATCH(batch, 0);
2667     OUT_BCS_BATCH(batch, 0);
2668     OUT_BCS_BATCH(batch, 0);
2669     OUT_BCS_BATCH(batch, 0);
2670     OUT_BCS_BATCH(batch, 0);
2671
2672     ADVANCE_BCS_BATCH(batch);
2673
2674     dri_bo_unreference(intra_bo);
2675 }
2676
2677 static void
2678 gen75_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2679                                   struct gen7_mfd_context *gen7_mfd_context)
2680 {
2681     struct i965_driver_data *i965 = i965_driver_data(ctx);
2682     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2683     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2684     dri_bo *intra_bo;
2685     int i;
2686
2687     if (IS_STEPPING_BPLUS(i965)) {
2688         gen75_jpeg_wa_pipe_buf_addr_state_bplus(ctx, gen7_mfd_context);
2689         return;
2690     }
2691
2692     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2693                             "intra row store",
2694                             128 * 64,
2695                             0x1000);
2696
2697     BEGIN_BCS_BATCH(batch, 25);
2698     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
2699     OUT_BCS_RELOC(batch,
2700                   obj_surface->bo,
2701                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2702                   0);
2703
2704     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2705
2706     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2707     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2708
2709     OUT_BCS_RELOC(batch,
2710                   intra_bo,
2711                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2712                   0);
2713
2714     OUT_BCS_BATCH(batch, 0);
2715
2716     /* DW 7..22 */
2717     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2718         OUT_BCS_BATCH(batch, 0);
2719     }
2720
2721     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
2722     OUT_BCS_BATCH(batch, 0);
2723     ADVANCE_BCS_BATCH(batch);
2724
2725     dri_bo_unreference(intra_bo);
2726 }
2727
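/*
 * MFX_BSP_BUF_BASE_ADDR_STATE for the workaround: temporary BSD/MPC and
 * MPR row-store buffers are allocated just for this dummy decode and
 * unreferenced as soon as the command is emitted; the relocations in the
 * batch keep them alive until the batch has executed.
 */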
2728 static void
2729 gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
2730                                             struct gen7_mfd_context *gen7_mfd_context)
2731 {
2732     struct i965_driver_data *i965 = i965_driver_data(ctx);
2733     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2734     dri_bo *bsd_mpc_bo, *mpr_bo;
2735
2736     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2737                               "bsd mpc row store",
2738                               11520, /* 1.5 * 120 * 64 */
2739                               0x1000);
2740
2741     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2742                           "mpr row store",
2743                           7680, /* 1.0 * 120 * 64 */
2744                           0x1000);
2745
2746     BEGIN_BCS_BATCH(batch, 10);
2747     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2748
2749     OUT_BCS_RELOC(batch,
2750                   bsd_mpc_bo,
2751                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2752                   0);
2753
2754     OUT_BCS_BATCH(batch, 0);
2755     OUT_BCS_BATCH(batch, 0);
2756
2757     OUT_BCS_RELOC(batch,
2758                   mpr_bo,
2759                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2760                   0);
2761     OUT_BCS_BATCH(batch, 0);
2762     OUT_BCS_BATCH(batch, 0);
2763
2764     OUT_BCS_BATCH(batch, 0);
2765     OUT_BCS_BATCH(batch, 0);
2766     OUT_BCS_BATCH(batch, 0);
2767
2768     ADVANCE_BCS_BATCH(batch);
2769
2770     dri_bo_unreference(bsd_mpc_bo);
2771     dri_bo_unreference(mpr_bo);
2772 }
2773
2774 static void
2775 gen75_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2776                                       struct gen7_mfd_context *gen7_mfd_context)
2777 {
2778     struct i965_driver_data *i965 = i965_driver_data(ctx);
2779     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2780     dri_bo *bsd_mpc_bo, *mpr_bo;
2781
2782     if (IS_STEPPING_BPLUS(i965)) {
2783         gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(ctx, gen7_mfd_context);
2784         return;
2785     }
2786
2787     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2788                               "bsd mpc row store",
2789                               11520, /* 1.5 * 120 * 64 */
2790                               0x1000);
2791
2792     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2793                           "mpr row store",
2794                           7680, /* 1.0 * 120 * 64 */
2795                           0x1000);
2796
2797     BEGIN_BCS_BATCH(batch, 4);
2798     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
2799
2800     OUT_BCS_RELOC(batch,
2801                   bsd_mpc_bo,
2802                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2803                   0);
2804
2805     OUT_BCS_RELOC(batch,
2806                   mpr_bo,
2807                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2808                   0);
2809     OUT_BCS_BATCH(batch, 0);
2810
2811     ADVANCE_BCS_BATCH(batch);
2812
2813     dri_bo_unreference(bsd_mpc_bo);
2814     dri_bo_unreference(mpr_bo);
2815 }
2816
2817 static void
2818 gen75_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2819                            struct gen7_mfd_context *gen7_mfd_context)
2820 {
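    /*
     * Intentionally empty: the canned clip apparently gets by with the
     * default quantiser matrices, so no AVC QM state is emitted here.
     */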
2821
2822 }
2823
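/*
 * MFX_AVC_IMG_STATE for a 1x1-macroblock (16x16) progressive frame with
 * CABAC entropy coding and 4:2:0 chroma; all other fields stay zero.
 */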
2824 static void
2825 gen75_jpeg_wa_avc_img_state(VADriverContextP ctx,
2826                             struct gen7_mfd_context *gen7_mfd_context)
2827 {
2828     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2829     int img_struct = 0;
2830     int mbaff_frame_flag = 0;
2831     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2832
2833     BEGIN_BCS_BATCH(batch, 16);
2834     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2835     OUT_BCS_BATCH(batch,
2836                   (width_in_mbs * height_in_mbs - 1));
2837     OUT_BCS_BATCH(batch,
2838                   ((height_in_mbs - 1) << 16) |
2839                   ((width_in_mbs - 1) << 0));
2840     OUT_BCS_BATCH(batch,
2841                   (0 << 24) |
2842                   (0 << 16) |
2843                   (0 << 14) |
2844                   (0 << 13) |
2845                   (0 << 12) | /* differ from GEN6 */
2846                   (0 << 10) |
2847                   (img_struct << 8));
2848     OUT_BCS_BATCH(batch,
2849                   (1 << 10) | /* 4:2:0 */
2850                   (1 << 7) |  /* CABAC */
2851                   (0 << 6) |
2852                   (0 << 5) |
2853                   (0 << 4) |
2854                   (0 << 3) |
2855                   (1 << 2) |
2856                   (mbaff_frame_flag << 1) |
2857                   (0 << 0));
2858     OUT_BCS_BATCH(batch, 0);
2859     OUT_BCS_BATCH(batch, 0);
2860     OUT_BCS_BATCH(batch, 0);
2861     OUT_BCS_BATCH(batch, 0);
2862     OUT_BCS_BATCH(batch, 0);
2863     OUT_BCS_BATCH(batch, 0);
2864     OUT_BCS_BATCH(batch, 0);
2865     OUT_BCS_BATCH(batch, 0);
2866     OUT_BCS_BATCH(batch, 0);
2867     OUT_BCS_BATCH(batch, 0);
2868     OUT_BCS_BATCH(batch, 0);
2869     ADVANCE_BCS_BATCH(batch);
2870 }
2871
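/*
 * MFX_AVC_DIRECTMODE_STATE with every reference address and POC entry
 * zeroed: the workaround slice is an I slice, so direct-mode data is
 * never consulted, but the packet is still emitted to keep the MFX state
 * fully programmed.
 */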
2872 static void
2873 gen75_jpeg_wa_avc_directmode_state_bplus(VADriverContextP ctx,
2874                                          struct gen7_mfd_context *gen7_mfd_context)
2875 {
2876     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2877     int i;
2878
2879     BEGIN_BCS_BATCH(batch, 71);
2880     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2881
2882     /* reference surfaces 0..15 */
2883     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2884         OUT_BCS_BATCH(batch, 0); /* top */
2885         OUT_BCS_BATCH(batch, 0); /* bottom */
2886     }
2887
2888     OUT_BCS_BATCH(batch, 0);
2889
2890     /* the current decoding frame/field */
2891     OUT_BCS_BATCH(batch, 0); /* top */
2892     OUT_BCS_BATCH(batch, 0);
2893     OUT_BCS_BATCH(batch, 0);
2894
2895     /* POC List */
2896     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2897         OUT_BCS_BATCH(batch, 0);
2898         OUT_BCS_BATCH(batch, 0);
2899     }
2900
2901     OUT_BCS_BATCH(batch, 0);
2902     OUT_BCS_BATCH(batch, 0);
2903
2904     ADVANCE_BCS_BATCH(batch);
2905 }
2906
2907 static void
2908 gen75_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2909                                    struct gen7_mfd_context *gen7_mfd_context)
2910 {
2911     struct i965_driver_data *i965 = i965_driver_data(ctx);
2912     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2913     int i;
2914
2915     if (IS_STEPPING_BPLUS(i965)) {
2916         gen75_jpeg_wa_avc_directmode_state_bplus(ctx, gen7_mfd_context);
2917         return;
2918     }
2919
2920     BEGIN_BCS_BATCH(batch, 69);
2921     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
2922
2923     /* reference surfaces 0..15 */
2924     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2925         OUT_BCS_BATCH(batch, 0); /* top */
2926         OUT_BCS_BATCH(batch, 0); /* bottom */
2927     }
2928
2929     /* the current decoding frame/field */
2930     OUT_BCS_BATCH(batch, 0); /* top */
2931     OUT_BCS_BATCH(batch, 0); /* bottom */
2932
2933     /* POC List */
2934     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2935         OUT_BCS_BATCH(batch, 0);
2936         OUT_BCS_BATCH(batch, 0);
2937     }
2938
2939     OUT_BCS_BATCH(batch, 0);
2940     OUT_BCS_BATCH(batch, 0);
2941
2942     ADVANCE_BCS_BATCH(batch);
2943 }
2944
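/*
 * MFX_IND_OBJ_BASE_ADDR_STATE pointing the bitstream fetch at the buffer
 * object holding the canned slice bytes; the remaining base addresses are
 * not used in VLD mode.
 */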
2945 static void
2946 gen75_jpeg_wa_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
2947                                             struct gen7_mfd_context *gen7_mfd_context)
2948 {
2949     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2950
2951     BEGIN_BCS_BATCH(batch, 11);
2952     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2953     OUT_BCS_RELOC(batch,
2954                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2955                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2956                   0);
2957     OUT_BCS_BATCH(batch, 0x80000000); /* must be set, up to 2G */
2958     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2959     OUT_BCS_BATCH(batch, 0);
2960     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2961     OUT_BCS_BATCH(batch, 0);
2962     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2963     OUT_BCS_BATCH(batch, 0);
2964     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2965     OUT_BCS_BATCH(batch, 0);
2966     ADVANCE_BCS_BATCH(batch);
2967 }
2968
2969 static void
2970 gen75_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2971                                       struct gen7_mfd_context *gen7_mfd_context)
2972 {
2973     struct i965_driver_data *i965 = i965_driver_data(ctx);
2974     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2975
2976     if (IS_STEPPING_BPLUS(i965)) {
2977         gen75_jpeg_wa_ind_obj_base_addr_state_bplus(ctx, gen7_mfd_context);
2978         return;
2979     }
2980
2981     BEGIN_BCS_BATCH(batch, 11);
2982     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2983     OUT_BCS_RELOC(batch,
2984                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2985                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2986                   0);
2987     OUT_BCS_BATCH(batch, 0x80000000); /* must be set, up to 2G */
2988     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2989     OUT_BCS_BATCH(batch, 0);
2990     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2991     OUT_BCS_BATCH(batch, 0);
2992     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2993     OUT_BCS_BATCH(batch, 0);
2994     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2995     OUT_BCS_BATCH(batch, 0);
2996     ADVANCE_BCS_BATCH(batch);
2997 }
2998
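/*
 * MFD_AVC_BSD_OBJECT that actually kicks off the dummy decode: DW1 holds
 * the size of the canned bitstream and DW4 the byte/bit offset of the
 * first macroblock's data, with the last-slice flag set.
 */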
2999 static void
3000 gen75_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
3001                              struct gen7_mfd_context *gen7_mfd_context)
3002 {
3003     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3004
3005     /* the input bitstream format on GEN7 differs from GEN6 */
3006     BEGIN_BCS_BATCH(batch, 6);
3007     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
3008     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
3009     OUT_BCS_BATCH(batch, 0);
3010     OUT_BCS_BATCH(batch,
3011                   (0 << 31) |
3012                   (0 << 14) |
3013                   (0 << 12) |
3014                   (0 << 10) |
3015                   (0 << 8));
3016     OUT_BCS_BATCH(batch,
3017                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
3018                   (0 << 5)  |
3019                   (0 << 4)  |
3020                   (1 << 3) | /* LastSlice Flag */
3021                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
3022     OUT_BCS_BATCH(batch, 0);
3023     ADVANCE_BCS_BATCH(batch);
3024 }
3025
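/*
 * MFX_AVC_SLICE_STATE for the single I slice of the dummy clip: no
 * reference pictures, deblocking disabled, QP taken from the clip table,
 * and the slice marked as the last one of the picture.
 */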
3026 static void
3027 gen75_jpeg_wa_avc_slice_state(VADriverContextP ctx,
3028                               struct gen7_mfd_context *gen7_mfd_context)
3029 {
3030     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3031     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
3032     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
3033     int first_mb_in_slice = 0;
3034     int slice_type = SLICE_TYPE_I;
3035
3036     BEGIN_BCS_BATCH(batch, 11);
3037     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
3038     OUT_BCS_BATCH(batch, slice_type);
3039     OUT_BCS_BATCH(batch,
3040                   (num_ref_idx_l1 << 24) |
3041                   (num_ref_idx_l0 << 16) |
3042                   (0 << 8) |
3043                   (0 << 0));
3044     OUT_BCS_BATCH(batch,
3045                   (0 << 29) |
3046                   (1 << 27) |   /* disable Deblocking */
3047                   (0 << 24) |
3048                   (gen7_jpeg_wa_clip.qp << 16) |
3049                   (0 << 8) |
3050                   (0 << 0));
3051     OUT_BCS_BATCH(batch,
3052                   (slice_ver_pos << 24) |
3053                   (slice_hor_pos << 16) |
3054                   (first_mb_in_slice << 0));
3055     OUT_BCS_BATCH(batch,
3056                   (next_slice_ver_pos << 16) |
3057                   (next_slice_hor_pos << 0));
3058     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
3059     OUT_BCS_BATCH(batch, 0);
3060     OUT_BCS_BATCH(batch, 0);
3061     OUT_BCS_BATCH(batch, 0);
3062     OUT_BCS_BATCH(batch, 0);
3063     ADVANCE_BCS_BATCH(batch);
3064 }
3065
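/*
 * Emit the complete dummy AVC decode sequence.  This runs once per JPEG
 * picture, before the real JPEG state is programmed, and appears to work
 * around an MFX issue that shows up when the pipe is switched straight
 * into JPEG mode.
 */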
3066 static void
3067 gen75_mfd_jpeg_wa(VADriverContextP ctx,
3068                   struct gen7_mfd_context *gen7_mfd_context)
3069 {
3070     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3071     gen75_jpeg_wa_init(ctx, gen7_mfd_context);
3072     intel_batchbuffer_emit_mi_flush(batch);
3073     gen75_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
3074     gen75_jpeg_wa_surface_state(ctx, gen7_mfd_context);
3075     gen75_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
3076     gen75_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
3077     gen75_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
3078     gen75_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
3079     gen75_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
3080
3081     gen75_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
3082     gen75_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
3083     gen75_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
3084 }
3085
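/*
 * Top-level JPEG decode.  After the workaround sequence and the common
 * MFX state, the slice parameters are walked twice: first to find the
 * highest Huffman table selector in use (so the right number of tables is
 * loaded), then to emit one MFD_JPEG_BSD_OBJECT per scan.
 */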
3086 void
3087 gen75_mfd_jpeg_decode_picture(VADriverContextP ctx,
3088                               struct decode_state *decode_state,
3089                               struct gen7_mfd_context *gen7_mfd_context)
3090 {
3091     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3092     VAPictureParameterBufferJPEGBaseline *pic_param;
3093     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
3094     dri_bo *slice_data_bo;
3095     int i, j, max_selector = 0;
3096
3097     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3098     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
3099
3100     /* Currently only baseline DCT is supported */
3101     gen75_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
3102     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3103     gen75_mfd_jpeg_wa(ctx, gen7_mfd_context);
3104     intel_batchbuffer_emit_mi_flush(batch);
3105     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3106     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3107     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3108     gen75_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
3109     gen75_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
3110
3111     for (j = 0; j < decode_state->num_slice_params; j++) {
3112         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3113         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3114         slice_data_bo = decode_state->slice_datas[j]->bo;
3115         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3116
3117         if (j == decode_state->num_slice_params - 1)
3118             next_slice_group_param = NULL;
3119         else
3120             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3121
3122         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3123             int component;
3124
3125             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3126
3127             if (i < decode_state->slice_params[j]->num_elements - 1)
3128                 next_slice_param = slice_param + 1;
3129             else
3130                 next_slice_param = next_slice_group_param;
3131
3132             for (component = 0; component < slice_param->num_components; component++) {
3133                 if (max_selector < slice_param->components[component].dc_table_selector)
3134                     max_selector = slice_param->components[component].dc_table_selector;
3135
3136                 if (max_selector < slice_param->components[component].ac_table_selector)
3137                     max_selector = slice_param->components[component].ac_table_selector;
3138             }
3139
3140             slice_param++;
3141         }
3142     }
3143
3144     assert(max_selector < 2);
3145     gen75_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
3146
3147     for (j = 0; j < decode_state->num_slice_params; j++) {
3148         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3149         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3150         slice_data_bo = decode_state->slice_datas[j]->bo;
3151         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3152
3153         if (j == decode_state->num_slice_params - 1)
3154             next_slice_group_param = NULL;
3155         else
3156             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3157
3158         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3159             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3160
3161             if (i < decode_state->slice_params[j]->num_elements - 1)
3162                 next_slice_param = slice_param + 1;
3163             else
3164                 next_slice_param = next_slice_group_param;
3165
3166             gen75_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
3167             slice_param++;
3168         }
3169     }
3170
3171     intel_batchbuffer_end_atomic(batch);
3172     intel_batchbuffer_flush(batch);
3173 }
3174
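/*
 * Common decode entry point: sanity-check the input buffers, then
 * dispatch to the per-codec picture decoder according to the VA profile.
 */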
3175 static VAStatus
3176 gen75_mfd_decode_picture(VADriverContextP ctx,
3177                          VAProfile profile,
3178                          union codec_state *codec_state,
3179                          struct hw_context *hw_context)
3180
3181 {
3182     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3183     struct decode_state *decode_state = &codec_state->decode;
3184     VAStatus vaStatus;
3185
3186     assert(gen7_mfd_context);
3187
3188     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3189
3190     if (vaStatus != VA_STATUS_SUCCESS)
3191         goto out;
3192
3193     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3194
3195     switch (profile) {
3196     case VAProfileMPEG2Simple:
3197     case VAProfileMPEG2Main:
3198         gen75_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3199         break;
3200
3201     case VAProfileH264ConstrainedBaseline:
3202     case VAProfileH264Main:
3203     case VAProfileH264High:
3204     case VAProfileH264StereoHigh:
3205     case VAProfileH264MultiviewHigh:
3206         gen75_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3207         break;
3208
3209     case VAProfileVC1Simple:
3210     case VAProfileVC1Main:
3211     case VAProfileVC1Advanced:
3212         gen75_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3213         break;
3214
3215     case VAProfileJPEGBaseline:
3216         gen75_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3217         break;
3218
3219     default:
3220         assert(0);
3221         break;
3222     }
3223
3224     vaStatus = VA_STATUS_SUCCESS;
3225
3226 out:
3227     return vaStatus;
3228 }
3229
3230 static void
3231 gen75_mfd_context_destroy(void *hw_context)
3232 {
3233     VADriverContextP ctx;
3234     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3235
3236     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3237
3238     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3239     gen7_mfd_context->post_deblocking_output.bo = NULL;
3240
3241     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3242     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3243
3244     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3245     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3246
3247     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3248     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3249
3250     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3251     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3252
3253     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3254     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3255
3256     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3257     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3258
3259     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3260
3261     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3262         i965_DestroySurfaces(ctx,
3263                              &gen7_mfd_context->jpeg_wa_surface_id,
3264                              1);
3265         gen7_mfd_context->jpeg_wa_surface_object = NULL;
3266     }
3267
3268     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3269     free(gen7_mfd_context);
3270 }
3271
3272 static void gen75_mfd_mpeg2_context_init(VADriverContextP ctx,
3273                                          struct gen7_mfd_context *gen7_mfd_context)
3274 {
3275     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3276     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3277     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3278     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3279 }
3280
3281 struct hw_context *
3282 gen75_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3283 {
3284     struct intel_driver_data *intel = intel_driver_data(ctx);
3285     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3286     int i;
3287
3288     assert(gen7_mfd_context);
3289     gen7_mfd_context->base.destroy = gen75_mfd_context_destroy;
3290     gen7_mfd_context->base.run = gen75_mfd_decode_picture;
3291     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3292
3293     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3294         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3295         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3296         gen7_mfd_context->reference_surface[i].obj_surface = NULL;
3297     }
3298
3299     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3300     gen7_mfd_context->jpeg_wa_surface_object = NULL;
3301
3302     switch (obj_config->profile) {
3303     case VAProfileMPEG2Simple:
3304     case VAProfileMPEG2Main:
3305         gen75_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3306         break;
3307
3308     case VAProfileH264ConstrainedBaseline:
3309     case VAProfileH264Main:
3310     case VAProfileH264High:
3311     case VAProfileH264StereoHigh:
3312     case VAProfileH264MultiviewHigh:
3313         gen75_mfd_avc_context_init(ctx, gen7_mfd_context);
3314         break;
3315     default:
3316         break;
3317     }
3318
3319     gen7_mfd_context->driver_context = ctx;
3320     return (struct hw_context *)gen7_mfd_context;
3321 }