OSDN Git Service

Add vdenc common commands for CNL
[android-x86/hardware-intel-common-vaapi.git] / src / gen75_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui  <yakui.zhao@intel.com>
27  *
28  */
29 #include "sysdeps.h"
30
31 #include <va/va_dec_jpeg.h>
32
33 #include "intel_batchbuffer.h"
34 #include "intel_driver.h"
35 #include "i965_defines.h"
36 #include "i965_drv_video.h"
37 #include "i965_decoder_utils.h"
38 #include "gen7_mfd.h"
39 #include "intel_media.h"
40
/* Lowest device revision id that counts as "B0 stepping or later". */
#define B0_STEP_REV     2
/*
 * Non-zero when the device revision stored in the given driver data is
 * at least B0_STEP_REV.  The argument is fully parenthesized so that any
 * pointer-valued expression (not just a plain identifier) expands
 * correctly.
 */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
43
/*
 * 64-entry index table; every value in 0..63 appears exactly once.
 * NOTE(review): by its name this is the zig-zag scan order of an 8x8
 * block (entry k = raster index of the k-th coefficient in scan order);
 * confirm against the use site, which is outside this excerpt.
 */
static const uint32_t zigzag_direct[64] = {
     0,  1,  8, 16,     9,  2,  3, 10,
    17, 24, 32, 25,    18, 11,  4,  5,
    12, 19, 26, 33,    40, 48, 41, 34,
    27, 20, 13,  6,     7, 14, 21, 28,
    35, 42, 49, 56,    57, 50, 43, 36,
    29, 22, 15, 23,    30, 37, 44, 51,
    58, 59, 52, 45,    38, 31, 39, 46,
    53, 60, 61, 54,    47, 55, 62, 63
};
54
55 static void
56 gen75_mfd_init_avc_surface(VADriverContextP ctx,
57                            VAPictureParameterBufferH264 *pic_param,
58                            struct object_surface *obj_surface)
59 {
60     struct i965_driver_data *i965 = i965_driver_data(ctx);
61     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
62     int width_in_mbs, height_in_mbs;
63
64     obj_surface->free_private_data = gen_free_avc_surface;
65     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
66     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
67
68     if (!gen7_avc_surface) {
69         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
70         assert(gen7_avc_surface);
71         gen7_avc_surface->base.frame_store_id = -1;
72         assert((obj_surface->size & 0x3f) == 0);
73         obj_surface->private_data = gen7_avc_surface;
74     }
75
76     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
77                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
78
79     if (gen7_avc_surface->dmv_top == NULL) {
80         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
81                                                  "direct mv w/r buffer",
82                                                  width_in_mbs * height_in_mbs * 128,
83                                                  0x1000);
84         assert(gen7_avc_surface->dmv_top);
85     }
86
87     if (gen7_avc_surface->dmv_bottom_flag &&
88         gen7_avc_surface->dmv_bottom == NULL) {
89         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
90                                                     "direct mv w/r buffer",
91                                                     width_in_mbs * height_in_mbs * 128,
92                                                     0x1000);
93         assert(gen7_avc_surface->dmv_bottom);
94     }
95 }
96
97 static void
98 gen75_mfd_pipe_mode_select(VADriverContextP ctx,
99                            struct decode_state *decode_state,
100                            int standard_select,
101                            struct gen7_mfd_context *gen7_mfd_context)
102 {
103     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
104
105     assert(standard_select == MFX_FORMAT_MPEG2 ||
106            standard_select == MFX_FORMAT_AVC ||
107            standard_select == MFX_FORMAT_VC1 ||
108            standard_select == MFX_FORMAT_JPEG);
109
110     BEGIN_BCS_BATCH(batch, 5);
111     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
112     OUT_BCS_BATCH(batch,
113                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
114                   (MFD_MODE_VLD << 15) | /* VLD mode */
115                   (0 << 10) | /* disable Stream-Out */
116                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
117                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
118                   (0 << 5)  | /* not in stitch mode */
119                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
120                   (standard_select << 0));
121     OUT_BCS_BATCH(batch,
122                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
123                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
124                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
125                   (0 << 1)  |
126                   (0 << 0));
127     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
128     OUT_BCS_BATCH(batch, 0); /* reserved */
129     ADVANCE_BCS_BATCH(batch);
130 }
131
132 static void
133 gen75_mfd_surface_state(VADriverContextP ctx,
134                         struct decode_state *decode_state,
135                         int standard_select,
136                         struct gen7_mfd_context *gen7_mfd_context)
137 {
138     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
139     struct object_surface *obj_surface = decode_state->render_object;
140     unsigned int y_cb_offset;
141     unsigned int y_cr_offset;
142     unsigned int surface_format;
143
144     assert(obj_surface);
145
146     y_cb_offset = obj_surface->y_cb_offset;
147     y_cr_offset = obj_surface->y_cr_offset;
148
149     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
150                      MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
151
152     BEGIN_BCS_BATCH(batch, 6);
153     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
154     OUT_BCS_BATCH(batch, 0);
155     OUT_BCS_BATCH(batch,
156                   ((obj_surface->orig_height - 1) << 18) |
157                   ((obj_surface->orig_width - 1) << 4));
158     OUT_BCS_BATCH(batch,
159                   (surface_format << 28) | /* 420 planar YUV surface */
160                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
161                   (0 << 22) | /* surface object control state, ignored */
162                   ((obj_surface->width - 1) << 3) | /* pitch */
163                   (0 << 2)  | /* must be 0 */
164                   (1 << 1)  | /* must be tiled */
165                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
166     OUT_BCS_BATCH(batch,
167                   (0 << 16) | /* X offset for U(Cb), must be 0 */
168                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
169     OUT_BCS_BATCH(batch,
170                   (0 << 16) | /* X offset for V(Cr), must be 0 */
171                   ((standard_select == MFX_FORMAT_JPEG ? y_cr_offset : 0) << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
172     ADVANCE_BCS_BATCH(batch);
173 }
174
175 static void
176 gen75_mfd_pipe_buf_addr_state_bplus(VADriverContextP ctx,
177                                     struct decode_state *decode_state,
178                                     int standard_select,
179                                     struct gen7_mfd_context *gen7_mfd_context)
180 {
181     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
182     int i;
183
184     BEGIN_BCS_BATCH(batch, 61);
185     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
186     /* Pre-deblock 1-3 */
187     if (gen7_mfd_context->pre_deblocking_output.valid)
188         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
189                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
190                       0);
191     else
192         OUT_BCS_BATCH(batch, 0);
193
194     OUT_BCS_BATCH(batch, 0);
195     OUT_BCS_BATCH(batch, 0);
196     /* Post-debloing 4-6 */
197     if (gen7_mfd_context->post_deblocking_output.valid)
198         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
199                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
200                       0);
201     else
202         OUT_BCS_BATCH(batch, 0);
203
204     OUT_BCS_BATCH(batch, 0);
205     OUT_BCS_BATCH(batch, 0);
206
207     /* uncompressed-video & stream out 7-12 */
208     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
209     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
210     OUT_BCS_BATCH(batch, 0);
211     OUT_BCS_BATCH(batch, 0);
212     OUT_BCS_BATCH(batch, 0);
213     OUT_BCS_BATCH(batch, 0);
214
215     /* intra row-store scratch 13-15 */
216     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
217         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
218                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
219                       0);
220     else
221         OUT_BCS_BATCH(batch, 0);
222
223     OUT_BCS_BATCH(batch, 0);
224     OUT_BCS_BATCH(batch, 0);
225     /* deblocking-filter-row-store 16-18 */
226     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
227         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
228                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
229                       0);
230     else
231         OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     OUT_BCS_BATCH(batch, 0);
234
235     /* DW 19..50 */
236     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
237         struct object_surface *obj_surface;
238
239         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
240             gen7_mfd_context->reference_surface[i].obj_surface &&
241             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
242             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
243
244             OUT_BCS_RELOC(batch, obj_surface->bo,
245                           I915_GEM_DOMAIN_INSTRUCTION, 0,
246                           0);
247         } else {
248             OUT_BCS_BATCH(batch, 0);
249         }
250         OUT_BCS_BATCH(batch, 0);
251     }
252     /* reference property 51 */
253     OUT_BCS_BATCH(batch, 0);
254
255     /* Macroblock status & ILDB 52-57 */
256     OUT_BCS_BATCH(batch, 0);
257     OUT_BCS_BATCH(batch, 0);
258     OUT_BCS_BATCH(batch, 0);
259     OUT_BCS_BATCH(batch, 0);
260     OUT_BCS_BATCH(batch, 0);
261     OUT_BCS_BATCH(batch, 0);
262
263     /* the second Macroblock status 58-60 */
264     OUT_BCS_BATCH(batch, 0);
265     OUT_BCS_BATCH(batch, 0);
266     OUT_BCS_BATCH(batch, 0);
267     ADVANCE_BCS_BATCH(batch);
268 }
269
270 static void
271 gen75_mfd_pipe_buf_addr_state(VADriverContextP ctx,
272                               struct decode_state *decode_state,
273                               int standard_select,
274                               struct gen7_mfd_context *gen7_mfd_context)
275 {
276     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
277     struct i965_driver_data *i965 = i965_driver_data(ctx);
278     int i;
279
280     if (IS_STEPPING_BPLUS(i965)) {
281         gen75_mfd_pipe_buf_addr_state_bplus(ctx, decode_state,
282                                             standard_select, gen7_mfd_context);
283         return;
284     }
285
286     BEGIN_BCS_BATCH(batch, 25);
287     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
288     if (gen7_mfd_context->pre_deblocking_output.valid)
289         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
290                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
291                       0);
292     else
293         OUT_BCS_BATCH(batch, 0);
294
295     if (gen7_mfd_context->post_deblocking_output.valid)
296         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
297                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
298                       0);
299     else
300         OUT_BCS_BATCH(batch, 0);
301
302     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
303     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
304
305     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
306         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
307                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
308                       0);
309     else
310         OUT_BCS_BATCH(batch, 0);
311
312     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
313         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
314                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
315                       0);
316     else
317         OUT_BCS_BATCH(batch, 0);
318
319     /* DW 7..22 */
320     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
321         struct object_surface *obj_surface;
322
323         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
324             gen7_mfd_context->reference_surface[i].obj_surface &&
325             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
326             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
327
328             OUT_BCS_RELOC(batch, obj_surface->bo,
329                           I915_GEM_DOMAIN_INSTRUCTION, 0,
330                           0);
331         } else {
332             OUT_BCS_BATCH(batch, 0);
333         }
334     }
335
336     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
337     OUT_BCS_BATCH(batch, 0);   /* ignore DW24 for decoding */
338     ADVANCE_BCS_BATCH(batch);
339 }
340
341 static void
342 gen75_mfd_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
343                                         dri_bo *slice_data_bo,
344                                         int standard_select,
345                                         struct gen7_mfd_context *gen7_mfd_context)
346 {
347     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
348
349     BEGIN_BCS_BATCH(batch, 26);
350     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
351     /* MFX In BS 1-5 */
352     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
353     OUT_BCS_BATCH(batch, 0);
354     OUT_BCS_BATCH(batch, 0);
355     /* Upper bound 4-5 */
356     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
357     OUT_BCS_BATCH(batch, 0);
358
359     /* MFX indirect MV 6-10 */
360     OUT_BCS_BATCH(batch, 0);
361     OUT_BCS_BATCH(batch, 0);
362     OUT_BCS_BATCH(batch, 0);
363     OUT_BCS_BATCH(batch, 0);
364     OUT_BCS_BATCH(batch, 0);
365
366     /* MFX IT_COFF 11-15 */
367     OUT_BCS_BATCH(batch, 0);
368     OUT_BCS_BATCH(batch, 0);
369     OUT_BCS_BATCH(batch, 0);
370     OUT_BCS_BATCH(batch, 0);
371     OUT_BCS_BATCH(batch, 0);
372
373     /* MFX IT_DBLK 16-20 */
374     OUT_BCS_BATCH(batch, 0);
375     OUT_BCS_BATCH(batch, 0);
376     OUT_BCS_BATCH(batch, 0);
377     OUT_BCS_BATCH(batch, 0);
378     OUT_BCS_BATCH(batch, 0);
379
380     /* MFX PAK_BSE object for encoder 21-25 */
381     OUT_BCS_BATCH(batch, 0);
382     OUT_BCS_BATCH(batch, 0);
383     OUT_BCS_BATCH(batch, 0);
384     OUT_BCS_BATCH(batch, 0);
385     OUT_BCS_BATCH(batch, 0);
386
387     ADVANCE_BCS_BATCH(batch);
388 }
389
390 static void
391 gen75_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
392                                   dri_bo *slice_data_bo,
393                                   int standard_select,
394                                   struct gen7_mfd_context *gen7_mfd_context)
395 {
396     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
397     struct i965_driver_data *i965 = i965_driver_data(ctx);
398
399     if (IS_STEPPING_BPLUS(i965)) {
400         gen75_mfd_ind_obj_base_addr_state_bplus(ctx, slice_data_bo,
401                                                 standard_select, gen7_mfd_context);
402         return;
403     }
404
405     BEGIN_BCS_BATCH(batch, 11);
406     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
407     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
408     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
409     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
410     OUT_BCS_BATCH(batch, 0);
411     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
412     OUT_BCS_BATCH(batch, 0);
413     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
414     OUT_BCS_BATCH(batch, 0);
415     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
416     OUT_BCS_BATCH(batch, 0);
417     ADVANCE_BCS_BATCH(batch);
418 }
419
420 static void
421 gen75_mfd_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
422                                         struct decode_state *decode_state,
423                                         int standard_select,
424                                         struct gen7_mfd_context *gen7_mfd_context)
425 {
426     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
427
428     BEGIN_BCS_BATCH(batch, 10);
429     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
430
431     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
432         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
433                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
434                       0);
435     else
436         OUT_BCS_BATCH(batch, 0);
437
438     OUT_BCS_BATCH(batch, 0);
439     OUT_BCS_BATCH(batch, 0);
440     /* MPR Row Store Scratch buffer 4-6 */
441     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
442         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
443                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
444                       0);
445     else
446         OUT_BCS_BATCH(batch, 0);
447     OUT_BCS_BATCH(batch, 0);
448     OUT_BCS_BATCH(batch, 0);
449
450     /* Bitplane 7-9 */
451     if (gen7_mfd_context->bitplane_read_buffer.valid)
452         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
453                       I915_GEM_DOMAIN_INSTRUCTION, 0,
454                       0);
455     else
456         OUT_BCS_BATCH(batch, 0);
457     OUT_BCS_BATCH(batch, 0);
458     OUT_BCS_BATCH(batch, 0);
459
460     ADVANCE_BCS_BATCH(batch);
461 }
462
463 static void
464 gen75_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
465                                   struct decode_state *decode_state,
466                                   int standard_select,
467                                   struct gen7_mfd_context *gen7_mfd_context)
468 {
469     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
470     struct i965_driver_data *i965 = i965_driver_data(ctx);
471
472     if (IS_STEPPING_BPLUS(i965)) {
473         gen75_mfd_bsp_buf_base_addr_state_bplus(ctx, decode_state,
474                                                 standard_select, gen7_mfd_context);
475         return;
476     }
477
478     BEGIN_BCS_BATCH(batch, 4);
479     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
480
481     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
482         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
483                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
484                       0);
485     else
486         OUT_BCS_BATCH(batch, 0);
487
488     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
489         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
490                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
491                       0);
492     else
493         OUT_BCS_BATCH(batch, 0);
494
495     if (gen7_mfd_context->bitplane_read_buffer.valid)
496         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
497                       I915_GEM_DOMAIN_INSTRUCTION, 0,
498                       0);
499     else
500         OUT_BCS_BATCH(batch, 0);
501
502     ADVANCE_BCS_BATCH(batch);
503 }
504
505 static void
506 gen75_mfd_qm_state(VADriverContextP ctx,
507                    int qm_type,
508                    unsigned char *qm,
509                    int qm_length,
510                    struct gen7_mfd_context *gen7_mfd_context)
511 {
512     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
513     unsigned int qm_buffer[16];
514
515     assert(qm_length <= 16 * 4);
516     memcpy(qm_buffer, qm, qm_length);
517
518     BEGIN_BCS_BATCH(batch, 18);
519     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
520     OUT_BCS_BATCH(batch, qm_type << 0);
521     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
522     ADVANCE_BCS_BATCH(batch);
523 }
524
525 static void
526 gen75_mfd_avc_img_state(VADriverContextP ctx,
527                         struct decode_state *decode_state,
528                         struct gen7_mfd_context *gen7_mfd_context)
529 {
530     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
531     int img_struct;
532     int mbaff_frame_flag;
533     unsigned int width_in_mbs, height_in_mbs;
534     VAPictureParameterBufferH264 *pic_param;
535
536     assert(decode_state->pic_param && decode_state->pic_param->buffer);
537     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
538
539     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
540
541     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
542         img_struct = 1;
543     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
544         img_struct = 3;
545     else
546         img_struct = 0;
547
548     if ((img_struct & 0x1) == 0x1) {
549         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
550     } else {
551         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
552     }
553
554     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
555         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
556         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
557     } else {
558         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
559     }
560
561     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
562                         !pic_param->pic_fields.bits.field_pic_flag);
563
564     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
565     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
566
567     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
568     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
569            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
570     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
571
572     BEGIN_BCS_BATCH(batch, 17);
573     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
574     OUT_BCS_BATCH(batch,
575                   (width_in_mbs * height_in_mbs - 1));
576     OUT_BCS_BATCH(batch,
577                   ((height_in_mbs - 1) << 16) |
578                   ((width_in_mbs - 1) << 0));
579     OUT_BCS_BATCH(batch,
580                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
581                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
582                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
583                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
584                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
585                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
586                   (img_struct << 8));
587     OUT_BCS_BATCH(batch,
588                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
589                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
590                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
591                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
592                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
593                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
594                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
595                   (mbaff_frame_flag << 1) |
596                   (pic_param->pic_fields.bits.field_pic_flag << 0));
597     OUT_BCS_BATCH(batch, 0);
598     OUT_BCS_BATCH(batch, 0);
599     OUT_BCS_BATCH(batch, 0);
600     OUT_BCS_BATCH(batch, 0);
601     OUT_BCS_BATCH(batch, 0);
602     OUT_BCS_BATCH(batch, 0);
603     OUT_BCS_BATCH(batch, 0);
604     OUT_BCS_BATCH(batch, 0);
605     OUT_BCS_BATCH(batch, 0);
606     OUT_BCS_BATCH(batch, 0);
607     OUT_BCS_BATCH(batch, 0);
608     OUT_BCS_BATCH(batch, 0);
609     ADVANCE_BCS_BATCH(batch);
610 }
611
612 static void
613 gen75_mfd_avc_qm_state(VADriverContextP ctx,
614                        struct decode_state *decode_state,
615                        struct gen7_mfd_context *gen7_mfd_context)
616 {
617     VAIQMatrixBufferH264 *iq_matrix;
618     VAPictureParameterBufferH264 *pic_param;
619
620     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
621         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
622     else
623         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
624
625     assert(decode_state->pic_param && decode_state->pic_param->buffer);
626     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
627
628     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
629     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
630
631     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
632         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
633         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
634     }
635 }
636
637 static inline void
638 gen75_mfd_avc_picid_state(VADriverContextP ctx,
639                           struct decode_state *decode_state,
640                           struct gen7_mfd_context *gen7_mfd_context)
641 {
642     gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
643                                gen7_mfd_context->reference_surface);
644 }
645
646 static void
647 gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx,
648                                      struct decode_state *decode_state,
649                                      VAPictureParameterBufferH264 *pic_param,
650                                      VASliceParameterBufferH264 *slice_param,
651                                      struct gen7_mfd_context *gen7_mfd_context)
652 {
653     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
654     struct object_surface *obj_surface;
655     GenAvcSurface *gen7_avc_surface;
656     VAPictureH264 *va_pic;
657     int i;
658
659     BEGIN_BCS_BATCH(batch, 71);
660     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
661
662     /* reference surfaces 0..15 */
663     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
664         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
665             gen7_mfd_context->reference_surface[i].obj_surface &&
666             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
667
668             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
669             gen7_avc_surface = obj_surface->private_data;
670             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
671                           I915_GEM_DOMAIN_INSTRUCTION, 0,
672                           0);
673             OUT_BCS_BATCH(batch, 0);
674         } else {
675             OUT_BCS_BATCH(batch, 0);
676             OUT_BCS_BATCH(batch, 0);
677         }
678     }
679
680     OUT_BCS_BATCH(batch, 0);
681
682     /* the current decoding frame/field */
683     va_pic = &pic_param->CurrPic;
684     obj_surface = decode_state->render_object;
685     assert(obj_surface->bo && obj_surface->private_data);
686     gen7_avc_surface = obj_surface->private_data;
687
688     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
689                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
690                   0);
691
692     OUT_BCS_BATCH(batch, 0);
693     OUT_BCS_BATCH(batch, 0);
694
695     /* POC List */
696     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
697         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
698
699         if (obj_surface) {
700             const VAPictureH264 * const va_pic = avc_find_picture(
701                                                      obj_surface->base.id, pic_param->ReferenceFrames,
702                                                      ARRAY_ELEMS(pic_param->ReferenceFrames));
703
704             assert(va_pic != NULL);
705             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
706             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
707         } else {
708             OUT_BCS_BATCH(batch, 0);
709             OUT_BCS_BATCH(batch, 0);
710         }
711     }
712
713     va_pic = &pic_param->CurrPic;
714     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
715     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
716
717     ADVANCE_BCS_BATCH(batch);
718 }
719
720 static void
721 gen75_mfd_avc_directmode_state(VADriverContextP ctx,
722                                struct decode_state *decode_state,
723                                VAPictureParameterBufferH264 *pic_param,
724                                VASliceParameterBufferH264 *slice_param,
725                                struct gen7_mfd_context *gen7_mfd_context)
726 {
727     struct i965_driver_data *i965 = i965_driver_data(ctx);
728     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
729     struct object_surface *obj_surface;
730     GenAvcSurface *gen7_avc_surface;
731     VAPictureH264 *va_pic;
732     int i;
733
734     if (IS_STEPPING_BPLUS(i965)) {
735         gen75_mfd_avc_directmode_state_bplus(ctx, decode_state, pic_param, slice_param,
736                                              gen7_mfd_context);
737
738         return;
739     }
740
741     BEGIN_BCS_BATCH(batch, 69);
742     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
743
744     /* reference surfaces 0..15 */
745     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
746         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
747             gen7_mfd_context->reference_surface[i].obj_surface &&
748             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
749
750             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
751             gen7_avc_surface = obj_surface->private_data;
752
753             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
754                           I915_GEM_DOMAIN_INSTRUCTION, 0,
755                           0);
756
757             if (gen7_avc_surface->dmv_bottom_flag == 1)
758                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
759                               I915_GEM_DOMAIN_INSTRUCTION, 0,
760                               0);
761             else
762                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
763                               I915_GEM_DOMAIN_INSTRUCTION, 0,
764                               0);
765         } else {
766             OUT_BCS_BATCH(batch, 0);
767             OUT_BCS_BATCH(batch, 0);
768         }
769     }
770
771     /* the current decoding frame/field */
772     va_pic = &pic_param->CurrPic;
773     obj_surface = decode_state->render_object;
774     assert(obj_surface->bo && obj_surface->private_data);
775     gen7_avc_surface = obj_surface->private_data;
776
777     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
778                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
779                   0);
780
781     if (gen7_avc_surface->dmv_bottom_flag == 1)
782         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
783                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
784                       0);
785     else
786         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
787                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
788                       0);
789
790     /* POC List */
791     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
792         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
793
794         if (obj_surface) {
795             const VAPictureH264 * const va_pic = avc_find_picture(
796                                                      obj_surface->base.id, pic_param->ReferenceFrames,
797                                                      ARRAY_ELEMS(pic_param->ReferenceFrames));
798
799             assert(va_pic != NULL);
800             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
801             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
802         } else {
803             OUT_BCS_BATCH(batch, 0);
804             OUT_BCS_BATCH(batch, 0);
805         }
806     }
807
808     va_pic = &pic_param->CurrPic;
809     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
810     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
811
812     ADVANCE_BCS_BATCH(batch);
813 }
814
815 static void
816 gen75_mfd_avc_phantom_slice_first(VADriverContextP ctx,
817                                   VAPictureParameterBufferH264 *pic_param,
818                                   VASliceParameterBufferH264 *next_slice_param,
819                                   struct gen7_mfd_context *gen7_mfd_context)
820 {
821     gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
822 }
823
824 static void
825 gen75_mfd_avc_slice_state(VADriverContextP ctx,
826                           VAPictureParameterBufferH264 *pic_param,
827                           VASliceParameterBufferH264 *slice_param,
828                           VASliceParameterBufferH264 *next_slice_param,
829                           struct gen7_mfd_context *gen7_mfd_context)
830 {
831     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
832     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
833     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
834     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
835     int num_ref_idx_l0, num_ref_idx_l1;
836     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
837                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
838     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
839     int slice_type;
840
841     if (slice_param->slice_type == SLICE_TYPE_I ||
842         slice_param->slice_type == SLICE_TYPE_SI) {
843         slice_type = SLICE_TYPE_I;
844     } else if (slice_param->slice_type == SLICE_TYPE_P ||
845                slice_param->slice_type == SLICE_TYPE_SP) {
846         slice_type = SLICE_TYPE_P;
847     } else {
848         assert(slice_param->slice_type == SLICE_TYPE_B);
849         slice_type = SLICE_TYPE_B;
850     }
851
852     if (slice_type == SLICE_TYPE_I) {
853         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
854         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
855         num_ref_idx_l0 = 0;
856         num_ref_idx_l1 = 0;
857     } else if (slice_type == SLICE_TYPE_P) {
858         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
859         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
860         num_ref_idx_l1 = 0;
861     } else {
862         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
863         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
864     }
865
866     first_mb_in_slice = slice_param->first_mb_in_slice;
867     slice_hor_pos = first_mb_in_slice % width_in_mbs;
868     slice_ver_pos = first_mb_in_slice / width_in_mbs;
869
870     if (mbaff_picture)
871         slice_ver_pos = slice_ver_pos << 1;
872
873     if (next_slice_param) {
874         first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
875         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
876         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
877
878         if (mbaff_picture)
879             next_slice_ver_pos = next_slice_ver_pos << 1;
880     } else {
881         next_slice_hor_pos = 0;
882         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
883     }
884
885     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
886     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
887     OUT_BCS_BATCH(batch, slice_type);
888     OUT_BCS_BATCH(batch,
889                   (num_ref_idx_l1 << 24) |
890                   (num_ref_idx_l0 << 16) |
891                   (slice_param->chroma_log2_weight_denom << 8) |
892                   (slice_param->luma_log2_weight_denom << 0));
893     OUT_BCS_BATCH(batch,
894                   (slice_param->direct_spatial_mv_pred_flag << 29) |
895                   (slice_param->disable_deblocking_filter_idc << 27) |
896                   (slice_param->cabac_init_idc << 24) |
897                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
898                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
899                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
900     OUT_BCS_BATCH(batch,
901                   (slice_ver_pos << 24) |
902                   (slice_hor_pos << 16) |
903                   (first_mb_in_slice << 0));
904     OUT_BCS_BATCH(batch,
905                   (next_slice_ver_pos << 16) |
906                   (next_slice_hor_pos << 0));
907     OUT_BCS_BATCH(batch,
908                   (next_slice_param == NULL) << 19); /* last slice flag */
909     OUT_BCS_BATCH(batch, 0);
910     OUT_BCS_BATCH(batch, 0);
911     OUT_BCS_BATCH(batch, 0);
912     OUT_BCS_BATCH(batch, 0);
913     ADVANCE_BCS_BATCH(batch);
914 }
915
916 static inline void
917 gen75_mfd_avc_ref_idx_state(VADriverContextP ctx,
918                             VAPictureParameterBufferH264 *pic_param,
919                             VASliceParameterBufferH264 *slice_param,
920                             struct gen7_mfd_context *gen7_mfd_context)
921 {
922     gen6_send_avc_ref_idx_state(
923         gen7_mfd_context->base.batch,
924         slice_param,
925         gen7_mfd_context->reference_surface
926     );
927 }
928
929 static void
930 gen75_mfd_avc_weightoffset_state(VADriverContextP ctx,
931                                  VAPictureParameterBufferH264 *pic_param,
932                                  VASliceParameterBufferH264 *slice_param,
933                                  struct gen7_mfd_context *gen7_mfd_context)
934 {
935     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
936     int i, j, num_weight_offset_table = 0;
937     short weightoffsets[32 * 6];
938
939     if ((slice_param->slice_type == SLICE_TYPE_P ||
940          slice_param->slice_type == SLICE_TYPE_SP) &&
941         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
942         num_weight_offset_table = 1;
943     }
944
945     if ((slice_param->slice_type == SLICE_TYPE_B) &&
946         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
947         num_weight_offset_table = 2;
948     }
949
950     for (i = 0; i < num_weight_offset_table; i++) {
951         BEGIN_BCS_BATCH(batch, 98);
952         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
953         OUT_BCS_BATCH(batch, i);
954
955         if (i == 0) {
956             for (j = 0; j < 32; j++) {
957                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
958                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
959                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
960                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
961                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
962                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
963             }
964         } else {
965             for (j = 0; j < 32; j++) {
966                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
967                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
968                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
969                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
970                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
971                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
972             }
973         }
974
975         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
976         ADVANCE_BCS_BATCH(batch);
977     }
978 }
979
980 static void
981 gen75_mfd_avc_bsd_object(VADriverContextP ctx,
982                          VAPictureParameterBufferH264 *pic_param,
983                          VASliceParameterBufferH264 *slice_param,
984                          dri_bo *slice_data_bo,
985                          VASliceParameterBufferH264 *next_slice_param,
986                          struct gen7_mfd_context *gen7_mfd_context)
987 {
988     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
989     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
990                                                             slice_param,
991                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
992
993     /* the input bitsteam format on GEN7 differs from GEN6 */
994     BEGIN_BCS_BATCH(batch, 6);
995     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
996     OUT_BCS_BATCH(batch,
997                   (slice_param->slice_data_size - slice_param->slice_data_offset));
998     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
999     OUT_BCS_BATCH(batch,
1000                   (0 << 31) |
1001                   (0 << 14) |
1002                   (0 << 12) |
1003                   (0 << 10) |
1004                   (0 << 8));
1005     OUT_BCS_BATCH(batch,
1006                   ((slice_data_bit_offset >> 3) << 16) |
1007                   (1 << 7)  |
1008                   (0 << 5)  |
1009                   (0 << 4)  |
1010                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
1011                   (slice_data_bit_offset & 0x7));
1012     OUT_BCS_BATCH(batch, 0);
1013     ADVANCE_BCS_BATCH(batch);
1014 }
1015
1016 static inline void
1017 gen75_mfd_avc_context_init(
1018     VADriverContextP         ctx,
1019     struct gen7_mfd_context *gen7_mfd_context
1020 )
1021 {
1022     /* Initialize flat scaling lists */
1023     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
1024 }
1025
1026 static void
1027 gen75_mfd_avc_decode_init(VADriverContextP ctx,
1028                           struct decode_state *decode_state,
1029                           struct gen7_mfd_context *gen7_mfd_context)
1030 {
1031     VAPictureParameterBufferH264 *pic_param;
1032     VASliceParameterBufferH264 *slice_param;
1033     struct i965_driver_data *i965 = i965_driver_data(ctx);
1034     struct object_surface *obj_surface;
1035     dri_bo *bo;
1036     int i, j, enable_avc_ildb = 0;
1037     unsigned int width_in_mbs, height_in_mbs;
1038
1039     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
1040         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1041         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1042
1043         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1044             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1045             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1046                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1047                    (slice_param->slice_type == SLICE_TYPE_P) ||
1048                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1049                    (slice_param->slice_type == SLICE_TYPE_B));
1050
1051             if (slice_param->disable_deblocking_filter_idc != 1) {
1052                 enable_avc_ildb = 1;
1053                 break;
1054             }
1055
1056             slice_param++;
1057         }
1058     }
1059
1060     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1061     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1062     gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
1063                                        gen7_mfd_context->reference_surface);
1064     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
1065     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
1066     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
1067     assert(height_in_mbs > 0 && height_in_mbs <= 256);
1068
1069     /* Current decoded picture */
1070     obj_surface = decode_state->render_object;
1071     if (pic_param->pic_fields.bits.reference_pic_flag)
1072         obj_surface->flags |= SURFACE_REFERENCED;
1073     else
1074         obj_surface->flags &= ~SURFACE_REFERENCED;
1075
1076     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
1077     gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface);
1078
1079     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1080     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1081     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1082     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
1083
1084     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1085     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1086     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1087     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
1088
1089     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1090     bo = dri_bo_alloc(i965->intel.bufmgr,
1091                       "intra row store",
1092                       width_in_mbs * 64,
1093                       0x1000);
1094     assert(bo);
1095     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1096     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1097
1098     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1099     bo = dri_bo_alloc(i965->intel.bufmgr,
1100                       "deblocking filter row store",
1101                       width_in_mbs * 64 * 4,
1102                       0x1000);
1103     assert(bo);
1104     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1105     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1106
1107     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1108     bo = dri_bo_alloc(i965->intel.bufmgr,
1109                       "bsd mpc row store",
1110                       width_in_mbs * 64 * 2,
1111                       0x1000);
1112     assert(bo);
1113     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1114     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1115
1116     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
1117     bo = dri_bo_alloc(i965->intel.bufmgr,
1118                       "mpr row store",
1119                       width_in_mbs * 64 * 2,
1120                       0x1000);
1121     assert(bo);
1122     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
1123     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
1124
1125     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1126 }
1127
1128 static void
1129 gen75_mfd_avc_decode_picture(VADriverContextP ctx,
1130                              struct decode_state *decode_state,
1131                              struct gen7_mfd_context *gen7_mfd_context)
1132 {
1133     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1134     VAPictureParameterBufferH264 *pic_param;
1135     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
1136     dri_bo *slice_data_bo;
1137     int i, j;
1138
1139     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1140     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1141     gen75_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
1142
1143     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1144     intel_batchbuffer_emit_mi_flush(batch);
1145     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1146     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1147     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1148     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1149     gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
1150     gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
1151     gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
1152
1153     for (j = 0; j < decode_state->num_slice_params; j++) {
1154         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1155         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1156         slice_data_bo = decode_state->slice_datas[j]->bo;
1157         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
1158
1159         if (j == decode_state->num_slice_params - 1)
1160             next_slice_group_param = NULL;
1161         else
1162             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
1163
1164         if (j == 0 && slice_param->first_mb_in_slice)
1165             gen75_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
1166
1167         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1168             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1169             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1170                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1171                    (slice_param->slice_type == SLICE_TYPE_P) ||
1172                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1173                    (slice_param->slice_type == SLICE_TYPE_B));
1174
1175             if (i < decode_state->slice_params[j]->num_elements - 1)
1176                 next_slice_param = slice_param + 1;
1177             else
1178                 next_slice_param = next_slice_group_param;
1179
1180             gen75_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
1181             gen75_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
1182             gen75_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
1183             gen75_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1184             gen75_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
1185             slice_param++;
1186         }
1187     }
1188
1189     intel_batchbuffer_end_atomic(batch);
1190     intel_batchbuffer_flush(batch);
1191 }
1192
1193 static void
1194 gen75_mfd_mpeg2_decode_init(VADriverContextP ctx,
1195                             struct decode_state *decode_state,
1196                             struct gen7_mfd_context *gen7_mfd_context)
1197 {
1198     VAPictureParameterBufferMPEG2 *pic_param;
1199     struct i965_driver_data *i965 = i965_driver_data(ctx);
1200     struct object_surface *obj_surface;
1201     dri_bo *bo;
1202     unsigned int width_in_mbs;
1203
1204     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1205     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1206     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1207
1208     mpeg2_set_reference_surfaces(
1209         ctx,
1210         gen7_mfd_context->reference_surface,
1211         decode_state,
1212         pic_param
1213     );
1214
1215     /* Current decoded picture */
1216     obj_surface = decode_state->render_object;
1217     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1218
1219     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1220     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1221     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1222     gen7_mfd_context->pre_deblocking_output.valid = 1;
1223
1224     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1225     bo = dri_bo_alloc(i965->intel.bufmgr,
1226                       "bsd mpc row store",
1227                       width_in_mbs * 96,
1228                       0x1000);
1229     assert(bo);
1230     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1231     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1232
1233     gen7_mfd_context->post_deblocking_output.valid = 0;
1234     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1235     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1236     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1237     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1238 }
1239
1240 static void
1241 gen75_mfd_mpeg2_pic_state(VADriverContextP ctx,
1242                           struct decode_state *decode_state,
1243                           struct gen7_mfd_context *gen7_mfd_context)
1244 {
1245     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1246     VAPictureParameterBufferMPEG2 *pic_param;
1247     unsigned int slice_concealment_disable_bit = 0;
1248
1249     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1250     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1251
1252     slice_concealment_disable_bit = 1;
1253
1254     BEGIN_BCS_BATCH(batch, 13);
1255     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1256     OUT_BCS_BATCH(batch,
1257                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1258                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1259                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1260                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1261                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1262                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1263                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1264                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1265                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1266                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1267                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1268                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1269     OUT_BCS_BATCH(batch,
1270                   pic_param->picture_coding_type << 9);
1271     OUT_BCS_BATCH(batch,
1272                   (slice_concealment_disable_bit << 31) |
1273                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1274                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1275     OUT_BCS_BATCH(batch, 0);
1276     OUT_BCS_BATCH(batch, 0);
1277     OUT_BCS_BATCH(batch, 0);
1278     OUT_BCS_BATCH(batch, 0);
1279     OUT_BCS_BATCH(batch, 0);
1280     OUT_BCS_BATCH(batch, 0);
1281     OUT_BCS_BATCH(batch, 0);
1282     OUT_BCS_BATCH(batch, 0);
1283     OUT_BCS_BATCH(batch, 0);
1284     ADVANCE_BCS_BATCH(batch);
1285 }
1286
1287 static void
1288 gen75_mfd_mpeg2_qm_state(VADriverContextP ctx,
1289                          struct decode_state *decode_state,
1290                          struct gen7_mfd_context *gen7_mfd_context)
1291 {
1292     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1293     int i, j;
1294
1295     /* Update internal QM state */
1296     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1297         VAIQMatrixBufferMPEG2 * const iq_matrix =
1298             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1299
1300         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1301             iq_matrix->load_intra_quantiser_matrix) {
1302             gen_iq_matrix->load_intra_quantiser_matrix =
1303                 iq_matrix->load_intra_quantiser_matrix;
1304             if (iq_matrix->load_intra_quantiser_matrix) {
1305                 for (j = 0; j < 64; j++)
1306                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1307                         iq_matrix->intra_quantiser_matrix[j];
1308             }
1309         }
1310
1311         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1312             iq_matrix->load_non_intra_quantiser_matrix) {
1313             gen_iq_matrix->load_non_intra_quantiser_matrix =
1314                 iq_matrix->load_non_intra_quantiser_matrix;
1315             if (iq_matrix->load_non_intra_quantiser_matrix) {
1316                 for (j = 0; j < 64; j++)
1317                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1318                         iq_matrix->non_intra_quantiser_matrix[j];
1319             }
1320         }
1321     }
1322
1323     /* Commit QM state to HW */
1324     for (i = 0; i < 2; i++) {
1325         unsigned char *qm = NULL;
1326         int qm_type;
1327
1328         if (i == 0) {
1329             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1330                 qm = gen_iq_matrix->intra_quantiser_matrix;
1331                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1332             }
1333         } else {
1334             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1335                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1336                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1337             }
1338         }
1339
1340         if (!qm)
1341             continue;
1342
1343         gen75_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1344     }
1345 }
1346
1347 static void
1348 gen75_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1349                            VAPictureParameterBufferMPEG2 *pic_param,
1350                            VASliceParameterBufferMPEG2 *slice_param,
1351                            VASliceParameterBufferMPEG2 *next_slice_param,
1352                            struct gen7_mfd_context *gen7_mfd_context)
1353 {
1354     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1355     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1356     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1357
1358     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1359         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1360         is_field_pic = 1;
1361     is_field_pic_wa = is_field_pic &&
1362                       gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1363
1364     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1365     hpos0 = slice_param->slice_horizontal_position;
1366
1367     if (next_slice_param == NULL) {
1368         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1369         hpos1 = 0;
1370     } else {
1371         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1372         hpos1 = next_slice_param->slice_horizontal_position;
1373     }
1374
1375     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1376
1377     BEGIN_BCS_BATCH(batch, 5);
1378     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1379     OUT_BCS_BATCH(batch,
1380                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1381     OUT_BCS_BATCH(batch,
1382                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1383     OUT_BCS_BATCH(batch,
1384                   hpos0 << 24 |
1385                   vpos0 << 16 |
1386                   mb_count << 8 |
1387                   (next_slice_param == NULL) << 5 |
1388                   (next_slice_param == NULL) << 3 |
1389                   (slice_param->macroblock_offset & 0x7));
1390     OUT_BCS_BATCH(batch,
1391                   (slice_param->quantiser_scale_code << 24) |
1392                   (vpos1 << 8 | hpos1));
1393     ADVANCE_BCS_BATCH(batch);
1394 }
1395
1396 static void
1397 gen75_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1398                                struct decode_state *decode_state,
1399                                struct gen7_mfd_context *gen7_mfd_context)
1400 {
1401     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1402     VAPictureParameterBufferMPEG2 *pic_param;
1403     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1404     dri_bo *slice_data_bo;
1405     int i, j;
1406
1407     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1408     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1409
1410     gen75_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1411     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1412     intel_batchbuffer_emit_mi_flush(batch);
1413     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1414     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1415     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1416     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1417     gen75_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1418     gen75_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1419
1420     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1421         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1422             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1423
1424     for (j = 0; j < decode_state->num_slice_params; j++) {
1425         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1426         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1427         slice_data_bo = decode_state->slice_datas[j]->bo;
1428         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1429
1430         if (j == decode_state->num_slice_params - 1)
1431             next_slice_group_param = NULL;
1432         else
1433             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1434
1435         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1436             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1437
1438             if (i < decode_state->slice_params[j]->num_elements - 1)
1439                 next_slice_param = slice_param + 1;
1440             else
1441                 next_slice_param = next_slice_group_param;
1442
1443             gen75_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1444             slice_param++;
1445         }
1446     }
1447
1448     intel_batchbuffer_end_atomic(batch);
1449     intel_batchbuffer_flush(batch);
1450 }
1451
/*
 * Translation table from a VA-API VC-1 motion vector mode value (0..3,
 * used as the index) to the value this driver programs for it.
 */
static const int va_to_gen7_vc1_mv[4] = {
    [0] = 1, /* 1-MV */
    [1] = 2, /* 1-MV half-pel */
    [2] = 3, /* 1-MV half-pel bilinear */
    [3] = 0, /* Mixed MV */
};
1458
/*
 * B-picture scale factors, indexed by the picture parameter's
 * b_picture_fraction value (valid indices 0..20).
 * NOTE(review): the entries look like the B fraction scaled by 256
 * (128 for 1/2, 85 for 1/3, ...) -- confirm against the VC-1 spec.
 */
static const int b_picture_scale_factor[21] = {
    128,  85, 170,  64, 192,  51, 102,
    153, 204,  43, 215,  37,  74, 111,
    148, 185, 222,  32,  96, 160, 224,
};
1466
/*
 * Translation table for the VC-1 conditional overlap value: the VA-API
 * value (0..2) is the index, the entry is the value used by this driver.
 */
static const int va_to_gen7_vc1_condover[3] = {
    [0] = 0,
    [1] = 2,
    [2] = 3,
};
1472
1473 static const int fptype_to_picture_type[8][2] = {
1474     {GEN7_VC1_I_PICTURE, GEN7_VC1_I_PICTURE},
1475     {GEN7_VC1_I_PICTURE, GEN7_VC1_P_PICTURE},
1476     {GEN7_VC1_P_PICTURE, GEN7_VC1_I_PICTURE},
1477     {GEN7_VC1_P_PICTURE, GEN7_VC1_P_PICTURE},
1478     {GEN7_VC1_B_PICTURE, GEN7_VC1_B_PICTURE},
1479     {GEN7_VC1_B_PICTURE, GEN7_VC1_BI_PICTURE},
1480     {GEN7_VC1_BI_PICTURE, GEN7_VC1_B_PICTURE},
1481     {GEN7_VC1_BI_PICTURE, GEN7_VC1_BI_PICTURE}
1482 };
1483
1484 static void
1485 gen75_mfd_free_vc1_surface(void **data)
1486 {
1487     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1488
1489     if (!gen7_vc1_surface)
1490         return;
1491
1492     dri_bo_unreference(gen7_vc1_surface->dmv_top);
1493     dri_bo_unreference(gen7_vc1_surface->dmv_bottom);
1494     free(gen7_vc1_surface);
1495     *data = NULL;
1496 }
1497
1498 static void
1499 gen75_mfd_init_vc1_surface(VADriverContextP ctx,
1500                            VAPictureParameterBufferVC1 *pic_param,
1501                            struct object_surface *obj_surface)
1502 {
1503     struct i965_driver_data *i965 = i965_driver_data(ctx);
1504     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1505     int height_in_mbs;
1506     int picture_type;
1507     int is_first_field = 1;
1508
1509     if (!pic_param->sequence_fields.bits.interlace ||
1510         (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
1511         picture_type = pic_param->picture_fields.bits.picture_type;
1512     } else {/* Field-Interlace */
1513         is_first_field = pic_param->picture_fields.bits.is_first_field;
1514         picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
1515     }
1516
1517     obj_surface->free_private_data = gen75_mfd_free_vc1_surface;
1518
1519     if (!gen7_vc1_surface) {
1520         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1521         assert(gen7_vc1_surface);
1522         assert((obj_surface->size & 0x3f) == 0);
1523         obj_surface->private_data = gen7_vc1_surface;
1524     }
1525
1526     if (!pic_param->sequence_fields.bits.interlace ||
1527         pic_param->picture_fields.bits.frame_coding_mode < 2 || /* Progressive or Frame-Interlace */
1528         is_first_field) {
1529         gen7_vc1_surface->picture_type_top = 0;
1530         gen7_vc1_surface->picture_type_bottom = 0;
1531         gen7_vc1_surface->intensity_compensation_top = 0;
1532         gen7_vc1_surface->intensity_compensation_bottom = 0;
1533         gen7_vc1_surface->luma_scale_top[0] = 0;
1534         gen7_vc1_surface->luma_scale_top[1] = 0;
1535         gen7_vc1_surface->luma_scale_bottom[0] = 0;
1536         gen7_vc1_surface->luma_scale_bottom[1] = 0;
1537         gen7_vc1_surface->luma_shift_top[0] = 0;
1538         gen7_vc1_surface->luma_shift_top[1] = 0;
1539         gen7_vc1_surface->luma_shift_bottom[0] = 0;
1540         gen7_vc1_surface->luma_shift_bottom[1] = 0;
1541     }
1542
1543     if (!pic_param->sequence_fields.bits.interlace ||
1544         pic_param->picture_fields.bits.frame_coding_mode < 2) { /* Progressive or Frame-Interlace */
1545         gen7_vc1_surface->picture_type_top = picture_type;
1546         gen7_vc1_surface->picture_type_bottom = picture_type;
1547     } else if (pic_param->picture_fields.bits.top_field_first ^ is_first_field)
1548         gen7_vc1_surface->picture_type_bottom = picture_type;
1549     else
1550         gen7_vc1_surface->picture_type_top = picture_type;
1551
1552     /*
1553      * The Direct MV buffer is scalable with frame height, but
1554      * does not scale with frame width as the hardware assumes
1555      * that frame width is fixed at 128 MBs.
1556      */
1557
1558     if (gen7_vc1_surface->dmv_top == NULL) {
1559         height_in_mbs = ALIGN(obj_surface->orig_height, 16) / 16;
1560         gen7_vc1_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
1561                                              "direct mv w/r buffer",
1562                                              128 * height_in_mbs * 64,
1563                                              0x1000);
1564     }
1565
1566     if (pic_param->sequence_fields.bits.interlace &&
1567         gen7_vc1_surface->dmv_bottom == NULL) {
1568         height_in_mbs = ALIGN(obj_surface->orig_height, 32) / 32;
1569         gen7_vc1_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
1570                                              "direct mv w/r buffer",
1571                                              128 * height_in_mbs * 64,
1572                                              0x1000);
1573     }
1574 }
1575
/*
 * Per-picture setup for VC-1 decoding.
 *
 * In order, this function:
 *  - derives the picture type of the frame or of the current field;
 *  - makes sure the render target has a buffer object and VC-1 private
 *    data, and points both deblocking outputs at it;
 *  - updates the reference frame store;
 *  - for P pictures, records the intensity compensation parameters
 *    (luma_scale / luma_shift) on the surface they are applied to;
 *  - replaces the row-store scratch buffers, sized from the picture
 *    width in macroblocks;
 *  - builds the bitplane buffer handed to the hardware, either filled
 *    with a constant for skipped pictures or repacked from the buffer
 *    supplied in decode_state.
 */
1576 static void
1577 gen75_mfd_vc1_decode_init(VADriverContextP ctx,
1578                           struct decode_state *decode_state,
1579                           struct gen7_mfd_context *gen7_mfd_context)
1580 {
1581     VAPictureParameterBufferVC1 *pic_param;
1582     struct i965_driver_data *i965 = i965_driver_data(ctx);
1583     struct object_surface *obj_surface;
1584     struct gen7_vc1_surface *gen7_vc1_current_surface;
1585     struct gen7_vc1_surface *gen7_vc1_forward_surface;
1586     dri_bo *bo;
1587     int width_in_mbs;
1588     int picture_type;
1589     int is_first_field = 1;
1590     int i;
1591
1592     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1593     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1594     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1595
         /*
          * For field-interlaced pictures the picture_type field describes
          * the field pair, so it is mapped to the type of the current field.
          */
1596     if (!pic_param->sequence_fields.bits.interlace ||
1597         (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
1598         picture_type = pic_param->picture_fields.bits.picture_type;
1599     } else {/* Field-Interlace */
1600         is_first_field = pic_param->picture_fields.bits.is_first_field;
1601         picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
1602     }
1603
1604     /* Current decoded picture */
1605     obj_surface = decode_state->render_object;
1606     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1607     gen75_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1608
         /*
          * Both outputs reference the render target's bo; the valid flags
          * set below select which of the two is actually used.
          */
1609     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1610     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1611     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1612
1613     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1614     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1615     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1616
         /*
          * Skipped pictures always use the pre-deblocking output; otherwise
          * the loopfilter flag chooses post- (set) or pre- (clear) deblocking.
          */
1617     if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1618         gen7_mfd_context->post_deblocking_output.valid = 0;
1619         gen7_mfd_context->pre_deblocking_output.valid = 1;
1620     } else {
1621         gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1622         gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1623     }
1624
1625     intel_update_vc1_frame_store_index(ctx,
1626                                        decode_state,
1627                                        pic_param,
1628                                        gen7_mfd_context->reference_surface);
1629
         /*
          * Intensity compensation bookkeeping (P pictures only).
          *
          * The parameters are stored on the surface that is compensated:
          * the forward reference surface when there is one, or the current
          * surface in the second-field branches below.
          * NOTE(review): presumably the current surface is used when the
          * second field references the first field of the same frame --
          * confirm against the VC-1 reference field rules.
          */
1630     if (picture_type == GEN7_VC1_P_PICTURE) {
1631         obj_surface = decode_state->reference_objects[0];
1632         gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);
1633         if (pic_param->forward_reference_picture != VA_INVALID_ID &&
1634             obj_surface)
1635             gen7_vc1_forward_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
1636         else
1637             gen7_vc1_forward_surface = NULL;
1638
1639         if (!pic_param->sequence_fields.bits.interlace ||
1640             pic_param->picture_fields.bits.frame_coding_mode == 0) { /* Progressive */
1641             if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1642                 if (gen7_vc1_forward_surface) {
1643                     gen7_vc1_forward_surface->intensity_compensation_top = 1;
1644                     gen7_vc1_forward_surface->intensity_compensation_bottom = 1;
1645                     gen7_vc1_forward_surface->luma_scale_top[0] = pic_param->luma_scale;
1646                     gen7_vc1_forward_surface->luma_scale_bottom[0] = pic_param->luma_scale;
1647                     gen7_vc1_forward_surface->luma_shift_top[0] = pic_param->luma_shift;
1648                     gen7_vc1_forward_surface->luma_shift_bottom[0] = pic_param->luma_shift;
1649                 }
1650             }
1651         } else if (pic_param->sequence_fields.bits.interlace &&
1652             pic_param->picture_fields.bits.frame_coding_mode == 1) { /* Frame-Interlace */
1653             if (pic_param->picture_fields.bits.intensity_compensation) {
1654                 if (gen7_vc1_forward_surface) {
1655                     gen7_vc1_forward_surface->intensity_compensation_top = 1;
1656                     gen7_vc1_forward_surface->intensity_compensation_bottom = 1;
1657                     gen7_vc1_forward_surface->luma_scale_top[0] = pic_param->luma_scale;
1658                     gen7_vc1_forward_surface->luma_scale_bottom[0] = pic_param->luma_scale;
1659                     gen7_vc1_forward_surface->luma_shift_top[0] = pic_param->luma_shift;
1660                     gen7_vc1_forward_surface->luma_shift_bottom[0] = pic_param->luma_shift;
1661                 }
1662             }
1663         } else if (pic_param->sequence_fields.bits.interlace &&
1664                    pic_param->picture_fields.bits.frame_coding_mode == 2) { /* Field-Interlace */
1665             if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
                     /*
                      * In the field-interlace branches the intensity_compensation_*
                      * counter is post-incremented and its old value is used as
                      * the index of the luma_scale / luma_shift slot to fill.
                      * NOTE(review): the index is not checked against the array
                      * size here -- confirm it cannot exceed the slots available.
                      */
1666                 if (pic_param->intensity_compensation_field == 1 || /* Top field */
1667                     pic_param->intensity_compensation_field == 0) { /* Both fields */
1668                     if (is_first_field) {
1669                         if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1670                              (pic_param->reference_fields.bits.reference_field_pic_indicator ==
1671                              pic_param->picture_fields.bits.top_field_first)) ||
1672                             pic_param->reference_fields.bits.num_reference_pictures) {
1673                             if (gen7_vc1_forward_surface) {
1674                                 i = gen7_vc1_forward_surface->intensity_compensation_top++;
1675                                 gen7_vc1_forward_surface->luma_scale_top[i] = pic_param->luma_scale;
1676                                 gen7_vc1_forward_surface->luma_shift_top[i] = pic_param->luma_shift;
1677                             }
1678                         }
1679                     } else { /* Second field */
1680                         if (pic_param->picture_fields.bits.top_field_first) {
1681                             if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1682                                  !pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1683                                 pic_param->reference_fields.bits.num_reference_pictures) {
1684                                 i = gen7_vc1_current_surface->intensity_compensation_top++;
1685                                 gen7_vc1_current_surface->luma_scale_top[i] = pic_param->luma_scale;
1686                                 gen7_vc1_current_surface->luma_shift_top[i] = pic_param->luma_shift;
1687                             }
1688                         } else {
1689                             if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1690                                  pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1691                                 pic_param->reference_fields.bits.num_reference_pictures) {
1692                                 if (gen7_vc1_forward_surface) {
1693                                     i = gen7_vc1_forward_surface->intensity_compensation_top++;
1694                                     gen7_vc1_forward_surface->luma_scale_top[i] = pic_param->luma_scale;
1695                                     gen7_vc1_forward_surface->luma_shift_top[i] = pic_param->luma_shift;
1696                                 }
1697                             }
1698                         }
1699                     }
1700                 }
                     /*
                      * Bottom field: when compensation applies to both fields the
                      * bottom field takes luma_scale2 / luma_shift2, otherwise it
                      * takes luma_scale / luma_shift.
                      */
1701                 if (pic_param->intensity_compensation_field == 2 || /* Bottom field */
1702                     pic_param->intensity_compensation_field == 0) { /* Both fields */
1703                     if (is_first_field) {
1704                         if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1705                              (pic_param->reference_fields.bits.reference_field_pic_indicator ^
1706                               pic_param->picture_fields.bits.top_field_first)) ||
1707                             pic_param->reference_fields.bits.num_reference_pictures) {
1708                             if (gen7_vc1_forward_surface) {
1709                                 i = gen7_vc1_forward_surface->intensity_compensation_bottom++;
1710                                 if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
1711                                     gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale;
1712                                     gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift;
1713                                 } else { /* Both fields */
1714                                     gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
1715                                     gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
1716                                 }
1717                             }
1718                         }
1719                     } else { /* Second field */
1720                         if (pic_param->picture_fields.bits.top_field_first) {
1721                             if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1722                                  pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1723                                 pic_param->reference_fields.bits.num_reference_pictures) {
1724                                 if (gen7_vc1_forward_surface) {
1725                                     i = gen7_vc1_forward_surface->intensity_compensation_bottom++;
1726                                     if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
1727                                         gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale;
1728                                         gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift;
1729                                     } else { /* Both fields */
1730                                         gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
1731                                         gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
1732                                     }
1733                                 }
1734                             }
1735                         } else {
1736                            if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1737                                  !pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1738                                 pic_param->reference_fields.bits.num_reference_pictures) {
1739                                 i = gen7_vc1_current_surface->intensity_compensation_bottom++;
1740                                if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
1741                                    gen7_vc1_current_surface->luma_scale_bottom[i] = pic_param->luma_scale;
1742                                    gen7_vc1_current_surface->luma_shift_bottom[i] = pic_param->luma_shift;
1743                                 } else { /* Both fields */
1744                                     gen7_vc1_current_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
1745                                     gen7_vc1_current_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
1746                                 }
1747                             }
1748                         }
1749                     }
1750                 }
1751             }
1752         }
1753     }
1754
         /*
          * Row-store scratch buffers: the previous bo is released and a new
          * one is allocated for every picture, sized from width_in_mbs.
          */
1755     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1756     bo = dri_bo_alloc(i965->intel.bufmgr,
1757                       "intra row store",
1758                       width_in_mbs * 64,
1759                       0x1000);
1760     assert(bo);
1761     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1762     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1763
1764     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1765     bo = dri_bo_alloc(i965->intel.bufmgr,
1766                       "deblocking filter row store",
1767                       width_in_mbs * 7 * 64,
1768                       0x1000);
1769     assert(bo);
1770     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1771     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1772
1773     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1774     bo = dri_bo_alloc(i965->intel.bufmgr,
1775                       "bsd mpc row store",
1776                       width_in_mbs * 96,
1777                       0x1000);
1778     assert(bo);
1779     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1780     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1781
         /* The MPR row store is marked invalid for VC-1 */
1782     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1783
         /*
          * A bitplane buffer is always built for skipped pictures; otherwise
          * only when one of the low seven bitplane_present bits is set.
          */
1784     if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
1785         gen7_mfd_context->bitplane_read_buffer.valid = 1;
1786     else
1787         gen7_mfd_context->bitplane_read_buffer.valid = !!(pic_param->bitplane_present.value & 0x7f);
1788     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1789
1790     if (gen7_mfd_context->bitplane_read_buffer.valid) {
             /*
              * NOTE(review): this width_in_mbs shadows the function-level
              * variable of the same name; both are computed the same way.
              */
1791         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1792         int height_in_mbs;
             /* Two macroblocks (4 bits each) are packed into one byte */
1793         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1794         int src_w, src_h;
1795         uint8_t *src = NULL, *dst = NULL;
1796
1797         if (!pic_param->sequence_fields.bits.interlace ||
1798             (pic_param->picture_fields.bits.frame_coding_mode < 2)) /* Progressive or Frame-Interlace */
1799             height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1800         else /* Field-Interlace */
1801             height_in_mbs = ALIGN(pic_param->coded_height, 32) / 32;
1802
1803         bo = dri_bo_alloc(i965->intel.bufmgr,
1804                           "VC-1 Bitplane",
1805                           bitplane_width * height_in_mbs,
1806                           0x1000);
1807         assert(bo);
1808         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1809
1810         dri_bo_map(bo, True);
1811         assert(bo->virtual);
1812         dst = bo->virtual;
1813
             /*
              * Packing scheme used by both loops below: each new 4-bit value
              * is shifted into the high nibble while the previous content of
              * the byte moves down to the low nibble.  After two writes the
              * even macroblock sits in the low nibble and the odd one in the
              * high nibble, and whatever the byte held before is shifted out.
              */
1814         if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
                 /*
                  * Every macroblock gets the constant 0x2.
                  * NOTE(review): presumably the "skipped" flag bit of the
                  * bitplane nibble -- confirm against the bitplane layout.
                  */
1815             for (src_h = 0; src_h < height_in_mbs; src_h++) {
1816                 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1817                     int dst_index;
1818                     uint8_t src_value = 0x2;
1819
1820                     dst_index = src_w / 2;
1821                     dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1822                 }
1823
                     /* Odd width: the last byte was written once, move it to the low nibble */
1824                 if (src_w & 1)
1825                     dst[src_w / 2] >>= 4;
1826
1827                 dst += bitplane_width;
1828             }
1829         } else {
                 /*
                  * NOTE(review): decode_state->bit_plane itself is dereferenced
                  * without a NULL check -- confirm callers always provide it
                  * when bitplane_present is set.
                  */
1830             assert(decode_state->bit_plane->buffer);
1831             src = decode_state->bit_plane->buffer;
1832
                 /*
                  * The source packs macroblocks continuously across rows (two
                  * per byte, even index in the high nibble); the destination
                  * starts every row on a byte boundary.
                  */
1833             for (src_h = 0; src_h < height_in_mbs; src_h++) {
1834                 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1835                     int src_index, dst_index;
1836                     int src_shift;
1837                     uint8_t src_value;
1838
1839                     src_index = (src_h * width_in_mbs + src_w) / 2;
1840                     src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1841                     src_value = ((src[src_index] >> src_shift) & 0xf);
1842
1843                     dst_index = src_w / 2;
1844                     dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1845                 }
1846
                     /* Odd width: the last byte was written once, move it to the low nibble */
1847                 if (src_w & 1)
1848                     dst[src_w / 2] >>= 4;
1849
1850                 dst += bitplane_width;
1851             }
1852         }
1853
1854         dri_bo_unmap(bo);
1855     } else
1856         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1857 }
1858
1859 static void
1860 gen75_mfd_vc1_pic_state(VADriverContextP ctx,
1861                         struct decode_state *decode_state,
1862                         struct gen7_mfd_context *gen7_mfd_context)
1863 {
1864     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1865     VAPictureParameterBufferVC1 *pic_param;
1866     struct object_surface *obj_surface;
1867     struct gen7_vc1_surface *gen7_vc1_current_surface;
1868     struct gen7_vc1_surface *gen7_vc1_reference_surface;
1869     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1870     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1871     int unified_mv_mode = 0;
1872     int ref_field_pic_polarity = 0;
1873     int scale_factor = 0;
1874     int trans_ac_y = 0;
1875     int dmv_surface_valid = 0;
1876     int frfd = 0;
1877     int brfd = 0;
1878     int fcm = 0;
1879     int picture_type;
1880     int ptype;
1881     int overlap = 0;
1882     int interpolation_mode = 0;
1883     int height_in_mbs;
1884     int is_first_field = 1;
1885     int loopfilter = 0;
1886     int bitplane_present;
1887     int range_reduction = 0;
1888     int range_reduction_scale = 0;
1889     int forward_mb = 0, mv_type_mb = 0, skip_mb = 0, direct_mb = 0;
1890     int overflags = 0, ac_pred = 0, field_tx = 0;
1891
1892     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1893     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1894
1895     if (!pic_param->sequence_fields.bits.interlace ||
1896         (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
1897         picture_type = pic_param->picture_fields.bits.picture_type;
1898         height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1899     } else {/* Field-Interlace */
1900         is_first_field = pic_param->picture_fields.bits.is_first_field;
1901         picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
1902         height_in_mbs = ALIGN(pic_param->coded_height, 32) / 32;
1903     }
1904
1905     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1906     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1907     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1908     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1909     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1910     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1911     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1912
1913     if (dquant == 0) {
1914         alt_pquant_config = 0;
1915         alt_pquant_edge_mask = 0;
1916     } else if (dquant == 2) {
1917         alt_pquant_config = 1;
1918         alt_pquant_edge_mask = 0xf;
1919     } else {
1920         assert(dquant == 1);
1921         if (dquantfrm == 0) {
1922             alt_pquant_config = 0;
1923             alt_pquant_edge_mask = 0;
1924             alt_pq = 0;
1925         } else {
1926             assert(dquantfrm == 1);
1927             alt_pquant_config = 1;
1928
1929             switch (dqprofile) {
1930             case 3:
1931                 if (dqbilevel == 0) {
1932                     alt_pquant_config = 2;
1933                     alt_pquant_edge_mask = 0;
1934                 } else {
1935                     assert(dqbilevel == 1);
1936                     alt_pquant_config = 3;
1937                     alt_pquant_edge_mask = 0;
1938                 }
1939                 break;
1940
1941             case 0:
1942                 alt_pquant_edge_mask = 0xf;
1943                 break;
1944
1945             case 1:
1946                 if (dqdbedge == 3)
1947                     alt_pquant_edge_mask = 0x9;
1948                 else
1949                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1950
1951                 break;
1952
1953             case 2:
1954                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1955                 break;
1956
1957             default:
1958                 assert(0);
1959             }
1960         }
1961     }
1962
1963     if (pic_param->sequence_fields.bits.profile == 1 && /* Main Profile */
1964         pic_param->sequence_fields.bits.rangered) {
1965         obj_surface = decode_state->reference_objects[0];
1966
1967         gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);
1968
1969         if (pic_param->forward_reference_picture != VA_INVALID_ID &&
1970             obj_surface)
1971             gen7_vc1_reference_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
1972         else
1973             gen7_vc1_reference_surface = NULL;
1974
1975         if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
1976             if (gen7_vc1_reference_surface)
1977                 gen7_vc1_current_surface->range_reduction_frame = gen7_vc1_reference_surface->range_reduction_frame;
1978             else
1979                 gen7_vc1_current_surface->range_reduction_frame = 0;
1980         else
1981             gen7_vc1_current_surface->range_reduction_frame = pic_param->range_reduction_frame;
1982
1983         if (gen7_vc1_reference_surface) {
1984             if (gen7_vc1_current_surface->range_reduction_frame &&
1985                 !gen7_vc1_reference_surface->range_reduction_frame) {
1986                 range_reduction = 1;
1987                 range_reduction_scale = 0;
1988             } else if (!gen7_vc1_current_surface->range_reduction_frame &&
1989                        gen7_vc1_reference_surface->range_reduction_frame) {
1990                 range_reduction = 1;
1991                 range_reduction_scale = 1;
1992             }
1993         }
1994     }
1995
1996     if ((!pic_param->sequence_fields.bits.interlace ||
1997          pic_param->picture_fields.bits.frame_coding_mode != 1) && /* Progressive or Field-Interlace */
1998         (picture_type == GEN7_VC1_P_PICTURE ||
1999          picture_type == GEN7_VC1_B_PICTURE)) {
2000         if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
2001             assert(pic_param->mv_fields.bits.mv_mode2 < 4);
2002             unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
2003         } else {
2004             assert(pic_param->mv_fields.bits.mv_mode < 4);
2005             unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
2006         }
2007     }
2008
2009     if (pic_param->sequence_fields.bits.interlace &&
2010         pic_param->picture_fields.bits.frame_coding_mode == 2 && /* Field-Interlace */
2011         picture_type == GEN7_VC1_P_PICTURE &&
2012         !pic_param->reference_fields.bits.num_reference_pictures) {
2013         if (pic_param->reference_fields.bits.reference_field_pic_indicator == 0) {
2014             ref_field_pic_polarity = is_first_field ?
2015                                         pic_param->picture_fields.bits.top_field_first :
2016                                         !pic_param->picture_fields.bits.top_field_first;
2017         } else {
2018             ref_field_pic_polarity = is_first_field ?
2019                                         !pic_param->picture_fields.bits.top_field_first :
2020                                         pic_param->picture_fields.bits.top_field_first;
2021         }
2022     }
2023
2024     if (pic_param->b_picture_fraction < 21)
2025         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
2026
2027     if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
2028         ptype = GEN7_VC1_P_PICTURE;
2029         bitplane_present = 1;
2030     } else {
2031         ptype = pic_param->picture_fields.bits.picture_type;
2032         bitplane_present = !!(pic_param->bitplane_present.value & 0x7f);
2033         forward_mb = pic_param->raw_coding.flags.forward_mb;
2034         mv_type_mb = pic_param->raw_coding.flags.mv_type_mb;
2035         skip_mb = pic_param->raw_coding.flags.skip_mb;
2036         direct_mb = pic_param->raw_coding.flags.direct_mb;
2037         overflags = pic_param->raw_coding.flags.overflags;
2038         ac_pred = pic_param->raw_coding.flags.ac_pred;
2039         field_tx = pic_param->raw_coding.flags.field_tx;
2040         loopfilter = pic_param->entrypoint_fields.bits.loopfilter;
2041     }
2042
2043     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
2044         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
2045     else {
2046         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
2047
2048         /*
2049          * 8.3.6.2.1 Transform Type Selection
2050          * If variable-sized transform coding is not enabled,
2051          * then the 8x8 transform shall be used for all blocks.
2052          * it is also MFX_VC1_PIC_STATE requirement.
2053          */
2054         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
2055             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
2056             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
2057         }
2058     }
2059
2060     if (picture_type == GEN7_VC1_B_PICTURE) {
2061         obj_surface = decode_state->reference_objects[1];
2062
2063         if (pic_param->backward_reference_picture != VA_INVALID_ID &&
2064             obj_surface)
2065             gen7_vc1_reference_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
2066         else
2067             gen7_vc1_reference_surface = NULL;
2068
2069         if (gen7_vc1_reference_surface) {
2070             if (pic_param->sequence_fields.bits.interlace &&
2071                 pic_param->picture_fields.bits.frame_coding_mode == 2 && /* Field-Interlace */
2072                 pic_param->picture_fields.bits.top_field_first ^ is_first_field) {
2073                 if (gen7_vc1_reference_surface->picture_type_bottom == GEN7_VC1_P_PICTURE)
2074                     dmv_surface_valid = 1;
2075             } else if (gen7_vc1_reference_surface->picture_type_top == GEN7_VC1_P_PICTURE)
2076                 dmv_surface_valid = 1;
2077         }
2078     }
2079
2080     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
2081
2082     if (pic_param->sequence_fields.bits.interlace) {
2083         if (pic_param->picture_fields.bits.frame_coding_mode < 2)
2084             fcm = pic_param->picture_fields.bits.frame_coding_mode;
2085         else if (!pic_param->picture_fields.bits.top_field_first)
2086             fcm = 3; /* Field with bottom field first */
2087         else
2088             fcm = 2; /* Field with top field first */
2089     }
2090
2091     if (pic_param->sequence_fields.bits.interlace &&
2092         pic_param->picture_fields.bits.frame_coding_mode == 2) { /* Field-Interlace */
2093         if (picture_type == GEN7_VC1_I_PICTURE ||
2094              picture_type == GEN7_VC1_P_PICTURE) {
2095             gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);
2096
2097             if (is_first_field)
2098                 gen7_vc1_current_surface->reference_distance = pic_param->reference_fields.bits.reference_distance;
2099
2100             frfd = gen7_vc1_current_surface->reference_distance;
2101         } else if (picture_type == GEN7_VC1_B_PICTURE) {
2102             obj_surface = decode_state->reference_objects[1];
2103
2104             if (pic_param->backward_reference_picture != VA_INVALID_ID &&
2105                 obj_surface)
2106                 gen7_vc1_reference_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
2107             else
2108                 gen7_vc1_reference_surface = NULL;
2109
2110             if (gen7_vc1_reference_surface) {
2111                 frfd = (scale_factor * gen7_vc1_reference_surface->reference_distance) >> 8;
2112
2113                 brfd = gen7_vc1_reference_surface->reference_distance - frfd - 1;
2114                 if (brfd < 0)
2115                     brfd = 0;
2116             }
2117         }
2118     }
2119
2120     if (pic_param->sequence_fields.bits.overlap) {
2121         if (pic_param->sequence_fields.bits.profile == 3) { /* Advanced Profile */
2122             if (picture_type == GEN7_VC1_P_PICTURE &&
2123                 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
2124                 overlap = 1;
2125             }
2126             if (picture_type == GEN7_VC1_I_PICTURE ||
2127                 picture_type == GEN7_VC1_BI_PICTURE) {
2128                 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
2129                     overlap = 1;
2130                 } else if (pic_param->conditional_overlap_flag == 1 || /* all block boundaries */
2131                            pic_param->conditional_overlap_flag == 2) { /* coded by OVERFLAGSMB bitplane */
2132                     overlap = 1;
2133                 }
2134             }
2135         } else {
2136             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
2137                 picture_type != GEN7_VC1_B_PICTURE) {
2138                 overlap = 1;
2139             }
2140         }
2141     }
2142
2143     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
2144         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
2145          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
2146         interpolation_mode = 8 | pic_param->fast_uvmc_flag;
2147     else
2148         interpolation_mode = 0 | pic_param->fast_uvmc_flag;
2149
2150     BEGIN_BCS_BATCH(batch, 6);
2151     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
2152     OUT_BCS_BATCH(batch,
2153                   ((height_in_mbs - 1) << 16) |
2154                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
2155     OUT_BCS_BATCH(batch,
2156                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
2157                   dmv_surface_valid << 15 |
2158                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
2159                   pic_param->rounding_control << 13 |
2160                   pic_param->sequence_fields.bits.syncmarker << 12 |
2161                   interpolation_mode << 8 |
2162                   range_reduction_scale << 7 |
2163                   range_reduction << 6 |
2164                   loopfilter << 5 |
2165                   overlap << 4 |
2166                   !is_first_field << 3 |
2167                   (pic_param->sequence_fields.bits.profile == 3) << 0); /* Advanced Profile */
2168     OUT_BCS_BATCH(batch,
2169                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
2170                   ptype << 26 |
2171                   fcm << 24 |
2172                   alt_pq << 16 |
2173                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
2174                   scale_factor << 0);
2175     OUT_BCS_BATCH(batch,
2176                   unified_mv_mode << 28 |
2177                   pic_param->mv_fields.bits.four_mv_switch << 27 |
2178                   pic_param->fast_uvmc_flag << 26 |
2179                   ref_field_pic_polarity << 25 |
2180                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
2181                   brfd << 20 |
2182                   frfd << 16 |
2183                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
2184                   pic_param->mv_fields.bits.extended_mv_range << 8 |
2185                   alt_pquant_edge_mask << 4 |
2186                   alt_pquant_config << 2 |
2187                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |
2188                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
2189     OUT_BCS_BATCH(batch,
2190                   bitplane_present << 31 |
2191                   forward_mb << 30 |
2192                   mv_type_mb << 29 |
2193                   skip_mb << 28 |
2194                   direct_mb << 27 |
2195                   overflags << 26 |
2196                   ac_pred << 25 |
2197                   field_tx << 24 |
2198                   pic_param->mv_fields.bits.mv_table << 20 |
2199                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
2200                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
2201                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |
2202                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
2203                   pic_param->mb_mode_table << 8 |
2204                   trans_ac_y << 6 |
2205                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
2206                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
2207                   pic_param->cbp_table << 0);
2208     ADVANCE_BCS_BATCH(batch);
2209 }
2210
2211 static void
2212 gen75_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
2213                               struct decode_state *decode_state,
2214                               struct gen7_mfd_context *gen7_mfd_context)
2215 {
2216     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2217     VAPictureParameterBufferVC1 *pic_param;
2218     struct gen7_vc1_surface *gen7_vc1_top_surface;
2219     struct gen7_vc1_surface *gen7_vc1_bottom_surface;
2220     int picture_type;
2221     int is_first_field = 1;
2222     int intensitycomp_single_fwd = 0;
2223     int intensitycomp_single_bwd = 0;
2224     int intensitycomp_double_fwd = 0;
2225     int lumscale1_single_fwd = 0;
2226     int lumscale2_single_fwd = 0;
2227     int lumshift1_single_fwd = 0;
2228     int lumshift2_single_fwd = 0;
2229     int lumscale1_single_bwd = 0;
2230     int lumscale2_single_bwd = 0;
2231     int lumshift1_single_bwd = 0;
2232     int lumshift2_single_bwd = 0;
2233     int lumscale1_double_fwd = 0;
2234     int lumscale2_double_fwd = 0;
2235     int lumshift1_double_fwd = 0;
2236     int lumshift2_double_fwd = 0;
2237     int replication_mode = 0;
2238
2239     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2240     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2241
2242     if (!pic_param->sequence_fields.bits.interlace ||
2243         (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
2244         picture_type = pic_param->picture_fields.bits.picture_type;
2245     } else {/* Field-Interlace */
2246         is_first_field = pic_param->picture_fields.bits.is_first_field;
2247         picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
2248     }
2249
2250     if (picture_type == GEN7_VC1_P_PICTURE ||
2251         picture_type == GEN7_VC1_B_PICTURE) {
2252         if (gen7_mfd_context->reference_surface[0].surface_id != VA_INVALID_ID)
2253             gen7_vc1_top_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[0].obj_surface->private_data);
2254         else
2255             gen7_vc1_top_surface = NULL;
2256
2257         if (gen7_vc1_top_surface) {
2258             intensitycomp_single_fwd = !!gen7_vc1_top_surface->intensity_compensation_top;
2259             lumscale1_single_fwd = gen7_vc1_top_surface->luma_scale_top[0];
2260             lumshift1_single_fwd = gen7_vc1_top_surface->luma_shift_top[0];
2261             if (gen7_vc1_top_surface->intensity_compensation_top == 2) {
2262                 intensitycomp_double_fwd = 1;
2263                 lumscale1_double_fwd = gen7_vc1_top_surface->luma_scale_top[1];
2264                 lumshift1_double_fwd = gen7_vc1_top_surface->luma_shift_top[1];
2265             }
2266         }
2267
2268         if (pic_param->sequence_fields.bits.interlace &&
2269             pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
2270             if (gen7_mfd_context->reference_surface[2].surface_id != VA_INVALID_ID)
2271                 gen7_vc1_bottom_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[2].obj_surface->private_data);
2272             else
2273                 gen7_vc1_bottom_surface = NULL;
2274
2275             if (gen7_vc1_bottom_surface) {
2276                 intensitycomp_single_fwd |= !!gen7_vc1_bottom_surface->intensity_compensation_bottom << 1;
2277                 lumscale2_single_fwd = gen7_vc1_bottom_surface->luma_scale_bottom[0];
2278                 lumshift2_single_fwd = gen7_vc1_bottom_surface->luma_shift_bottom[0];
2279                 if (gen7_vc1_bottom_surface->intensity_compensation_bottom == 2) {
2280                     intensitycomp_double_fwd |= 2;
2281                     lumscale2_double_fwd = gen7_vc1_bottom_surface->luma_scale_bottom[1];
2282                     lumshift2_double_fwd = gen7_vc1_bottom_surface->luma_shift_bottom[1];
2283                 }
2284             }
2285         }
2286     }
2287
2288     if (picture_type == GEN7_VC1_B_PICTURE) {
2289         if (gen7_mfd_context->reference_surface[1].surface_id != VA_INVALID_ID)
2290             gen7_vc1_top_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[1].obj_surface->private_data);
2291         else
2292             gen7_vc1_top_surface = NULL;
2293
2294         if (gen7_vc1_top_surface) {
2295             intensitycomp_single_bwd = !!gen7_vc1_top_surface->intensity_compensation_top;
2296             lumscale1_single_bwd = gen7_vc1_top_surface->luma_scale_top[0];
2297             lumshift1_single_bwd = gen7_vc1_top_surface->luma_shift_top[0];
2298         }
2299
2300         if (pic_param->sequence_fields.bits.interlace &&
2301             pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
2302             if (gen7_mfd_context->reference_surface[3].surface_id != VA_INVALID_ID)
2303                 gen7_vc1_bottom_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[3].obj_surface->private_data);
2304             else
2305                 gen7_vc1_bottom_surface = NULL;
2306
2307             if (gen7_vc1_bottom_surface) {
2308                 intensitycomp_single_bwd |= !!gen7_vc1_bottom_surface->intensity_compensation_bottom << 1;
2309                 lumscale2_single_bwd = gen7_vc1_bottom_surface->luma_scale_bottom[0];
2310                 lumshift2_single_bwd = gen7_vc1_bottom_surface->luma_shift_bottom[0];
2311             }
2312         }
2313     }
2314
2315     if (pic_param->sequence_fields.bits.interlace &&
2316         pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
2317         if (picture_type == GEN7_VC1_P_PICTURE)
2318             replication_mode = 0x5;
2319         else if (picture_type == GEN7_VC1_B_PICTURE)
2320             replication_mode = 0xf;
2321     }
2322
2323     BEGIN_BCS_BATCH(batch, 6);
2324     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
2325     OUT_BCS_BATCH(batch,
2326                   intensitycomp_double_fwd << 14 |
2327                   0 << 12 |
2328                   intensitycomp_single_fwd << 10 |
2329                   intensitycomp_single_bwd << 8 |
2330                   replication_mode << 4 |
2331                   0);
2332     OUT_BCS_BATCH(batch,
2333                   lumshift2_single_fwd << 24 |
2334                   lumshift1_single_fwd << 16 |
2335                   lumscale2_single_fwd << 8 |
2336                   lumscale1_single_fwd << 0);
2337     OUT_BCS_BATCH(batch,
2338                   lumshift2_double_fwd << 24 |
2339                   lumshift1_double_fwd << 16 |
2340                   lumscale2_double_fwd << 8 |
2341                   lumscale1_double_fwd << 0);
2342     OUT_BCS_BATCH(batch,
2343                   lumshift2_single_bwd << 24 |
2344                   lumshift1_single_bwd << 16 |
2345                   lumscale2_single_bwd << 8 |
2346                   lumscale1_single_bwd << 0);
2347     OUT_BCS_BATCH(batch,
2348                   0 << 24 |
2349                   0 << 16 |
2350                   0 << 8 |
2351                   0 << 0);
2352     ADVANCE_BCS_BATCH(batch);
2353 }
2354
2355 static void
2356 gen75_mfd_vc1_directmode_state_bplus(VADriverContextP ctx,
2357                                      struct decode_state *decode_state,
2358                                      struct gen7_mfd_context *gen7_mfd_context)
2359 {
2360     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2361     VAPictureParameterBufferVC1 *pic_param;
2362     struct object_surface *obj_surface;
2363     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
2364     int picture_type;
2365     int is_first_field = 1;
2366
2367     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2368
2369     if (!pic_param->sequence_fields.bits.interlace ||
2370         (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
2371         picture_type = pic_param->picture_fields.bits.picture_type;
2372     } else {/* Field-Interlace */
2373         is_first_field = pic_param->picture_fields.bits.is_first_field;
2374         picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
2375     }
2376
2377     if (picture_type == GEN7_VC1_P_PICTURE ||
2378         picture_type == GEN7_VC1_SKIPPED_PICTURE) {
2379         obj_surface = decode_state->render_object;
2380
2381         if (pic_param->sequence_fields.bits.interlace &&
2382             (pic_param->picture_fields.bits.frame_coding_mode == 2) && /* Field-Interlace */
2383             (pic_param->picture_fields.bits.top_field_first ^ is_first_field))
2384             dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_bottom;
2385         else
2386             dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_top;
2387     }
2388
2389     if (picture_type == GEN7_VC1_B_PICTURE) {
2390         obj_surface = decode_state->reference_objects[1];
2391         if (pic_param->backward_reference_picture != VA_INVALID_ID &&
2392             obj_surface &&
2393             obj_surface->private_data) {
2394
2395             if (pic_param->sequence_fields.bits.interlace &&
2396                 (pic_param->picture_fields.bits.frame_coding_mode == 2) && /* Field-Interlace */
2397                 (pic_param->picture_fields.bits.top_field_first ^ is_first_field))
2398                 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_bottom;
2399             else
2400                 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_top;
2401         }
2402     }
2403
2404     BEGIN_BCS_BATCH(batch, 7);
2405     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
2406
2407     if (dmv_write_buffer)
2408         OUT_BCS_RELOC(batch, dmv_write_buffer,
2409                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2410                       0);
2411     else
2412         OUT_BCS_BATCH(batch, 0);
2413
2414     OUT_BCS_BATCH(batch, 0);
2415     OUT_BCS_BATCH(batch, 0);
2416
2417     if (dmv_read_buffer)
2418         OUT_BCS_RELOC(batch, dmv_read_buffer,
2419                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2420                       0);
2421     else
2422         OUT_BCS_BATCH(batch, 0);
2423     OUT_BCS_BATCH(batch, 0);
2424     OUT_BCS_BATCH(batch, 0);
2425
2426     ADVANCE_BCS_BATCH(batch);
2427 }
2428
2429 static void
2430 gen75_mfd_vc1_directmode_state(VADriverContextP ctx,
2431                                struct decode_state *decode_state,
2432                                struct gen7_mfd_context *gen7_mfd_context)
2433 {
2434     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2435     VAPictureParameterBufferVC1 *pic_param;
2436     struct i965_driver_data *i965 = i965_driver_data(ctx);
2437     struct object_surface *obj_surface;
2438     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
2439     int picture_type;
2440     int is_first_field = 1;
2441
2442     if (IS_STEPPING_BPLUS(i965)) {
2443         gen75_mfd_vc1_directmode_state_bplus(ctx, decode_state, gen7_mfd_context);
2444         return;
2445     }
2446
2447     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2448
2449     if (!pic_param->sequence_fields.bits.interlace ||
2450         (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
2451         picture_type = pic_param->picture_fields.bits.picture_type;
2452     } else {/* Field-Interlace */
2453         is_first_field = pic_param->picture_fields.bits.is_first_field;
2454         picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
2455     }
2456
2457     if (picture_type == GEN7_VC1_P_PICTURE ||
2458         picture_type == GEN7_VC1_SKIPPED_PICTURE) {
2459         obj_surface = decode_state->render_object;
2460
2461         if (pic_param->sequence_fields.bits.interlace &&
2462             (pic_param->picture_fields.bits.frame_coding_mode == 2) && /* Field-Interlace */
2463             (pic_param->picture_fields.bits.top_field_first ^ is_first_field))
2464             dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_bottom;
2465         else
2466             dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_top;
2467     }
2468
2469     if (picture_type == GEN7_VC1_B_PICTURE) {
2470         obj_surface = decode_state->reference_objects[1];
2471         if (pic_param->backward_reference_picture != VA_INVALID_ID &&
2472             obj_surface &&
2473             obj_surface->private_data) {
2474
2475             if (pic_param->sequence_fields.bits.interlace &&
2476                 (pic_param->picture_fields.bits.frame_coding_mode == 2) && /* Field-Interlace */
2477                 (pic_param->picture_fields.bits.top_field_first ^ is_first_field))
2478                 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_bottom;
2479             else
2480                 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_top;
2481         }
2482     }
2483
2484     BEGIN_BCS_BATCH(batch, 3);
2485     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
2486
2487     if (dmv_write_buffer)
2488         OUT_BCS_RELOC(batch, dmv_write_buffer,
2489                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2490                       0);
2491     else
2492         OUT_BCS_BATCH(batch, 0);
2493
2494     if (dmv_read_buffer)
2495         OUT_BCS_RELOC(batch, dmv_read_buffer,
2496                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2497                       0);
2498     else
2499         OUT_BCS_BATCH(batch, 0);
2500
2501     ADVANCE_BCS_BATCH(batch);
2502 }
2503
/*
 * Return non-zero when the four bytes at p are 00 00 03 followed by a byte
 * smaller than 4 (the byte pattern the offset translation below skips over).
 */
static int
gen75_mfd_vc1_is_escaped_sequence(const uint8_t *p)
{
    return !p[0] && !p[1] && p[2] == 3 && p[3] < 4;
}

/*
 * Translate the macroblock bit offset of a slice into an offset within the
 * raw slice data buffer.
 *
 * buf                      - start of the raw slice data
 * in_slice_data_bit_offset - bit offset of the first macroblock as supplied
 *                            by the caller
 * profile                  - VC-1 profile; 3 selects Advanced Profile
 *
 * For Simple/Main profile, or when the header is shorter than one byte, the
 * offset is returned unchanged.  For Advanced Profile the whole header bytes
 * are walked: each "00 00 03 0x" sequence found accounts for two header
 * bytes but three raw bytes, so the returned offset grows by one byte per
 * sequence.  When such a sequence begins on the last header byte, its 0x03
 * byte in buf is overwritten with 0.
 *
 * NOTE(review): the scan looks up to three bytes ahead of the current
 * position without knowing the buffer size - the caller must guarantee the
 * buffer extends that far.
 */
static int
gen75_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    const int header_bytes = in_slice_data_bit_offset / 8;
    int consumed = 0;   /* header bytes accounted for so far */
    int raw_pos = 0;    /* matching position in the raw buffer */

    /* Simple or Main Profile, or nothing but a partial header byte */
    if (profile != 3 || !header_bytes)
        return in_slice_data_bit_offset;

    /* Advanced Profile: walk every header byte except the last one */
    while (consumed < header_bytes - 1) {
        if (gen75_mfd_vc1_is_escaped_sequence(buf + raw_pos)) {
            consumed += 2;
            raw_pos += 3;
        } else {
            consumed += 1;
            raw_pos += 1;
        }
    }

    /*
     * The walk may have stepped over the last header byte already; only
     * handle it here when it is still pending.
     */
    if (consumed == header_bytes - 1) {
        if (gen75_mfd_vc1_is_escaped_sequence(buf + raw_pos)) {
            buf[raw_pos + 2] = 0;
            raw_pos++;
        }

        raw_pos++;
    }

    return 8 * raw_pos + in_slice_data_bit_offset % 8;
}
2531
2532 static void
2533 gen75_mfd_vc1_bsd_object(VADriverContextP ctx,
2534                          VAPictureParameterBufferVC1 *pic_param,
2535                          VASliceParameterBufferVC1 *slice_param,
2536                          VASliceParameterBufferVC1 *next_slice_param,
2537                          dri_bo *slice_data_bo,
2538                          struct gen7_mfd_context *gen7_mfd_context)
2539 {
2540     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2541     int next_slice_start_vert_pos;
2542     int macroblock_offset;
2543     uint8_t *slice_data = NULL;
2544
2545     dri_bo_map(slice_data_bo, True);
2546     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
2547     macroblock_offset = gen75_mfd_vc1_get_macroblock_bit_offset(slice_data,
2548                                                                 slice_param->macroblock_offset,
2549                                                                 pic_param->sequence_fields.bits.profile);
2550     dri_bo_unmap(slice_data_bo);
2551
2552     if (next_slice_param)
2553         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
2554     else if (!pic_param->sequence_fields.bits.interlace ||
2555              pic_param->picture_fields.bits.frame_coding_mode < 2) /* Progressive or Frame-Interlace */
2556         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
2557     else /* Field-Interlace */
2558         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 32) / 32;
2559
2560     BEGIN_BCS_BATCH(batch, 5);
2561     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
2562     OUT_BCS_BATCH(batch,
2563                   slice_param->slice_data_size - (macroblock_offset >> 3));
2564     OUT_BCS_BATCH(batch,
2565                   slice_param->slice_data_offset + (macroblock_offset >> 3));
2566     OUT_BCS_BATCH(batch,
2567                   slice_param->slice_vertical_position << 16 |
2568                   next_slice_start_vert_pos << 0);
2569     OUT_BCS_BATCH(batch,
2570                   (macroblock_offset & 0x7));
2571     ADVANCE_BCS_BATCH(batch);
2572 }
2573
2574 static void
2575 gen75_mfd_vc1_decode_picture(VADriverContextP ctx,
2576                              struct decode_state *decode_state,
2577                              struct gen7_mfd_context *gen7_mfd_context)
2578 {
2579     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2580     VAPictureParameterBufferVC1 *pic_param;
2581     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
2582     dri_bo *slice_data_bo;
2583     int i, j;
2584
2585     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2586     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2587
2588     gen75_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
2589     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2590     intel_batchbuffer_emit_mi_flush(batch);
2591     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2592     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2593     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2594     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2595     gen75_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
2596     gen75_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
2597     gen75_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
2598
2599     for (j = 0; j < decode_state->num_slice_params; j++) {
2600         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2601         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
2602         slice_data_bo = decode_state->slice_datas[j]->bo;
2603         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
2604
2605         if (j == decode_state->num_slice_params - 1)
2606             next_slice_group_param = NULL;
2607         else
2608             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
2609
2610         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2611             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2612
2613             if (i < decode_state->slice_params[j]->num_elements - 1)
2614                 next_slice_param = slice_param + 1;
2615             else
2616                 next_slice_param = next_slice_group_param;
2617
2618             gen75_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2619             slice_param++;
2620         }
2621     }
2622
2623     intel_batchbuffer_end_atomic(batch);
2624     intel_batchbuffer_flush(batch);
2625 }
2626
2627 static void
2628 gen75_mfd_jpeg_decode_init(VADriverContextP ctx,
2629                            struct decode_state *decode_state,
2630                            struct gen7_mfd_context *gen7_mfd_context)
2631 {
2632     struct object_surface *obj_surface;
2633     VAPictureParameterBufferJPEGBaseline *pic_param;
2634     int subsampling = SUBSAMPLE_YUV420;
2635     int fourcc = VA_FOURCC_IMC3;
2636
2637     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2638
2639     if (pic_param->num_components == 1) {
2640         subsampling = SUBSAMPLE_YUV400;
2641         fourcc = VA_FOURCC_Y800;
2642     } else if (pic_param->num_components == 3) {
2643         int h1 = pic_param->components[0].h_sampling_factor;
2644         int h2 = pic_param->components[1].h_sampling_factor;
2645         int h3 = pic_param->components[2].h_sampling_factor;
2646         int v1 = pic_param->components[0].v_sampling_factor;
2647         int v2 = pic_param->components[1].v_sampling_factor;
2648         int v3 = pic_param->components[2].v_sampling_factor;
2649
2650         if (h1 == 2 * h2 && h2 == h3 &&
2651             v1 == 2 * v2 && v2 == v3) {
2652             subsampling = SUBSAMPLE_YUV420;
2653             fourcc = VA_FOURCC_IMC3;
2654         } else if (h1 == 2 * h2  && h2 == h3 &&
2655                    v1 == v2 && v2 == v3) {
2656             subsampling = SUBSAMPLE_YUV422H;
2657             fourcc = VA_FOURCC_422H;
2658         } else if (h1 == h2 && h2 == h3 &&
2659                    v1 == v2  && v2 == v3) {
2660             subsampling = SUBSAMPLE_YUV444;
2661             fourcc = VA_FOURCC_444P;
2662         } else if (h1 == 4 * h2 && h2 ==  h3 &&
2663                    v1 == v2 && v2 == v3) {
2664             subsampling = SUBSAMPLE_YUV411;
2665             fourcc = VA_FOURCC_411P;
2666         } else if (h1 == h2 && h2 == h3 &&
2667                    v1 == 2 * v2 && v2 == v3) {
2668             subsampling = SUBSAMPLE_YUV422V;
2669             fourcc = VA_FOURCC_422V;
2670         } else
2671             assert(0);
2672     } else {
2673         assert(0);
2674     }
2675
2676     /* Current decoded picture */
2677     obj_surface = decode_state->render_object;
2678     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
2679
2680     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2681     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2682     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2683     gen7_mfd_context->pre_deblocking_output.valid = 1;
2684
2685     gen7_mfd_context->post_deblocking_output.bo = NULL;
2686     gen7_mfd_context->post_deblocking_output.valid = 0;
2687
2688     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2689     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
2690
2691     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2692     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
2693
2694     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2695     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
2696
2697     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2698     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
2699
2700     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2701     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2702 }
2703
2704 static const int va_to_gen7_jpeg_rotation[4] = {
2705     GEN7_JPEG_ROTATION_0,
2706     GEN7_JPEG_ROTATION_90,
2707     GEN7_JPEG_ROTATION_180,
2708     GEN7_JPEG_ROTATION_270
2709 };
2710
2711 static void
2712 gen75_mfd_jpeg_pic_state(VADriverContextP ctx,
2713                          struct decode_state *decode_state,
2714                          struct gen7_mfd_context *gen7_mfd_context)
2715 {
2716     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2717     VAPictureParameterBufferJPEGBaseline *pic_param;
2718     int chroma_type = GEN7_YUV420;
2719     int frame_width_in_blks;
2720     int frame_height_in_blks;
2721
2722     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2723     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2724
2725     if (pic_param->num_components == 1)
2726         chroma_type = GEN7_YUV400;
2727     else if (pic_param->num_components == 3) {
2728         int h1 = pic_param->components[0].h_sampling_factor;
2729         int h2 = pic_param->components[1].h_sampling_factor;
2730         int h3 = pic_param->components[2].h_sampling_factor;
2731         int v1 = pic_param->components[0].v_sampling_factor;
2732         int v2 = pic_param->components[1].v_sampling_factor;
2733         int v3 = pic_param->components[2].v_sampling_factor;
2734
2735         if (h1 == 2 * h2 && h2 == h3 &&
2736             v1 == 2 * v2 && v2 == v3)
2737             chroma_type = GEN7_YUV420;
2738         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2739                  v1 == 1 && v2 == 1 && v3 == 1)
2740             chroma_type = GEN7_YUV422H_2Y;
2741         else if (h1 == h2 && h2 == h3 &&
2742                  v1 == v2 && v2 == v3)
2743             chroma_type = GEN7_YUV444;
2744         else if (h1 == 4 * h2 && h2 == h3 &&
2745                  v1 == v2 && v2 == v3)
2746             chroma_type = GEN7_YUV411;
2747         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2748                  v1 == 2 && v2 == 1 && v3 == 1)
2749             chroma_type = GEN7_YUV422V_2Y;
2750         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2751                  v1 == 2 && v2 == 2 && v3 == 2)
2752             chroma_type = GEN7_YUV422H_4Y;
2753         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2754                  v1 == 2 && v2 == 1 && v3 == 1)
2755             chroma_type = GEN7_YUV422V_4Y;
2756         else
2757             assert(0);
2758     }
2759
2760     if (chroma_type == GEN7_YUV400 ||
2761         chroma_type == GEN7_YUV444 ||
2762         chroma_type == GEN7_YUV422V_2Y) {
2763         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2764         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2765     } else if (chroma_type == GEN7_YUV411) {
2766         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2767         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2768     } else {
2769         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2770         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2771     }
2772
2773     BEGIN_BCS_BATCH(batch, 3);
2774     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2775     OUT_BCS_BATCH(batch,
2776                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2777                   (chroma_type << 0));
2778     OUT_BCS_BATCH(batch,
2779                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2780                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2781     ADVANCE_BCS_BATCH(batch);
2782 }
2783
2784 static const int va_to_gen7_jpeg_hufftable[2] = {
2785     MFX_HUFFTABLE_ID_Y,
2786     MFX_HUFFTABLE_ID_UV
2787 };
2788
2789 static void
2790 gen75_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2791                                 struct decode_state *decode_state,
2792                                 struct gen7_mfd_context *gen7_mfd_context,
2793                                 int num_tables)
2794 {
2795     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2796     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2797     int index;
2798
2799     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2800         return;
2801
2802     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2803
2804     for (index = 0; index < num_tables; index++) {
2805         int id = va_to_gen7_jpeg_hufftable[index];
2806
2807         if (!huffman_table->load_huffman_table[index])
2808             continue;
2809
2810         BEGIN_BCS_BATCH(batch, 53);
2811         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2812         OUT_BCS_BATCH(batch, id);
2813         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2814         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2815         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2816         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2817         ADVANCE_BCS_BATCH(batch);
2818     }
2819 }
2820
2821 static const int va_to_gen7_jpeg_qm[5] = {
2822     -1,
2823     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2824     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2825     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2826     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2827 };
2828
2829 static void
2830 gen75_mfd_jpeg_qm_state(VADriverContextP ctx,
2831                         struct decode_state *decode_state,
2832                         struct gen7_mfd_context *gen7_mfd_context)
2833 {
2834     VAPictureParameterBufferJPEGBaseline *pic_param;
2835     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2836     int index;
2837
2838     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2839         return;
2840
2841     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2842     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2843
2844     assert(pic_param->num_components <= 3);
2845
2846     for (index = 0; index < pic_param->num_components; index++) {
2847         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2848         int qm_type;
2849         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2850         unsigned char raster_qm[64];
2851         int j;
2852
2853         if (id > 4 || id < 1)
2854             continue;
2855
2856         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2857             continue;
2858
2859         qm_type = va_to_gen7_jpeg_qm[id];
2860
2861         for (j = 0; j < 64; j++)
2862             raster_qm[zigzag_direct[j]] = qm[j];
2863
2864         gen75_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2865     }
2866 }
2867
2868 static void
2869 gen75_mfd_jpeg_bsd_object(VADriverContextP ctx,
2870                           VAPictureParameterBufferJPEGBaseline *pic_param,
2871                           VASliceParameterBufferJPEGBaseline *slice_param,
2872                           VASliceParameterBufferJPEGBaseline *next_slice_param,
2873                           dri_bo *slice_data_bo,
2874                           struct gen7_mfd_context *gen7_mfd_context)
2875 {
2876     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2877     int scan_component_mask = 0;
2878     int i;
2879
2880     assert(slice_param->num_components > 0);
2881     assert(slice_param->num_components < 4);
2882     assert(slice_param->num_components <= pic_param->num_components);
2883
2884     for (i = 0; i < slice_param->num_components; i++) {
2885         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2886         case 1:
2887             scan_component_mask |= (1 << 0);
2888             break;
2889         case 2:
2890             scan_component_mask |= (1 << 1);
2891             break;
2892         case 3:
2893             scan_component_mask |= (1 << 2);
2894             break;
2895         default:
2896             assert(0);
2897             break;
2898         }
2899     }
2900
2901     BEGIN_BCS_BATCH(batch, 6);
2902     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2903     OUT_BCS_BATCH(batch,
2904                   slice_param->slice_data_size);
2905     OUT_BCS_BATCH(batch,
2906                   slice_param->slice_data_offset);
2907     OUT_BCS_BATCH(batch,
2908                   slice_param->slice_horizontal_position << 16 |
2909                   slice_param->slice_vertical_position << 0);
2910     OUT_BCS_BATCH(batch,
2911                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2912                   (scan_component_mask << 27) |                 /* scan components */
2913                   (0 << 26) |   /* disable interrupt allowed */
2914                   (slice_param->num_mcus << 0));                /* MCU count */
2915     OUT_BCS_BATCH(batch,
2916                   (slice_param->restart_interval << 0));    /* RestartInterval */
2917     ADVANCE_BCS_BATCH(batch);
2918 }
2919
2920 /* Workaround for JPEG decoding on Ivybridge */
2921
/*
 * Canned bitstream used by the JPEG workaround: the gen75_jpeg_wa_*
 * functions upload `data` into a buffer object and feed it through the AVC
 * decode path before the real JPEG decode is programmed.
 */
static struct {
    int width;              /* width of the workaround surface */
    int height;             /* height of the workaround surface */
    unsigned char data[32]; /* bitstream bytes; only data_size of them are used */
    int data_size;          /* number of valid bytes in data */
    int data_bit_offset;    /* bit offset programmed into the AVC BSD object */
    int qp;                 /* value programmed into the AVC slice state */
} gen7_jpeg_wa_clip = {
    .width = 16,
    .height = 16,
    .data = {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    .data_size = 14,
    .data_bit_offset = 40,
    .qp = 28,
};
2940
2941 static void
2942 gen75_jpeg_wa_init(VADriverContextP ctx,
2943                    struct gen7_mfd_context *gen7_mfd_context)
2944 {
2945     struct i965_driver_data *i965 = i965_driver_data(ctx);
2946     VAStatus status;
2947     struct object_surface *obj_surface;
2948
2949     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2950         i965_DestroySurfaces(ctx,
2951                              &gen7_mfd_context->jpeg_wa_surface_id,
2952                              1);
2953
2954     status = i965_CreateSurfaces(ctx,
2955                                  gen7_jpeg_wa_clip.width,
2956                                  gen7_jpeg_wa_clip.height,
2957                                  VA_RT_FORMAT_YUV420,
2958                                  1,
2959                                  &gen7_mfd_context->jpeg_wa_surface_id);
2960     assert(status == VA_STATUS_SUCCESS);
2961
2962     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2963     assert(obj_surface);
2964     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2965     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2966
2967     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2968         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2969                                                                "JPEG WA data",
2970                                                                0x1000,
2971                                                                0x1000);
2972         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2973                        0,
2974                        gen7_jpeg_wa_clip.data_size,
2975                        gen7_jpeg_wa_clip.data);
2976     }
2977 }
2978
2979 static void
2980 gen75_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2981                                struct gen7_mfd_context *gen7_mfd_context)
2982 {
2983     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2984
2985     BEGIN_BCS_BATCH(batch, 5);
2986     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2987     OUT_BCS_BATCH(batch,
2988                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2989                   (MFD_MODE_VLD << 15) | /* VLD mode */
2990                   (0 << 10) | /* disable Stream-Out */
2991                   (0 << 9)  | /* Post Deblocking Output */
2992                   (1 << 8)  | /* Pre Deblocking Output */
2993                   (0 << 5)  | /* not in stitch mode */
2994                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2995                   (MFX_FORMAT_AVC << 0));
2996     OUT_BCS_BATCH(batch,
2997                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2998                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2999                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
3000                   (0 << 1)  |
3001                   (0 << 0));
3002     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
3003     OUT_BCS_BATCH(batch, 0); /* reserved */
3004     ADVANCE_BCS_BATCH(batch);
3005 }
3006
3007 static void
3008 gen75_jpeg_wa_surface_state(VADriverContextP ctx,
3009                             struct gen7_mfd_context *gen7_mfd_context)
3010 {
3011     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
3012     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3013
3014     BEGIN_BCS_BATCH(batch, 6);
3015     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
3016     OUT_BCS_BATCH(batch, 0);
3017     OUT_BCS_BATCH(batch,
3018                   ((obj_surface->orig_width - 1) << 18) |
3019                   ((obj_surface->orig_height - 1) << 4));
3020     OUT_BCS_BATCH(batch,
3021                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
3022                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
3023                   (0 << 22) | /* surface object control state, ignored */
3024                   ((obj_surface->width - 1) << 3) | /* pitch */
3025                   (0 << 2)  | /* must be 0 */
3026                   (1 << 1)  | /* must be tiled */
3027                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
3028     OUT_BCS_BATCH(batch,
3029                   (0 << 16) | /* X offset for U(Cb), must be 0 */
3030                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
3031     OUT_BCS_BATCH(batch,
3032                   (0 << 16) | /* X offset for V(Cr), must be 0 */
3033                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
3034     ADVANCE_BCS_BATCH(batch);
3035 }
3036
3037 static void
3038 gen75_jpeg_wa_pipe_buf_addr_state_bplus(VADriverContextP ctx,
3039                                         struct gen7_mfd_context *gen7_mfd_context)
3040 {
3041     struct i965_driver_data *i965 = i965_driver_data(ctx);
3042     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
3043     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3044     dri_bo *intra_bo;
3045     int i;
3046
3047     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
3048                             "intra row store",
3049                             128 * 64,
3050                             0x1000);
3051
3052     BEGIN_BCS_BATCH(batch, 61);
3053     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
3054     OUT_BCS_RELOC(batch,
3055                   obj_surface->bo,
3056                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
3057                   0);
3058     OUT_BCS_BATCH(batch, 0);
3059     OUT_BCS_BATCH(batch, 0);
3060
3061
3062     OUT_BCS_BATCH(batch, 0); /* post deblocking */
3063     OUT_BCS_BATCH(batch, 0);
3064     OUT_BCS_BATCH(batch, 0);
3065
3066     /* uncompressed-video & stream out 7-12 */
3067     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
3068     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
3069     OUT_BCS_BATCH(batch, 0);
3070     OUT_BCS_BATCH(batch, 0);
3071     OUT_BCS_BATCH(batch, 0);
3072     OUT_BCS_BATCH(batch, 0);
3073
3074     /* the DW 13-15 is for intra row store scratch */
3075     OUT_BCS_RELOC(batch,
3076                   intra_bo,
3077                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
3078                   0);
3079     OUT_BCS_BATCH(batch, 0);
3080     OUT_BCS_BATCH(batch, 0);
3081
3082     /* the DW 16-18 is for deblocking filter */
3083     OUT_BCS_BATCH(batch, 0);
3084     OUT_BCS_BATCH(batch, 0);
3085     OUT_BCS_BATCH(batch, 0);
3086
3087     /* DW 19..50 */
3088     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
3089         OUT_BCS_BATCH(batch, 0);
3090         OUT_BCS_BATCH(batch, 0);
3091     }
3092     OUT_BCS_BATCH(batch, 0);
3093
3094     /* the DW52-54 is for mb status address */
3095     OUT_BCS_BATCH(batch, 0);
3096     OUT_BCS_BATCH(batch, 0);
3097     OUT_BCS_BATCH(batch, 0);
3098     /* the DW56-60 is for ILDB & second ILDB address */
3099     OUT_BCS_BATCH(batch, 0);
3100     OUT_BCS_BATCH(batch, 0);
3101     OUT_BCS_BATCH(batch, 0);
3102     OUT_BCS_BATCH(batch, 0);
3103     OUT_BCS_BATCH(batch, 0);
3104     OUT_BCS_BATCH(batch, 0);
3105
3106     ADVANCE_BCS_BATCH(batch);
3107
3108     dri_bo_unreference(intra_bo);
3109 }
3110
3111 static void
3112 gen75_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
3113                                   struct gen7_mfd_context *gen7_mfd_context)
3114 {
3115     struct i965_driver_data *i965 = i965_driver_data(ctx);
3116     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
3117     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3118     dri_bo *intra_bo;
3119     int i;
3120
3121     if (IS_STEPPING_BPLUS(i965)) {
3122         gen75_jpeg_wa_pipe_buf_addr_state_bplus(ctx, gen7_mfd_context);
3123         return;
3124     }
3125
3126     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
3127                             "intra row store",
3128                             128 * 64,
3129                             0x1000);
3130
3131     BEGIN_BCS_BATCH(batch, 25);
3132     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
3133     OUT_BCS_RELOC(batch,
3134                   obj_surface->bo,
3135                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
3136                   0);
3137
3138     OUT_BCS_BATCH(batch, 0); /* post deblocking */
3139
3140     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
3141     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
3142
3143     OUT_BCS_RELOC(batch,
3144                   intra_bo,
3145                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
3146                   0);
3147
3148     OUT_BCS_BATCH(batch, 0);
3149
3150     /* DW 7..22 */
3151     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
3152         OUT_BCS_BATCH(batch, 0);
3153     }
3154
3155     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
3156     OUT_BCS_BATCH(batch, 0);
3157     ADVANCE_BCS_BATCH(batch);
3158
3159     dri_bo_unreference(intra_bo);
3160 }
3161
3162 static void
3163 gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
3164                                             struct gen7_mfd_context *gen7_mfd_context)
3165 {
3166     struct i965_driver_data *i965 = i965_driver_data(ctx);
3167     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3168     dri_bo *bsd_mpc_bo, *mpr_bo;
3169
3170     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
3171                               "bsd mpc row store",
3172                               11520, /* 1.5 * 120 * 64 */
3173                               0x1000);
3174
3175     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
3176                           "mpr row store",
3177                           7680, /* 1. 0 * 120 * 64 */
3178                           0x1000);
3179
3180     BEGIN_BCS_BATCH(batch, 10);
3181     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
3182
3183     OUT_BCS_RELOC(batch,
3184                   bsd_mpc_bo,
3185                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
3186                   0);
3187
3188     OUT_BCS_BATCH(batch, 0);
3189     OUT_BCS_BATCH(batch, 0);
3190
3191     OUT_BCS_RELOC(batch,
3192                   mpr_bo,
3193                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
3194                   0);
3195     OUT_BCS_BATCH(batch, 0);
3196     OUT_BCS_BATCH(batch, 0);
3197
3198     OUT_BCS_BATCH(batch, 0);
3199     OUT_BCS_BATCH(batch, 0);
3200     OUT_BCS_BATCH(batch, 0);
3201
3202     ADVANCE_BCS_BATCH(batch);
3203
3204     dri_bo_unreference(bsd_mpc_bo);
3205     dri_bo_unreference(mpr_bo);
3206 }
3207
3208 static void
3209 gen75_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
3210                                       struct gen7_mfd_context *gen7_mfd_context)
3211 {
3212     struct i965_driver_data *i965 = i965_driver_data(ctx);
3213     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3214     dri_bo *bsd_mpc_bo, *mpr_bo;
3215
3216     if (IS_STEPPING_BPLUS(i965)) {
3217         gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(ctx, gen7_mfd_context);
3218         return;
3219     }
3220
3221     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
3222                               "bsd mpc row store",
3223                               11520, /* 1.5 * 120 * 64 */
3224                               0x1000);
3225
3226     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
3227                           "mpr row store",
3228                           7680, /* 1. 0 * 120 * 64 */
3229                           0x1000);
3230
3231     BEGIN_BCS_BATCH(batch, 4);
3232     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
3233
3234     OUT_BCS_RELOC(batch,
3235                   bsd_mpc_bo,
3236                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
3237                   0);
3238
3239     OUT_BCS_RELOC(batch,
3240                   mpr_bo,
3241                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
3242                   0);
3243     OUT_BCS_BATCH(batch, 0);
3244
3245     ADVANCE_BCS_BATCH(batch);
3246
3247     dri_bo_unreference(bsd_mpc_bo);
3248     dri_bo_unreference(mpr_bo);
3249 }
3250
3251 static void
3252 gen75_jpeg_wa_avc_qm_state(VADriverContextP ctx,
3253                            struct gen7_mfd_context *gen7_mfd_context)
3254 {
3255
3256 }
3257
3258 static void
3259 gen75_jpeg_wa_avc_img_state(VADriverContextP ctx,
3260                             struct gen7_mfd_context *gen7_mfd_context)
3261 {
3262     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3263     int img_struct = 0;
3264     int mbaff_frame_flag = 0;
3265     unsigned int width_in_mbs = 1, height_in_mbs = 1;
3266
3267     BEGIN_BCS_BATCH(batch, 16);
3268     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
3269     OUT_BCS_BATCH(batch,
3270                   (width_in_mbs * height_in_mbs - 1));
3271     OUT_BCS_BATCH(batch,
3272                   ((height_in_mbs - 1) << 16) |
3273                   ((width_in_mbs - 1) << 0));
3274     OUT_BCS_BATCH(batch,
3275                   (0 << 24) |
3276                   (0 << 16) |
3277                   (0 << 14) |
3278                   (0 << 13) |
3279                   (0 << 12) | /* differ from GEN6 */
3280                   (0 << 10) |
3281                   (img_struct << 8));
3282     OUT_BCS_BATCH(batch,
3283                   (1 << 10) | /* 4:2:0 */
3284                   (1 << 7) |  /* CABAC */
3285                   (0 << 6) |
3286                   (0 << 5) |
3287                   (0 << 4) |
3288                   (0 << 3) |
3289                   (1 << 2) |
3290                   (mbaff_frame_flag << 1) |
3291                   (0 << 0));
3292     OUT_BCS_BATCH(batch, 0);
3293     OUT_BCS_BATCH(batch, 0);
3294     OUT_BCS_BATCH(batch, 0);
3295     OUT_BCS_BATCH(batch, 0);
3296     OUT_BCS_BATCH(batch, 0);
3297     OUT_BCS_BATCH(batch, 0);
3298     OUT_BCS_BATCH(batch, 0);
3299     OUT_BCS_BATCH(batch, 0);
3300     OUT_BCS_BATCH(batch, 0);
3301     OUT_BCS_BATCH(batch, 0);
3302     OUT_BCS_BATCH(batch, 0);
3303     ADVANCE_BCS_BATCH(batch);
3304 }
3305
3306 static void
3307 gen75_jpeg_wa_avc_directmode_state_bplus(VADriverContextP ctx,
3308                                          struct gen7_mfd_context *gen7_mfd_context)
3309 {
3310     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3311     int i;
3312
3313     BEGIN_BCS_BATCH(batch, 71);
3314     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
3315
3316     /* reference surfaces 0..15 */
3317     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
3318         OUT_BCS_BATCH(batch, 0); /* top */
3319         OUT_BCS_BATCH(batch, 0); /* bottom */
3320     }
3321
3322     OUT_BCS_BATCH(batch, 0);
3323
3324     /* the current decoding frame/field */
3325     OUT_BCS_BATCH(batch, 0); /* top */
3326     OUT_BCS_BATCH(batch, 0);
3327     OUT_BCS_BATCH(batch, 0);
3328
3329     /* POC List */
3330     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
3331         OUT_BCS_BATCH(batch, 0);
3332         OUT_BCS_BATCH(batch, 0);
3333     }
3334
3335     OUT_BCS_BATCH(batch, 0);
3336     OUT_BCS_BATCH(batch, 0);
3337
3338     ADVANCE_BCS_BATCH(batch);
3339 }
3340
3341 static void
3342 gen75_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
3343                                    struct gen7_mfd_context *gen7_mfd_context)
3344 {
3345     struct i965_driver_data *i965 = i965_driver_data(ctx);
3346     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3347     int i;
3348
3349     if (IS_STEPPING_BPLUS(i965)) {
3350         gen75_jpeg_wa_avc_directmode_state_bplus(ctx, gen7_mfd_context);
3351         return;
3352     }
3353
3354     BEGIN_BCS_BATCH(batch, 69);
3355     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
3356
3357     /* reference surfaces 0..15 */
3358     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
3359         OUT_BCS_BATCH(batch, 0); /* top */
3360         OUT_BCS_BATCH(batch, 0); /* bottom */
3361     }
3362
3363     /* the current decoding frame/field */
3364     OUT_BCS_BATCH(batch, 0); /* top */
3365     OUT_BCS_BATCH(batch, 0); /* bottom */
3366
3367     /* POC List */
3368     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
3369         OUT_BCS_BATCH(batch, 0);
3370         OUT_BCS_BATCH(batch, 0);
3371     }
3372
3373     OUT_BCS_BATCH(batch, 0);
3374     OUT_BCS_BATCH(batch, 0);
3375
3376     ADVANCE_BCS_BATCH(batch);
3377 }
3378
3379 static void
3380 gen75_jpeg_wa_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
3381                                             struct gen7_mfd_context *gen7_mfd_context)
3382 {
3383     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3384
3385     BEGIN_BCS_BATCH(batch, 11);
3386     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
3387     OUT_BCS_RELOC(batch,
3388                   gen7_mfd_context->jpeg_wa_slice_data_bo,
3389                   I915_GEM_DOMAIN_INSTRUCTION, 0,
3390                   0);
3391     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
3392     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3393     OUT_BCS_BATCH(batch, 0);
3394     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3395     OUT_BCS_BATCH(batch, 0);
3396     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3397     OUT_BCS_BATCH(batch, 0);
3398     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3399     OUT_BCS_BATCH(batch, 0);
3400     ADVANCE_BCS_BATCH(batch);
3401 }
3402
3403 static void
3404 gen75_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
3405                                       struct gen7_mfd_context *gen7_mfd_context)
3406 {
3407     struct i965_driver_data *i965 = i965_driver_data(ctx);
3408     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3409
3410     if (IS_STEPPING_BPLUS(i965)) {
3411         gen75_jpeg_wa_ind_obj_base_addr_state_bplus(ctx, gen7_mfd_context);
3412         return;
3413     }
3414
3415     BEGIN_BCS_BATCH(batch, 11);
3416     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
3417     OUT_BCS_RELOC(batch,
3418                   gen7_mfd_context->jpeg_wa_slice_data_bo,
3419                   I915_GEM_DOMAIN_INSTRUCTION, 0,
3420                   0);
3421     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
3422     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3423     OUT_BCS_BATCH(batch, 0);
3424     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3425     OUT_BCS_BATCH(batch, 0);
3426     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3427     OUT_BCS_BATCH(batch, 0);
3428     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3429     OUT_BCS_BATCH(batch, 0);
3430     ADVANCE_BCS_BATCH(batch);
3431 }
3432
3433 static void
3434 gen75_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
3435                              struct gen7_mfd_context *gen7_mfd_context)
3436 {
3437     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3438
3439     /* the input bitsteam format on GEN7 differs from GEN6 */
3440     BEGIN_BCS_BATCH(batch, 6);
3441     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
3442     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
3443     OUT_BCS_BATCH(batch, 0);
3444     OUT_BCS_BATCH(batch,
3445                   (0 << 31) |
3446                   (0 << 14) |
3447                   (0 << 12) |
3448                   (0 << 10) |
3449                   (0 << 8));
3450     OUT_BCS_BATCH(batch,
3451                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
3452                   (0 << 5)  |
3453                   (0 << 4)  |
3454                   (1 << 3) | /* LastSlice Flag */
3455                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
3456     OUT_BCS_BATCH(batch, 0);
3457     ADVANCE_BCS_BATCH(batch);
3458 }
3459
3460 static void
3461 gen75_jpeg_wa_avc_slice_state(VADriverContextP ctx,
3462                               struct gen7_mfd_context *gen7_mfd_context)
3463 {
3464     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3465     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
3466     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
3467     int first_mb_in_slice = 0;
3468     int slice_type = SLICE_TYPE_I;
3469
3470     BEGIN_BCS_BATCH(batch, 11);
3471     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
3472     OUT_BCS_BATCH(batch, slice_type);
3473     OUT_BCS_BATCH(batch,
3474                   (num_ref_idx_l1 << 24) |
3475                   (num_ref_idx_l0 << 16) |
3476                   (0 << 8) |
3477                   (0 << 0));
3478     OUT_BCS_BATCH(batch,
3479                   (0 << 29) |
3480                   (1 << 27) |   /* disable Deblocking */
3481                   (0 << 24) |
3482                   (gen7_jpeg_wa_clip.qp << 16) |
3483                   (0 << 8) |
3484                   (0 << 0));
3485     OUT_BCS_BATCH(batch,
3486                   (slice_ver_pos << 24) |
3487                   (slice_hor_pos << 16) |
3488                   (first_mb_in_slice << 0));
3489     OUT_BCS_BATCH(batch,
3490                   (next_slice_ver_pos << 16) |
3491                   (next_slice_hor_pos << 0));
3492     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
3493     OUT_BCS_BATCH(batch, 0);
3494     OUT_BCS_BATCH(batch, 0);
3495     OUT_BCS_BATCH(batch, 0);
3496     OUT_BCS_BATCH(batch, 0);
3497     ADVANCE_BCS_BATCH(batch);
3498 }
3499
3500 static void
3501 gen75_mfd_jpeg_wa(VADriverContextP ctx,
3502                   struct gen7_mfd_context *gen7_mfd_context)
3503 {
3504     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3505     gen75_jpeg_wa_init(ctx, gen7_mfd_context);
3506     intel_batchbuffer_emit_mi_flush(batch);
3507     gen75_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
3508     gen75_jpeg_wa_surface_state(ctx, gen7_mfd_context);
3509     gen75_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
3510     gen75_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
3511     gen75_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
3512     gen75_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
3513     gen75_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
3514
3515     gen75_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
3516     gen75_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
3517     gen75_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
3518 }
3519
3520 void
3521 gen75_mfd_jpeg_decode_picture(VADriverContextP ctx,
3522                               struct decode_state *decode_state,
3523                               struct gen7_mfd_context *gen7_mfd_context)
3524 {
3525     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3526     VAPictureParameterBufferJPEGBaseline *pic_param;
3527     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
3528     dri_bo *slice_data_bo;
3529     int i, j, max_selector = 0;
3530
3531     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3532     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
3533
3534     /* Currently only support Baseline DCT */
3535     gen75_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
3536     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3537     gen75_mfd_jpeg_wa(ctx, gen7_mfd_context);
3538     intel_batchbuffer_emit_mi_flush(batch);
3539     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3540     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3541     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3542     gen75_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
3543     gen75_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
3544
3545     for (j = 0; j < decode_state->num_slice_params; j++) {
3546         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3547         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3548         slice_data_bo = decode_state->slice_datas[j]->bo;
3549         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3550
3551         if (j == decode_state->num_slice_params - 1)
3552             next_slice_group_param = NULL;
3553         else
3554             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3555
3556         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3557             int component;
3558
3559             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3560
3561             if (i < decode_state->slice_params[j]->num_elements - 1)
3562                 next_slice_param = slice_param + 1;
3563             else
3564                 next_slice_param = next_slice_group_param;
3565
3566             for (component = 0; component < slice_param->num_components; component++) {
3567                 if (max_selector < slice_param->components[component].dc_table_selector)
3568                     max_selector = slice_param->components[component].dc_table_selector;
3569
3570                 if (max_selector < slice_param->components[component].ac_table_selector)
3571                     max_selector = slice_param->components[component].ac_table_selector;
3572             }
3573
3574             slice_param++;
3575         }
3576     }
3577
3578     assert(max_selector < 2);
3579     gen75_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
3580
3581     for (j = 0; j < decode_state->num_slice_params; j++) {
3582         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3583         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3584         slice_data_bo = decode_state->slice_datas[j]->bo;
3585         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3586
3587         if (j == decode_state->num_slice_params - 1)
3588             next_slice_group_param = NULL;
3589         else
3590             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3591
3592         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3593             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3594
3595             if (i < decode_state->slice_params[j]->num_elements - 1)
3596                 next_slice_param = slice_param + 1;
3597             else
3598                 next_slice_param = next_slice_group_param;
3599
3600             gen75_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
3601             slice_param++;
3602         }
3603     }
3604
3605     intel_batchbuffer_end_atomic(batch);
3606     intel_batchbuffer_flush(batch);
3607 }
3608
3609 static VAStatus
3610 gen75_mfd_decode_picture(VADriverContextP ctx,
3611                          VAProfile profile,
3612                          union codec_state *codec_state,
3613                          struct hw_context *hw_context)
3614
3615 {
3616     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3617     struct decode_state *decode_state = &codec_state->decode;
3618     VAStatus vaStatus;
3619
3620     assert(gen7_mfd_context);
3621
3622     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3623
3624     if (vaStatus != VA_STATUS_SUCCESS)
3625         goto out;
3626
3627     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3628
3629     switch (profile) {
3630     case VAProfileMPEG2Simple:
3631     case VAProfileMPEG2Main:
3632         gen75_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3633         break;
3634
3635     case VAProfileH264ConstrainedBaseline:
3636     case VAProfileH264Main:
3637     case VAProfileH264High:
3638     case VAProfileH264StereoHigh:
3639     case VAProfileH264MultiviewHigh:
3640         gen75_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3641         break;
3642
3643     case VAProfileVC1Simple:
3644     case VAProfileVC1Main:
3645     case VAProfileVC1Advanced:
3646         gen75_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3647         break;
3648
3649     case VAProfileJPEGBaseline:
3650         gen75_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3651         break;
3652
3653     default:
3654         assert(0);
3655         break;
3656     }
3657
3658     vaStatus = VA_STATUS_SUCCESS;
3659
3660 out:
3661     return vaStatus;
3662 }
3663
3664 static void
3665 gen75_mfd_context_destroy(void *hw_context)
3666 {
3667     VADriverContextP ctx;
3668     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3669
3670     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3671
3672     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3673     gen7_mfd_context->post_deblocking_output.bo = NULL;
3674
3675     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3676     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3677
3678     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3679     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3680
3681     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3682     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3683
3684     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3685     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3686
3687     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3688     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3689
3690     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3691     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3692
3693     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3694
3695     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3696         i965_DestroySurfaces(ctx,
3697                              &gen7_mfd_context->jpeg_wa_surface_id,
3698                              1);
3699         gen7_mfd_context->jpeg_wa_surface_object = NULL;
3700     }
3701
3702     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3703     free(gen7_mfd_context);
3704 }
3705
3706 static void gen75_mfd_mpeg2_context_init(VADriverContextP ctx,
3707                                          struct gen7_mfd_context *gen7_mfd_context)
3708 {
3709     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3710     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3711     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3712     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3713 }
3714
3715 struct hw_context *
3716 gen75_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3717 {
3718     struct intel_driver_data *intel = intel_driver_data(ctx);
3719     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3720     int i;
3721
3722     assert(gen7_mfd_context);
3723     gen7_mfd_context->base.destroy = gen75_mfd_context_destroy;
3724     gen7_mfd_context->base.run = gen75_mfd_decode_picture;
3725     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3726
3727     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3728         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3729         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3730         gen7_mfd_context->reference_surface[i].obj_surface = NULL;
3731     }
3732
3733     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3734     gen7_mfd_context->jpeg_wa_surface_object = NULL;
3735
3736     switch (obj_config->profile) {
3737     case VAProfileMPEG2Simple:
3738     case VAProfileMPEG2Main:
3739         gen75_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3740         break;
3741
3742     case VAProfileH264ConstrainedBaseline:
3743     case VAProfileH264Main:
3744     case VAProfileH264High:
3745     case VAProfileH264StereoHigh:
3746     case VAProfileH264MultiviewHigh:
3747         gen75_mfd_avc_context_init(ctx, gen7_mfd_context);
3748         break;
3749     default:
3750         break;
3751     }
3752
3753     gen7_mfd_context->driver_context = ctx;
3754     return (struct hw_context *)gen7_mfd_context;
3755 }