genX_mfd: no raw coded bitplanes for skipped pictures
[android-x86/hardware-intel-common-vaapi.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV     2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
51 static const uint32_t zigzag_direct[64] = {
52     0,   1,  8, 16,  9,  2,  3, 10,
53     17, 24, 32, 25, 18, 11,  4,  5,
54     12, 19, 26, 33, 40, 48, 41, 34,
55     27, 20, 13,  6,  7, 14, 21, 28,
56     35, 42, 49, 56, 57, 50, 43, 36,
57     29, 22, 15, 23, 30, 37, 44, 51,
58     58, 59, 52, 45, 38, 31, 39, 46,
59     53, 60, 61, 54, 47, 55, 62, 63
60 };
61
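/*
 * Lazily attach the per-surface AVC private data (GenAvcSurface) to the
 * render target and allocate its direct-MV scratch buffer.  The DMV buffer
 * covers the whole frame (width_in_mbs * height_in_mbs * 128 bytes)
 * irrespective of field coding, as noted below.
 */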
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77
78         if (!gen7_avc_surface)
79             return;
80
81         gen7_avc_surface->base.frame_store_id = -1;
82         assert((obj_surface->size & 0x3f) == 0);
83         obj_surface->private_data = gen7_avc_surface;
84     }
85
86     /* DMV buffers now relate to the whole frame, irrespective of
87        field coding modes */
88     if (gen7_avc_surface->dmv_top == NULL) {
89         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
90                                                  "direct mv w/r buffer",
91                                                  width_in_mbs * height_in_mbs * 128,
92                                                  0x1000);
93         assert(gen7_avc_surface->dmv_top);
94     }
95 }
96
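/*
 * Program MFX_PIPE_MODE_SELECT for a decode pass: long-format VLD mode,
 * stream-out disabled, and the pre-/post-deblocking outputs enabled
 * according to which of the two buffers decode_init marked valid.
 * standard_select picks the codec (MPEG-2, AVC, VC-1, JPEG or VP8).
 */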
97 static void
98 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
99                           struct decode_state *decode_state,
100                           int standard_select,
101                           struct gen7_mfd_context *gen7_mfd_context)
102 {
103     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
104
105     assert(standard_select == MFX_FORMAT_MPEG2 ||
106            standard_select == MFX_FORMAT_AVC ||
107            standard_select == MFX_FORMAT_VC1 ||
108            standard_select == MFX_FORMAT_JPEG ||
109            standard_select == MFX_FORMAT_VP8);
110
111     BEGIN_BCS_BATCH(batch, 5);
112     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
113     OUT_BCS_BATCH(batch,
114                   (MFX_LONG_MODE << 17) | /* Currently only long format is supported */
115                   (MFD_MODE_VLD << 15) | /* VLD mode */
116                   (0 << 10) | /* disable Stream-Out */
117                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
118                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
119                   (0 << 5)  | /* not in stitch mode */
120                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
121                   (standard_select << 0));
122     OUT_BCS_BATCH(batch,
123                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
124                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
125                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
126                   (0 << 1)  |
127                   (0 << 0));
128     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
129     OUT_BCS_BATCH(batch, 0); /* reserved */
130     ADVANCE_BCS_BATCH(batch);
131 }
132
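/*
 * Describe the destination surface to the MFX unit: width/height, pitch,
 * Y-major tiling and the Y offsets of the Cb/Cr planes.  Video codecs
 * decode into an interleaved-chroma 4:2:0 layout (NV12), so the Cr offset
 * is only meaningful for JPEG; Y800 sources use the monochrome format.
 */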
133 static void
134 gen8_mfd_surface_state(VADriverContextP ctx,
135                        struct decode_state *decode_state,
136                        int standard_select,
137                        struct gen7_mfd_context *gen7_mfd_context)
138 {
139     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
140     struct object_surface *obj_surface = decode_state->render_object;
141     unsigned int y_cb_offset;
142     unsigned int y_cr_offset;
143     unsigned int surface_format;
144
145     assert(obj_surface);
146
147     y_cb_offset = obj_surface->y_cb_offset;
148     y_cr_offset = obj_surface->y_cr_offset;
149
150     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
151                      MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
152
153     BEGIN_BCS_BATCH(batch, 6);
154     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
155     OUT_BCS_BATCH(batch, 0);
156     OUT_BCS_BATCH(batch,
157                   ((obj_surface->orig_height - 1) << 18) |
158                   ((obj_surface->orig_width - 1) << 4));
159     OUT_BCS_BATCH(batch,
160                   (surface_format << 28) | /* 420 planar YUV surface */
161                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
162                   (0 << 22) | /* surface object control state, ignored */
163                   ((obj_surface->width - 1) << 3) | /* pitch */
164                   (0 << 2)  | /* must be 0 */
165                   (1 << 1)  | /* must be tiled */
166                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
167     OUT_BCS_BATCH(batch,
168                   (0 << 16) | /* X offset for U(Cb), must be 0 */
169                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
170     OUT_BCS_BATCH(batch,
171                   (0 << 16) | /* X offset for V(Cr), must be 0 */
172                   ((standard_select == MFX_FORMAT_JPEG ? y_cr_offset : 0) << 0)); /* Y offset for V(Cr), must be 0 for video codecs, non-zero for JPEG */
173     ADVANCE_BCS_BATCH(batch);
174 }
175
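/*
 * Emit the 61-dword MFX_PIPE_BUF_ADDR_STATE.  On Gen8 every buffer address
 * is a 64-bit relocation (two dwords) followed by a MOCS/memory-attribute
 * dword; buffers that are not valid are written as a pair of zero dwords.
 * DW 19..50 carry the 16 possible reference picture addresses.
 */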
176 static void
177 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
178                              struct decode_state *decode_state,
179                              int standard_select,
180                              struct gen7_mfd_context *gen7_mfd_context)
181 {
182     struct i965_driver_data *i965 = i965_driver_data(ctx);
183     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
184     int i;
185
186     BEGIN_BCS_BATCH(batch, 61);
187     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
188     /* Pre-deblock 1-3 */
189     if (gen7_mfd_context->pre_deblocking_output.valid)
190         OUT_BCS_RELOC64(batch, gen7_mfd_context->pre_deblocking_output.bo,
191                         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
192                         0);
193     else {
194         OUT_BCS_BATCH(batch, 0);
195
196         OUT_BCS_BATCH(batch, 0);
197     }
198     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
199
200     /* Post-deblocking 4-6 */
201     if (gen7_mfd_context->post_deblocking_output.valid)
202         OUT_BCS_RELOC64(batch, gen7_mfd_context->post_deblocking_output.bo,
203                         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
204                         0);
205     else {
206         OUT_BCS_BATCH(batch, 0);
207
208         OUT_BCS_BATCH(batch, 0);
209     }
210     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
211
212     /* uncompressed-video & stream out 7-12 */
213     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
214     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
215     OUT_BCS_BATCH(batch, 0);
216     OUT_BCS_BATCH(batch, 0);
217     OUT_BCS_BATCH(batch, 0);
218     OUT_BCS_BATCH(batch, 0);
219
220     /* intra row-store scratch 13-15 */
221     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
222         OUT_BCS_RELOC64(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
223                         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
224                         0);
225     else {
226         OUT_BCS_BATCH(batch, 0);
227
228         OUT_BCS_BATCH(batch, 0);
229     }
230     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
231
232     /* deblocking-filter-row-store 16-18 */
233     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
234         OUT_BCS_RELOC64(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
235                         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
236                         0);
237     else {
238         OUT_BCS_BATCH(batch, 0);
239         OUT_BCS_BATCH(batch, 0);
240     }
241
242     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
243
244     /* DW 19..50 */
245     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
246         struct object_surface *obj_surface;
247
248         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
249             gen7_mfd_context->reference_surface[i].obj_surface &&
250             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
251             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
252
253             OUT_BCS_RELOC64(batch, obj_surface->bo,
254                             I915_GEM_DOMAIN_INSTRUCTION, 0,
255                             0);
256         } else {
257             OUT_BCS_BATCH(batch, 0);
258             OUT_BCS_BATCH(batch, 0);
259         }
260
261     }
262
263     /* reference property 51 */
264     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
265
266     /* Macroblock status & ILDB 52-57 */
267     OUT_BCS_BATCH(batch, 0);
268     OUT_BCS_BATCH(batch, 0);
269     OUT_BCS_BATCH(batch, 0);
270     OUT_BCS_BATCH(batch, 0);
271     OUT_BCS_BATCH(batch, 0);
272     OUT_BCS_BATCH(batch, 0);
273
274     /* the second Macroblock status 58-60 */
275     OUT_BCS_BATCH(batch, 0);
276     OUT_BCS_BATCH(batch, 0);
277     OUT_BCS_BATCH(batch, 0);
278
279     ADVANCE_BCS_BATCH(batch);
280 }
281
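/*
 * Point the MFX indirect bitstream object at the slice data buffer the
 * application supplied (VASliceDataBufferType).  The indirect MV, IT-COFF,
 * IT-DBLK and PAK-BSE sections of the command are unused for VLD decoding
 * and are left zeroed.
 */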
282 static void
283 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
284                                  dri_bo *slice_data_bo,
285                                  int standard_select,
286                                  struct gen7_mfd_context *gen7_mfd_context)
287 {
288     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
289     struct i965_driver_data *i965 = i965_driver_data(ctx);
290
291     BEGIN_BCS_BATCH(batch, 26);
292     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
293     /* MFX In BS 1-5 */
294     OUT_BCS_RELOC64(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
295     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
296     /* Upper bound 4-5 */
297     OUT_BCS_BATCH(batch, 0);
298     OUT_BCS_BATCH(batch, 0);
299
300     /* MFX indirect MV 6-10 */
301     OUT_BCS_BATCH(batch, 0);
302     OUT_BCS_BATCH(batch, 0);
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305     OUT_BCS_BATCH(batch, 0);
306
307     /* MFX IT_COFF 11-15 */
308     OUT_BCS_BATCH(batch, 0);
309     OUT_BCS_BATCH(batch, 0);
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312     OUT_BCS_BATCH(batch, 0);
313
314     /* MFX IT_DBLK 16-20 */
315     OUT_BCS_BATCH(batch, 0);
316     OUT_BCS_BATCH(batch, 0);
317     OUT_BCS_BATCH(batch, 0);
318     OUT_BCS_BATCH(batch, 0);
319     OUT_BCS_BATCH(batch, 0);
320
321     /* MFX PAK_BSE object for encoder 21-25 */
322     OUT_BCS_BATCH(batch, 0);
323     OUT_BCS_BATCH(batch, 0);
324     OUT_BCS_BATCH(batch, 0);
325     OUT_BCS_BATCH(batch, 0);
326     OUT_BCS_BATCH(batch, 0);
327
328     ADVANCE_BCS_BATCH(batch);
329 }
330
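/*
 * Program MFX_BSP_BUF_BASE_ADDR_STATE with the BSD/MPC and MPR row-store
 * scratch buffers and the bitplane read buffer.  Only the VC-1 path ever
 * marks the bitplane buffer valid (raw coded bitplanes); AVC and MPEG-2
 * decoding leave it invalid, so zeros are emitted instead.
 */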
331 static void
332 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
333                                  struct decode_state *decode_state,
334                                  int standard_select,
335                                  struct gen7_mfd_context *gen7_mfd_context)
336 {
337     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
338     struct i965_driver_data *i965 = i965_driver_data(ctx);
339
340     BEGIN_BCS_BATCH(batch, 10);
341     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
342
343     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
344         OUT_BCS_RELOC64(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
345                         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
346                         0);
347     else {
348         OUT_BCS_BATCH(batch, 0);
349         OUT_BCS_BATCH(batch, 0);
350     }
351
352     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
353     /* MPR Row Store Scratch buffer 4-6 */
354     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
355         OUT_BCS_RELOC64(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
356                         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
357                         0);
358     else {
359         OUT_BCS_BATCH(batch, 0);
360         OUT_BCS_BATCH(batch, 0);
361     }
362
363     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
364
365     /* Bitplane 7-9 */
366     if (gen7_mfd_context->bitplane_read_buffer.valid)
367         OUT_BCS_RELOC64(batch, gen7_mfd_context->bitplane_read_buffer.bo,
368                         I915_GEM_DOMAIN_INSTRUCTION, 0,
369                         0);
370     else {
371         OUT_BCS_BATCH(batch, 0);
372         OUT_BCS_BATCH(batch, 0);
373     }
374     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
375     ADVANCE_BCS_BATCH(batch);
376 }
377
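/*
 * Load one quantizer matrix into the MFX unit.  The payload is always the
 * full 16-dword (64-byte) QM block; callers pass 48 bytes for the AVC 4x4
 * lists and 64 bytes for the 8x8 and MPEG-2 lists, e.g.:
 *
 *   gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX,
 *                     &iq_matrix->ScalingList4x4[0][0], 3 * 16,
 *                     gen7_mfd_context);
 */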
378 static void
379 gen8_mfd_qm_state(VADriverContextP ctx,
380                   int qm_type,
381                   unsigned char *qm,
382                   int qm_length,
383                   struct gen7_mfd_context *gen7_mfd_context)
384 {
385     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
386     unsigned int qm_buffer[16];
387
388     assert(qm_length <= 16 * 4);
389     memcpy(qm_buffer, qm, qm_length);
390
391     BEGIN_BCS_BATCH(batch, 18);
392     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
393     OUT_BCS_BATCH(batch, qm_type << 0);
394     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
395     ADVANCE_BCS_BATCH(batch);
396 }
397
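/*
 * Translate VAPictureParameterBufferH264 into MFX_AVC_IMG_STATE: frame size
 * in macroblocks, chroma QP offsets, field/MBAFF structure and the
 * sequence/picture flags the BSD unit needs.  img_struct encodes the current
 * picture as a frame (0), top field (1) or bottom field (3).
 */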
398 static void
399 gen8_mfd_avc_img_state(VADriverContextP ctx,
400                        struct decode_state *decode_state,
401                        struct gen7_mfd_context *gen7_mfd_context)
402 {
403     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
404     int img_struct;
405     int mbaff_frame_flag;
406     unsigned int width_in_mbs, height_in_mbs;
407     VAPictureParameterBufferH264 *pic_param;
408
409     assert(decode_state->pic_param && decode_state->pic_param->buffer);
410     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
411     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
412
413     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
414         img_struct = 1;
415     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
416         img_struct = 3;
417     else
418         img_struct = 0;
419
420     if ((img_struct & 0x1) == 0x1) {
421         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
422     } else {
423         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
424     }
425
426     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
427         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
428         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
429     } else {
430         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
431     }
432
433     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
434                         !pic_param->pic_fields.bits.field_pic_flag);
435
436     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
437     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
438
439     /* MFX unit doesn't support 4:2:2 and 4:4:4 pictures */
440     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
441            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
442     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
443
444     BEGIN_BCS_BATCH(batch, 17);
445     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
446     OUT_BCS_BATCH(batch,
447                   (width_in_mbs * height_in_mbs - 1));
448     OUT_BCS_BATCH(batch,
449                   ((height_in_mbs - 1) << 16) |
450                   ((width_in_mbs - 1) << 0));
451     OUT_BCS_BATCH(batch,
452                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
453                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
454                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
455                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
456                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
457                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
458                   (img_struct << 8));
459     OUT_BCS_BATCH(batch,
460                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
461                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
462                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
463                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
464                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
465                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
466                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
467                   (mbaff_frame_flag << 1) |
468                   (pic_param->pic_fields.bits.field_pic_flag << 0));
469     OUT_BCS_BATCH(batch, 0);
470     OUT_BCS_BATCH(batch, 0);
471     OUT_BCS_BATCH(batch, 0);
472     OUT_BCS_BATCH(batch, 0);
473     OUT_BCS_BATCH(batch, 0);
474     OUT_BCS_BATCH(batch, 0);
475     OUT_BCS_BATCH(batch, 0);
476     OUT_BCS_BATCH(batch, 0);
477     OUT_BCS_BATCH(batch, 0);
478     OUT_BCS_BATCH(batch, 0);
479     OUT_BCS_BATCH(batch, 0);
480     OUT_BCS_BATCH(batch, 0);
481     ADVANCE_BCS_BATCH(batch);
482 }
483
484 static void
485 gen8_mfd_avc_qm_state(VADriverContextP ctx,
486                       struct decode_state *decode_state,
487                       struct gen7_mfd_context *gen7_mfd_context)
488 {
489     VAIQMatrixBufferH264 *iq_matrix;
490     VAPictureParameterBufferH264 *pic_param;
491
492     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
493         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
494     else
495         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
496
497     assert(decode_state->pic_param && decode_state->pic_param->buffer);
498     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
499
500     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
501     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
502
503     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
504         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
505         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
506     }
507 }
508
509 static inline void
510 gen8_mfd_avc_picid_state(VADriverContextP ctx,
511                          struct decode_state *decode_state,
512                          struct gen7_mfd_context *gen7_mfd_context)
513 {
514     gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
515                                gen7_mfd_context->reference_surface);
516 }
517
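/*
 * Program MFX_AVC_DIRECTMODE_STATE: the direct-MV buffers of every active
 * reference surface and of the current picture, followed by the top/bottom
 * field order counts (POCs) used for direct-mode motion vector scaling.
 */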
518 static void
519 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
520                               struct decode_state *decode_state,
521                               VAPictureParameterBufferH264 *pic_param,
522                               VASliceParameterBufferH264 *slice_param,
523                               struct gen7_mfd_context *gen7_mfd_context)
524 {
525     struct i965_driver_data *i965 = i965_driver_data(ctx);
526     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
527     struct object_surface *obj_surface;
528     GenAvcSurface *gen7_avc_surface;
529     VAPictureH264 *va_pic;
530     int i;
531
532     BEGIN_BCS_BATCH(batch, 71);
533     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
534
535     /* reference surfaces 0..15 */
536     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
537         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
538             gen7_mfd_context->reference_surface[i].obj_surface &&
539             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
540
541             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
542             gen7_avc_surface = obj_surface->private_data;
543
544             OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
545                             I915_GEM_DOMAIN_INSTRUCTION, 0,
546                             0);
547         } else {
548             OUT_BCS_BATCH(batch, 0);
549             OUT_BCS_BATCH(batch, 0);
550         }
551     }
552
553     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
554
555     /* the current decoding frame/field */
556     va_pic = &pic_param->CurrPic;
557     obj_surface = decode_state->render_object;
558     assert(obj_surface->bo && obj_surface->private_data);
559     gen7_avc_surface = obj_surface->private_data;
560
561     OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
562                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
563                     0);
564
565     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
566
567     /* POC List */
568     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
569         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
570
571         if (obj_surface) {
572             const VAPictureH264 * const va_pic = avc_find_picture(
573                                                      obj_surface->base.id, pic_param->ReferenceFrames,
574                                                      ARRAY_ELEMS(pic_param->ReferenceFrames));
575
576             assert(va_pic != NULL);
577             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
578             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
579         } else {
580             OUT_BCS_BATCH(batch, 0);
581             OUT_BCS_BATCH(batch, 0);
582         }
583     }
584
585     va_pic = &pic_param->CurrPic;
586     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
587     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
588
589     ADVANCE_BCS_BATCH(batch);
590 }
591
592 static void
593 gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
594                                  VAPictureParameterBufferH264 *pic_param,
595                                  VASliceParameterBufferH264 *next_slice_param,
596                                  struct gen7_mfd_context *gen7_mfd_context)
597 {
598     gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
599 }
600
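/*
 * Emit MFX_AVC_SLICE_STATE for one slice: the normalized slice type, active
 * reference counts, QP and deblocking offsets, and the macroblock positions
 * of this slice and of the next one (or of the picture end for the last
 * slice).  Vertical positions are doubled for MBAFF pictures, where
 * first_mb_in_slice addresses macroblock pairs.
 */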
601 static void
602 gen8_mfd_avc_slice_state(VADriverContextP ctx,
603                          VAPictureParameterBufferH264 *pic_param,
604                          VASliceParameterBufferH264 *slice_param,
605                          VASliceParameterBufferH264 *next_slice_param,
606                          struct gen7_mfd_context *gen7_mfd_context)
607 {
608     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
609     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
610     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
611     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
612     int num_ref_idx_l0, num_ref_idx_l1;
613     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
614                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
615     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
616     int slice_type;
617
618     if (slice_param->slice_type == SLICE_TYPE_I ||
619         slice_param->slice_type == SLICE_TYPE_SI) {
620         slice_type = SLICE_TYPE_I;
621     } else if (slice_param->slice_type == SLICE_TYPE_P ||
622                slice_param->slice_type == SLICE_TYPE_SP) {
623         slice_type = SLICE_TYPE_P;
624     } else {
625         assert(slice_param->slice_type == SLICE_TYPE_B);
626         slice_type = SLICE_TYPE_B;
627     }
628
629     if (slice_type == SLICE_TYPE_I) {
630         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
631         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
632         num_ref_idx_l0 = 0;
633         num_ref_idx_l1 = 0;
634     } else if (slice_type == SLICE_TYPE_P) {
635         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
636         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
637         num_ref_idx_l1 = 0;
638     } else {
639         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
640         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
641     }
642
643     first_mb_in_slice = slice_param->first_mb_in_slice;
644     slice_hor_pos = first_mb_in_slice % width_in_mbs;
645     slice_ver_pos = first_mb_in_slice / width_in_mbs;
646
647     if (mbaff_picture)
648         slice_ver_pos = slice_ver_pos << 1;
649     if (next_slice_param) {
650         first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
651         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
652         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
653
654         if (mbaff_picture)
655             next_slice_ver_pos = next_slice_ver_pos << 1;
656     } else {
657         next_slice_hor_pos = 0;
658         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
659     }
660
661     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
662     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
663     OUT_BCS_BATCH(batch, slice_type);
664     OUT_BCS_BATCH(batch,
665                   (num_ref_idx_l1 << 24) |
666                   (num_ref_idx_l0 << 16) |
667                   (slice_param->chroma_log2_weight_denom << 8) |
668                   (slice_param->luma_log2_weight_denom << 0));
669     OUT_BCS_BATCH(batch,
670                   (slice_param->direct_spatial_mv_pred_flag << 29) |
671                   (slice_param->disable_deblocking_filter_idc << 27) |
672                   (slice_param->cabac_init_idc << 24) |
673                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
674                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
675                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
676     OUT_BCS_BATCH(batch,
677                   (slice_ver_pos << 24) |
678                   (slice_hor_pos << 16) |
679                   (first_mb_in_slice << 0));
680     OUT_BCS_BATCH(batch,
681                   (next_slice_ver_pos << 16) |
682                   (next_slice_hor_pos << 0));
683     OUT_BCS_BATCH(batch,
684                   (next_slice_param == NULL) << 19); /* last slice flag */
685     OUT_BCS_BATCH(batch, 0);
686     OUT_BCS_BATCH(batch, 0);
687     OUT_BCS_BATCH(batch, 0);
688     OUT_BCS_BATCH(batch, 0);
689     ADVANCE_BCS_BATCH(batch);
690 }
691
692 static inline void
693 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
694                            VAPictureParameterBufferH264 *pic_param,
695                            VASliceParameterBufferH264 *slice_param,
696                            struct gen7_mfd_context *gen7_mfd_context)
697 {
698     gen6_send_avc_ref_idx_state(
699         gen7_mfd_context->base.batch,
700         slice_param,
701         gen7_mfd_context->reference_surface
702     );
703 }
704
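/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE when explicit weighted prediction is in
 * use: one L0 table for weighted P/SP slices, L0 and L1 tables for B slices
 * with weighted_bipred_idc == 1.  Each table packs 32 entries of luma and
 * chroma weight/offset pairs.
 */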
705 static void
706 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
707                                 VAPictureParameterBufferH264 *pic_param,
708                                 VASliceParameterBufferH264 *slice_param,
709                                 struct gen7_mfd_context *gen7_mfd_context)
710 {
711     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
712     int i, j, num_weight_offset_table = 0;
713     short weightoffsets[32 * 6];
714
715     if ((slice_param->slice_type == SLICE_TYPE_P ||
716          slice_param->slice_type == SLICE_TYPE_SP) &&
717         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
718         num_weight_offset_table = 1;
719     }
720
721     if ((slice_param->slice_type == SLICE_TYPE_B) &&
722         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
723         num_weight_offset_table = 2;
724     }
725
726     for (i = 0; i < num_weight_offset_table; i++) {
727         BEGIN_BCS_BATCH(batch, 98);
728         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
729         OUT_BCS_BATCH(batch, i);
730
731         if (i == 0) {
732             for (j = 0; j < 32; j++) {
733                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
734                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
735                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
736                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
737                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
738                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
739             }
740         } else {
741             for (j = 0; j < 32; j++) {
742                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
743                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
744                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
745                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
746                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
747                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
748             }
749         }
750
751         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
752         ADVANCE_BCS_BATCH(batch);
753     }
754 }
755
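/*
 * Kick the bitstream decoder for one slice.  avc_get_first_mb_bit_offset()
 * locates the first macroblock bit past the slice header so the hardware
 * can skip it; the byte part goes into bits 31:16 of the fifth dword and
 * the remaining bit offset into bits 2:0, next to the last-slice flag.
 */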
756 static void
757 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
758                         VAPictureParameterBufferH264 *pic_param,
759                         VASliceParameterBufferH264 *slice_param,
760                         dri_bo *slice_data_bo,
761                         VASliceParameterBufferH264 *next_slice_param,
762                         struct gen7_mfd_context *gen7_mfd_context)
763 {
764     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
765     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
766                                                             slice_param,
767                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
768
769     /* the input bitstream format on GEN7 differs from GEN6 */
770     BEGIN_BCS_BATCH(batch, 6);
771     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
772     OUT_BCS_BATCH(batch,
773                   (slice_param->slice_data_size));
774     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
775     OUT_BCS_BATCH(batch,
776                   (0 << 31) |
777                   (0 << 14) |
778                   (0 << 12) |
779                   (0 << 10) |
780                   (0 << 8));
781     OUT_BCS_BATCH(batch,
782                   ((slice_data_bit_offset >> 3) << 16) |
783                   (1 << 7)  |
784                   (0 << 5)  |
785                   (0 << 4)  |
786                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
787                   (slice_data_bit_offset & 0x7));
788     OUT_BCS_BATCH(batch, 0);
789     ADVANCE_BCS_BATCH(batch);
790 }
791
792 static inline void
793 gen8_mfd_avc_context_init(
794     VADriverContextP         ctx,
795     struct gen7_mfd_context *gen7_mfd_context
796 )
797 {
798     /* Initialize flat scaling lists */
799     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
800 }
801
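/*
 * Per-picture setup for AVC decoding: scan the slice headers to decide
 * whether in-loop deblocking is needed, refresh the reference frame store,
 * make sure the render target has a backing BO and AVC private data, and
 * (re)allocate the intra, deblocking, BSD/MPC and MPR row-store scratch
 * buffers, all sized from the picture width in macroblocks.
 */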
802 static void
803 gen8_mfd_avc_decode_init(VADriverContextP ctx,
804                          struct decode_state *decode_state,
805                          struct gen7_mfd_context *gen7_mfd_context)
806 {
807     VAPictureParameterBufferH264 *pic_param;
808     VASliceParameterBufferH264 *slice_param;
809     struct i965_driver_data *i965 = i965_driver_data(ctx);
810     struct object_surface *obj_surface;
811     dri_bo *bo;
812     int i, j, enable_avc_ildb = 0;
813     unsigned int width_in_mbs, height_in_mbs;
814
815     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
816         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
817         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
818
819         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
820             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
821             assert((slice_param->slice_type == SLICE_TYPE_I) ||
822                    (slice_param->slice_type == SLICE_TYPE_SI) ||
823                    (slice_param->slice_type == SLICE_TYPE_P) ||
824                    (slice_param->slice_type == SLICE_TYPE_SP) ||
825                    (slice_param->slice_type == SLICE_TYPE_B));
826
827             if (slice_param->disable_deblocking_filter_idc != 1) {
828                 enable_avc_ildb = 1;
829                 break;
830             }
831
832             slice_param++;
833         }
834     }
835
836     assert(decode_state->pic_param && decode_state->pic_param->buffer);
837     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
838     gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
839                                        gen7_mfd_context->reference_surface);
840     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
841     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
842     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
843     assert(height_in_mbs > 0 && height_in_mbs <= 256);
844
845     /* Current decoded picture */
846     obj_surface = decode_state->render_object;
847     if (pic_param->pic_fields.bits.reference_pic_flag)
848         obj_surface->flags |= SURFACE_REFERENCED;
849     else
850         obj_surface->flags &= ~SURFACE_REFERENCED;
851
852     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
853     gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
854
855     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
856     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
857     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
858     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
859
860     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
861     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
862     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
863     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
864
865     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
866     bo = dri_bo_alloc(i965->intel.bufmgr,
867                       "intra row store",
868                       width_in_mbs * 64,
869                       0x1000);
870     assert(bo);
871     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
872     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
873
874     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
875     bo = dri_bo_alloc(i965->intel.bufmgr,
876                       "deblocking filter row store",
877                       width_in_mbs * 64 * 4,
878                       0x1000);
879     assert(bo);
880     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
881     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
882
883     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
884     bo = dri_bo_alloc(i965->intel.bufmgr,
885                       "bsd mpc row store",
886                       width_in_mbs * 64 * 2,
887                       0x1000);
888     assert(bo);
889     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
890     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
891
892     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
893     bo = dri_bo_alloc(i965->intel.bufmgr,
894                       "mpr row store",
895                       width_in_mbs * 64 * 2,
896                       0x1000);
897     assert(bo);
898     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
899     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
900
901     gen7_mfd_context->bitplane_read_buffer.valid = 0;
902 }
903
904 static void
905 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
906                             struct decode_state *decode_state,
907                             struct gen7_mfd_context *gen7_mfd_context)
908 {
909     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
910     VAPictureParameterBufferH264 *pic_param;
911     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
912     dri_bo *slice_data_bo;
913     int i, j;
914
915     assert(decode_state->pic_param && decode_state->pic_param->buffer);
916     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
917     gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
918
919     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
920     intel_batchbuffer_emit_mi_flush(batch);
921     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
922     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
923     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
924     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
925     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
926     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
927     gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
928
929     for (j = 0; j < decode_state->num_slice_params; j++) {
930         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
931         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
932         slice_data_bo = decode_state->slice_datas[j]->bo;
933         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
934
935         if (j == decode_state->num_slice_params - 1)
936             next_slice_group_param = NULL;
937         else
938             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
939
940         if (j == 0 && slice_param->first_mb_in_slice)
941             gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
942
943         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
944             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
945             assert((slice_param->slice_type == SLICE_TYPE_I) ||
946                    (slice_param->slice_type == SLICE_TYPE_SI) ||
947                    (slice_param->slice_type == SLICE_TYPE_P) ||
948                    (slice_param->slice_type == SLICE_TYPE_SP) ||
949                    (slice_param->slice_type == SLICE_TYPE_B));
950
951             if (i < decode_state->slice_params[j]->num_elements - 1)
952                 next_slice_param = slice_param + 1;
953             else
954                 next_slice_param = next_slice_group_param;
955
956             gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
957             gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
958             gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
959             gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
960             gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
961             slice_param++;
962         }
963     }
964
965     intel_batchbuffer_end_atomic(batch);
966     intel_batchbuffer_flush(batch);
967 }
968
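/*
 * For reference, a VA-API client reaches the AVC decode path above roughly
 * as follows (illustrative sketch only; error handling is omitted and the
 * buffer names are placeholders, not part of this driver):
 *
 *   vaCreateConfig(dpy, VAProfileH264Main, VAEntrypointVLD, NULL, 0, &config);
 *   vaCreateContext(dpy, config, width, height, VA_PROGRESSIVE,
 *                   &render_target, 1, &context);
 *   vaBeginPicture(dpy, context, render_target);
 *   vaRenderPicture(dpy, context, &pic_param_buf, 1);    // VAPictureParameterBufferType
 *   vaRenderPicture(dpy, context, &iq_matrix_buf, 1);    // VAIQMatrixBufferType
 *   vaRenderPicture(dpy, context, &slice_param_buf, 1);  // VASliceParameterBufferType
 *   vaRenderPicture(dpy, context, &slice_data_buf, 1);   // VASliceDataBufferType
 *   vaEndPicture(dpy, context);  // the decode above is submitted here
 */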
969 static void
970 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
971                            struct decode_state *decode_state,
972                            struct gen7_mfd_context *gen7_mfd_context)
973 {
974     VAPictureParameterBufferMPEG2 *pic_param;
975     struct i965_driver_data *i965 = i965_driver_data(ctx);
976     struct object_surface *obj_surface;
977     dri_bo *bo;
978     unsigned int width_in_mbs;
979
980     assert(decode_state->pic_param && decode_state->pic_param->buffer);
981     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
982     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
983
984     mpeg2_set_reference_surfaces(
985         ctx,
986         gen7_mfd_context->reference_surface,
987         decode_state,
988         pic_param
989     );
990
991     /* Current decoded picture */
992     obj_surface = decode_state->render_object;
993     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
994
995     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
996     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
997     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
998     gen7_mfd_context->pre_deblocking_output.valid = 1;
999
1000     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1001     bo = dri_bo_alloc(i965->intel.bufmgr,
1002                       "bsd mpc row store",
1003                       width_in_mbs * 96,
1004                       0x1000);
1005     assert(bo);
1006     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1007     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1008
1009     gen7_mfd_context->post_deblocking_output.valid = 0;
1010     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1011     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1012     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1013     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1014 }
1015
1016 static void
1017 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1018                          struct decode_state *decode_state,
1019                          struct gen7_mfd_context *gen7_mfd_context)
1020 {
1021     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1022     VAPictureParameterBufferMPEG2 *pic_param;
1023     unsigned int slice_concealment_disable_bit = 0;
1024
1025     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1026     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1027
1028     slice_concealment_disable_bit = 1;
1029
1030     BEGIN_BCS_BATCH(batch, 13);
1031     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1032     OUT_BCS_BATCH(batch,
1033                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1034                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1035                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1036                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1037                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1038                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1039                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1040                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1041                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1042                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1043                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1044                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1045     OUT_BCS_BATCH(batch,
1046                   pic_param->picture_coding_type << 9);
1047     OUT_BCS_BATCH(batch,
1048                   (slice_concealment_disable_bit << 31) |
1049                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1050                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1051     OUT_BCS_BATCH(batch, 0);
1052     OUT_BCS_BATCH(batch, 0);
1053     OUT_BCS_BATCH(batch, 0);
1054     OUT_BCS_BATCH(batch, 0);
1055     OUT_BCS_BATCH(batch, 0);
1056     OUT_BCS_BATCH(batch, 0);
1057     OUT_BCS_BATCH(batch, 0);
1058     OUT_BCS_BATCH(batch, 0);
1059     OUT_BCS_BATCH(batch, 0);
1060     ADVANCE_BCS_BATCH(batch);
1061 }
1062
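/*
 * Cache the MPEG-2 quantiser matrices in the context and commit them to
 * hardware.  The VA-supplied matrices are reordered through zigzag_direct[]
 * before being handed to gen8_mfd_qm_state(); matrices the application did
 * not load keep whatever the context already holds.
 */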
1063 static void
1064 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1065                         struct decode_state *decode_state,
1066                         struct gen7_mfd_context *gen7_mfd_context)
1067 {
1068     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1069     int i, j;
1070
1071     /* Update internal QM state */
1072     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1073         VAIQMatrixBufferMPEG2 * const iq_matrix =
1074             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1075
1076         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1077             iq_matrix->load_intra_quantiser_matrix) {
1078             gen_iq_matrix->load_intra_quantiser_matrix =
1079                 iq_matrix->load_intra_quantiser_matrix;
1080             if (iq_matrix->load_intra_quantiser_matrix) {
1081                 for (j = 0; j < 64; j++)
1082                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1083                         iq_matrix->intra_quantiser_matrix[j];
1084             }
1085         }
1086
1087         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1088             iq_matrix->load_non_intra_quantiser_matrix) {
1089             gen_iq_matrix->load_non_intra_quantiser_matrix =
1090                 iq_matrix->load_non_intra_quantiser_matrix;
1091             if (iq_matrix->load_non_intra_quantiser_matrix) {
1092                 for (j = 0; j < 64; j++)
1093                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1094                         iq_matrix->non_intra_quantiser_matrix[j];
1095             }
1096         }
1097     }
1098
1099     /* Commit QM state to HW */
1100     for (i = 0; i < 2; i++) {
1101         unsigned char *qm = NULL;
1102         int qm_type;
1103
1104         if (i == 0) {
1105             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1106                 qm = gen_iq_matrix->intra_quantiser_matrix;
1107                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1108             }
1109         } else {
1110             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1111                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1112                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1113             }
1114         }
1115
1116         if (!qm)
1117             continue;
1118
1119         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1120     }
1121 }
1122
1123 static void
1124 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1125                           VAPictureParameterBufferMPEG2 *pic_param,
1126                           VASliceParameterBufferMPEG2 *slice_param,
1127                           VASliceParameterBufferMPEG2 *next_slice_param,
1128                           struct gen7_mfd_context *gen7_mfd_context)
1129 {
1130     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1131     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1132     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1133
1134     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1135         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1136         is_field_pic = 1;
1137     is_field_pic_wa = is_field_pic &&
1138                       gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1139
1140     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1141     hpos0 = slice_param->slice_horizontal_position;
1142
1143     if (next_slice_param == NULL) {
1144         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1145         hpos1 = 0;
1146     } else {
1147         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1148         hpos1 = next_slice_param->slice_horizontal_position;
1149     }
1150
1151     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1152
1153     BEGIN_BCS_BATCH(batch, 5);
1154     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1155     OUT_BCS_BATCH(batch,
1156                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1157     OUT_BCS_BATCH(batch,
1158                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1159     OUT_BCS_BATCH(batch,
1160                   hpos0 << 24 |
1161                   vpos0 << 16 |
1162                   mb_count << 8 |
1163                   (next_slice_param == NULL) << 5 |
1164                   (next_slice_param == NULL) << 3 |
1165                   (slice_param->macroblock_offset & 0x7));
1166     OUT_BCS_BATCH(batch,
1167                   (slice_param->quantiser_scale_code << 24) |
1168                   (vpos1 << 8 | hpos1));
1169     ADVANCE_BCS_BATCH(batch);
1170 }
1171
1172 static void
1173 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1174                               struct decode_state *decode_state,
1175                               struct gen7_mfd_context *gen7_mfd_context)
1176 {
1177     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1178     VAPictureParameterBufferMPEG2 *pic_param;
1179     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1180     dri_bo *slice_data_bo;
1181     int i, j;
1182
1183     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1184     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1185
1186     gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1187     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1188     intel_batchbuffer_emit_mi_flush(batch);
1189     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1190     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1191     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1192     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1193     gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1194     gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1195
1196     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1197         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1198             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1199
1200     for (j = 0; j < decode_state->num_slice_params; j++) {
1201         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1202         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1203         slice_data_bo = decode_state->slice_datas[j]->bo;
1204         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1205
1206         if (j == decode_state->num_slice_params - 1)
1207             next_slice_group_param = NULL;
1208         else
1209             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1210
1211         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1212             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1213
1214             if (i < decode_state->slice_params[j]->num_elements - 1)
1215                 next_slice_param = slice_param + 1;
1216             else
1217                 next_slice_param = next_slice_group_param;
1218
1219             gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1220             slice_param++;
1221         }
1222     }
1223
1224     intel_batchbuffer_end_atomic(batch);
1225     intel_batchbuffer_flush(batch);
1226 }
1227
1228 static const int va_to_gen7_vc1_mv[4] = {
1229     1, /* 1-MV */
1230     2, /* 1-MV half-pel */
1231     3, /* 1-MV half-pel bilinear */
1232     0, /* Mixed MV */
1233 };
1234
1235 static const int b_picture_scale_factor[21] = {
1236     128, 85,  170, 64,  192,
1237     51,  102, 153, 204, 43,
1238     215, 37,  74,  111, 148,
1239     185, 222, 32,  96,  160,
1240     224,
1241 };
1242
1243 static const int va_to_gen7_vc1_condover[3] = {
1244     0,
1245     2,
1246     3
1247 };
1248
1249 static const int va_to_gen7_vc1_profile[4] = {
1250     GEN7_VC1_SIMPLE_PROFILE,
1251     GEN7_VC1_MAIN_PROFILE,
1252     GEN7_VC1_RESERVED_PROFILE,
1253     GEN7_VC1_ADVANCED_PROFILE
1254 };
1255
1256 static void
1257 gen8_mfd_free_vc1_surface(void **data)
1258 {
1259     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1260
1261     if (!gen7_vc1_surface)
1262         return;
1263
1264     dri_bo_unreference(gen7_vc1_surface->dmv);
1265     free(gen7_vc1_surface);
1266     *data = NULL;
1267 }
1268
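/*
 * Attach the per-surface VC-1 private data (struct gen7_vc1_surface): record
 * the picture type, reset the intensity-compensation state and allocate the
 * per-picture direct-MV scratch buffer.
 */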
1269 static void
1270 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1271                           VAPictureParameterBufferVC1 *pic_param,
1272                           struct object_surface *obj_surface)
1273 {
1274     struct i965_driver_data *i965 = i965_driver_data(ctx);
1275     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1276     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1277
1278     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1279
1280     if (!gen7_vc1_surface) {
1281         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1282
1283         if (!gen7_vc1_surface)
1284             return;
1285
1286         assert((obj_surface->size & 0x3f) == 0);
1287         obj_surface->private_data = gen7_vc1_surface;
1288     }
1289
1290     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1291     gen7_vc1_surface->intensity_compensation = 0;
1292     gen7_vc1_surface->luma_scale = 0;
1293     gen7_vc1_surface->luma_shift = 0;
1294
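         /*
          * Per-surface direct MV buffer: written while decoding P and skipped
          * pictures and read back for direct-mode prediction when this surface
          * is later used as the anchor reference of a B picture
          * (see gen8_mfd_vc1_directmode_state).
          */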
1295     if (gen7_vc1_surface->dmv == NULL) {
1296         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1297                                              "direct mv w/r buffer",
1298                                              128 * height_in_mbs * 64,
1299                                              0x1000);
1300     }
1301 }
1302
1303 static void
1304 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1305                          struct decode_state *decode_state,
1306                          struct gen7_mfd_context *gen7_mfd_context)
1307 {
1308     VAPictureParameterBufferVC1 *pic_param;
1309     struct i965_driver_data *i965 = i965_driver_data(ctx);
1310     struct object_surface *obj_surface;
1311     dri_bo *bo;
1312     int width_in_mbs;
1313     int picture_type;
1314     int intensity_compensation;
1315
1316     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1317     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1318     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1319     picture_type = pic_param->picture_fields.bits.picture_type;
1320     intensity_compensation = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1321
1322     intel_update_vc1_frame_store_index(ctx,
1323                                        decode_state,
1324                                        pic_param,
1325                                        gen7_mfd_context->reference_surface);
1326
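         /*
          * If this P picture applies intensity compensation, stash the luma
          * scale/shift on the forward reference surface; the values are read
          * back from there when gen8_mfd_vc1_pred_pipe_state programs
          * MFX_VC1_PRED_PIPE_STATE.
          */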
1327     /* Forward reference picture */
1328     obj_surface = decode_state->reference_objects[0];
1329     if (pic_param->forward_reference_picture != VA_INVALID_ID &&
1330         obj_surface &&
1331         obj_surface->private_data) {
1332         if (picture_type == 1 && intensity_compensation) { /* P picture */
1333             struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1334
1335             gen7_vc1_surface->intensity_compensation = intensity_compensation;
1336             gen7_vc1_surface->luma_scale = pic_param->luma_scale;
1337             gen7_vc1_surface->luma_shift = pic_param->luma_shift;
1338         }
1339     }
1340
1341     /* Current decoded picture */
1342     obj_surface = decode_state->render_object;
1343     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1344     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1345
1346     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1347     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1348     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1349
1350     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1351     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1352     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1353
1354     if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1355         gen7_mfd_context->post_deblocking_output.valid = 0;
1356         gen7_mfd_context->pre_deblocking_output.valid = 1;
1357     } else {
1358         gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1359         gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1360     }
1361
1362     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1363     bo = dri_bo_alloc(i965->intel.bufmgr,
1364                       "intra row store",
1365                       width_in_mbs * 64,
1366                       0x1000);
1367     assert(bo);
1368     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1369     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1370
1371     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1372     bo = dri_bo_alloc(i965->intel.bufmgr,
1373                       "deblocking filter row store",
1374                       width_in_mbs * 7 * 64,
1375                       0x1000);
1376     assert(bo);
1377     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1378     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1379
1380     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1381     bo = dri_bo_alloc(i965->intel.bufmgr,
1382                       "bsd mpc row store",
1383                       width_in_mbs * 96,
1384                       0x1000);
1385     assert(bo);
1386     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1387     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1388
1389     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1390
1391     if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
1392         gen7_mfd_context->bitplane_read_buffer.valid = 1;
1393     else
1394         gen7_mfd_context->bitplane_read_buffer.valid = !!(pic_param->bitplane_present.value & 0x7f);
1395     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1396
1397     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1398         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1399         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1400         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1401         int src_w, src_h;
1402         uint8_t *src = NULL, *dst = NULL;
1403
1404         bo = dri_bo_alloc(i965->intel.bufmgr,
1405                           "VC-1 Bitplane",
1406                           bitplane_width * height_in_mbs,
1407                           0x1000);
1408         assert(bo);
1409         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1410
1411         dri_bo_map(bo, True);
1412         assert(bo->virtual);
1413         dst = bo->virtual;
1414
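             /*
              * The bitplane buffer packs two macroblocks per byte, 4 bits each:
              * the earlier MB of a pair ends up in the low nibble and the later
              * one in the high nibble, and an odd-width row gets one extra
              * shift after the loop.  Skipped pictures carry no raw bitplane
              * data, so every macroblock is filled with 0x2, which presumably
              * marks it as skipped for the hardware.
              */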
1415         if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1416             for (src_h = 0; src_h < height_in_mbs; src_h++) {
1417                 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1418                     int dst_index;
1419                     uint8_t src_value = 0x2;
1420
1421                     dst_index = src_w / 2;
1422                     dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1423                 }
1424
1425                 if (src_w & 1)
1426                     dst[src_w / 2] >>= 4;
1427
1428                 dst += bitplane_width;
1429             }
1430         } else {
1431             assert(decode_state->bit_plane->buffer);
1432             src = decode_state->bit_plane->buffer;
1433
1434             for (src_h = 0; src_h < height_in_mbs; src_h++) {
1435                 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1436                     int src_index, dst_index;
1437                     int src_shift;
1438                     uint8_t src_value;
1439
1440                     src_index = (src_h * width_in_mbs + src_w) / 2;
1441                     src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1442                     src_value = ((src[src_index] >> src_shift) & 0xf);
1443
1444                     dst_index = src_w / 2;
1445                     dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1446                 }
1447
1448                 if (src_w & 1)
1449                     dst[src_w / 2] >>= 4;
1450
1451                 dst += bitplane_width;
1452             }
1453         }
1454
1455         dri_bo_unmap(bo);
1456     } else
1457         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1458 }
1459
1460 static void
1461 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1462                        struct decode_state *decode_state,
1463                        struct gen7_mfd_context *gen7_mfd_context)
1464 {
1465     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1466     VAPictureParameterBufferVC1 *pic_param;
1467     struct object_surface *obj_surface;
1468     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1469     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1470     int unified_mv_mode;
1471     int ref_field_pic_polarity = 0;
1472     int scale_factor = 0;
1473     int trans_ac_y = 0;
1474     int dmv_surface_valid = 0;
1475     int brfd = 0;
1476     int fcm = 0;
1477     int picture_type;
1478     int ptype;
1479     int profile;
1480     int overlap = 0;
1481     int interpolation_mode = 0;
1482     int loopfilter = 0;
1483     int bitplane_present;
1484     int forward_mb = 0, mv_type_mb = 0, skip_mb = 0, direct_mb = 0;
1485     int overflags = 0, ac_pred = 0, field_tx = 0;
1486
1487     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1488     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1489
1490     picture_type = pic_param->picture_fields.bits.picture_type;
1491
1492     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1493     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1494     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1495     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1496     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1497     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1498     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1499     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1500
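         /*
          * Map the VOPDQUANT syntax elements (DQUANT, DQUANTFRM, DQPROFILE,
          * DQDBEDGE, DQSBEDGE, DQBILEVEL) onto the ALTPQUANT configuration and
          * edge mask fields expected by MFD_VC1_LONG_PIC_STATE.
          */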
1501     if (dquant == 0) {
1502         alt_pquant_config = 0;
1503         alt_pquant_edge_mask = 0;
1504     } else if (dquant == 2) {
1505         alt_pquant_config = 1;
1506         alt_pquant_edge_mask = 0xf;
1507     } else {
1508         assert(dquant == 1);
1509         if (dquantfrm == 0) {
1510             alt_pquant_config = 0;
1511             alt_pquant_edge_mask = 0;
1512             alt_pq = 0;
1513         } else {
1514             assert(dquantfrm == 1);
1515             alt_pquant_config = 1;
1516
1517             switch (dqprofile) {
1518             case 3:
1519                 if (dqbilevel == 0) {
1520                     alt_pquant_config = 2;
1521                     alt_pquant_edge_mask = 0;
1522                 } else {
1523                     assert(dqbilevel == 1);
1524                     alt_pquant_config = 3;
1525                     alt_pquant_edge_mask = 0;
1526                 }
1527                 break;
1528
1529             case 0:
1530                 alt_pquant_edge_mask = 0xf;
1531                 break;
1532
1533             case 1:
1534                 if (dqdbedge == 3)
1535                     alt_pquant_edge_mask = 0x9;
1536                 else
1537                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1538
1539                 break;
1540
1541             case 2:
1542                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1543                 break;
1544
1545             default:
1546                 assert(0);
1547             }
1548         }
1549     }
1550
1551     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1552         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1553         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1554     } else {
1555         assert(pic_param->mv_fields.bits.mv_mode < 4);
1556         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1557     }
1558
1559     if (pic_param->sequence_fields.bits.interlace == 1 &&
1560         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1561         /* FIXME: calculate reference field picture polarity */
1562         assert(0);
1563         ref_field_pic_polarity = 0;
1564     }
1565
1566     if (pic_param->b_picture_fraction < 21)
1567         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1568
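         /*
          * Skipped pictures carry no coded bitplane data, so decode them as P
          * pictures and force the bitplane to be treated as present; the
          * all-skipped bitplane synthesized in gen8_mfd_vc1_decode_init is
          * consumed instead.
          */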
1569     if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1570         ptype = GEN7_VC1_P_PICTURE;
1571         bitplane_present = 1;
1572     } else {
1573         ptype = pic_param->picture_fields.bits.picture_type;
1574         bitplane_present = !!(pic_param->bitplane_present.value & 0x7f);
1575         forward_mb = pic_param->raw_coding.flags.forward_mb;
1576         mv_type_mb = pic_param->raw_coding.flags.mv_type_mb;
1577         skip_mb = pic_param->raw_coding.flags.skip_mb;
1578         direct_mb = pic_param->raw_coding.flags.direct_mb;
1579         overflags = pic_param->raw_coding.flags.overflags;
1580         ac_pred = pic_param->raw_coding.flags.ac_pred;
1581         field_tx = pic_param->raw_coding.flags.field_tx;
1582         loopfilter = pic_param->entrypoint_fields.bits.loopfilter;
1583     }
1584
1585     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1586         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1587     else {
1588         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1589
1590         /*
1591          * 8.3.6.2.1 Transform Type Selection
1592          * If variable-sized transform coding is not enabled,
1593          * then the 8x8 transform shall be used for all blocks.
1594          * This is also an MFX_VC1_PIC_STATE requirement.
1595          */
1596         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1597             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1598             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1599         }
1600     }
1601
1602     if (picture_type == GEN7_VC1_B_PICTURE) {
1603         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1604
1605         obj_surface = decode_state->reference_objects[1];
1606
1607         if (obj_surface)
1608             gen7_vc1_surface = obj_surface->private_data;
1609
1610         if (gen7_vc1_surface &&
1611             gen7_vc1_surface->picture_type == GEN7_VC1_P_PICTURE)
1612             dmv_surface_valid = 1;
1613     }
1614
1615     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1616
1617     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1618         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1619     else {
1620         if (pic_param->picture_fields.bits.top_field_first)
1621             fcm = 2;
1622         else
1623             fcm = 3;
1624     }
1625
1626     if (picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1627         brfd = pic_param->reference_fields.bits.reference_distance;
1628         brfd = (scale_factor * brfd) >> 8;
1629         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1630
1631         if (brfd < 0)
1632             brfd = 0;
1633     }
1634
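         /*
          * Overlap smoothing is only considered when the sequence-level OVERLAP
          * flag is set.  Simple/Main Profile applies it to non-B pictures with
          * PQUANT >= 9; for Advanced Profile, P pictures also need PQUANT >= 9,
          * while I/BI pictures honor CONDOVER as well (all block boundaries, or
          * per-MB via the OVERFLAGS bitplane).
          */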
1635     if (pic_param->sequence_fields.bits.overlap) {
1636         if (profile == GEN7_VC1_ADVANCED_PROFILE) {
1637             if (picture_type == GEN7_VC1_P_PICTURE &&
1638                 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1639                 overlap = 1;
1640             }
1641             if (picture_type == GEN7_VC1_I_PICTURE ||
1642                 picture_type == GEN7_VC1_BI_PICTURE) {
1643                 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1644                     overlap = 1;
1645                 } else if (pic_param->conditional_overlap_flag == 1 || /* all block boundaries */
1646                            pic_param->conditional_overlap_flag == 2) { /* coded by OVERFLAGSMB bitplane */
1647                     overlap = 1;
1648                 }
1649             }
1650         } else {
1651             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1652                 picture_type != GEN7_VC1_B_PICTURE) {
1653                 overlap = 1;
1654             }
1655         }
1656     }
1657
1658     assert(pic_param->conditional_overlap_flag < 3);
1659     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1660
1661     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1662         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1663          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1664         interpolation_mode = 9; /* Half-pel bilinear */
1665     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1666              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1667               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1668         interpolation_mode = 1; /* Half-pel bicubic */
1669     else
1670         interpolation_mode = 0; /* Quarter-pel bicubic */
1671
1672     BEGIN_BCS_BATCH(batch, 6);
1673     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1674     OUT_BCS_BATCH(batch,
1675                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1676                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1677     OUT_BCS_BATCH(batch,
1678                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1679                   dmv_surface_valid << 15 |
1680                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1681                   pic_param->rounding_control << 13 |
1682                   pic_param->sequence_fields.bits.syncmarker << 12 |
1683                   interpolation_mode << 8 |
1684                   0 << 7 | /* FIXME: scale up or down ??? */
1685                   pic_param->range_reduction_frame << 6 |
1686                   loopfilter << 5 |
1687                   overlap << 4 |
1688                   !pic_param->picture_fields.bits.is_first_field << 3 |
1689                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1690     OUT_BCS_BATCH(batch,
1691                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1692                   ptype << 26 |
1693                   fcm << 24 |
1694                   alt_pq << 16 |
1695                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1696                   scale_factor << 0);
1697     OUT_BCS_BATCH(batch,
1698                   unified_mv_mode << 28 |
1699                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1700                   pic_param->fast_uvmc_flag << 26 |
1701                   ref_field_pic_polarity << 25 |
1702                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1703                   pic_param->reference_fields.bits.reference_distance << 20 |
1704                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1705                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1706                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1707                   alt_pquant_edge_mask << 4 |
1708                   alt_pquant_config << 2 |
1709                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1710                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1711     OUT_BCS_BATCH(batch,
1712                   bitplane_present << 31 |
1713                   forward_mb << 30 |
1714                   mv_type_mb << 29 |
1715                   skip_mb << 28 |
1716                   direct_mb << 27 |
1717                   overflags << 26 |
1718                   ac_pred << 25 |
1719                   field_tx << 24 |
1720                   pic_param->mv_fields.bits.mv_table << 20 |
1721                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1722                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1723                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1724                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1725                   pic_param->mb_mode_table << 8 |
1726                   trans_ac_y << 6 |
1727                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1728                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1729                   pic_param->cbp_table << 0);
1730     ADVANCE_BCS_BATCH(batch);
1731 }
1732
1733 static void
1734 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1735                              struct decode_state *decode_state,
1736                              struct gen7_mfd_context *gen7_mfd_context)
1737 {
1738     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1739     VAPictureParameterBufferVC1 *pic_param;
1740     int picture_type;
1741     int intensitycomp_single_fwd = 0;
1742     int luma_scale1 = 0;
1743     int luma_shift1 = 0;
1744
1745     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1746     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1747     picture_type = pic_param->picture_fields.bits.picture_type;
1748
1749     if (gen7_mfd_context->reference_surface[0].surface_id != VA_INVALID_ID) {
1750         if (picture_type == 1 || picture_type == 2) { /* P/B picture */
1751             struct gen7_vc1_surface *gen7_vc1_surface = gen7_mfd_context->reference_surface[0].obj_surface->private_data;
1752             if (gen7_vc1_surface) {
1753                 intensitycomp_single_fwd = gen7_vc1_surface->intensity_compensation;
1754                 luma_scale1 = gen7_vc1_surface->luma_scale;
1755                 luma_shift1 = gen7_vc1_surface->luma_shift;
1756             }
1757         }
1758     }
1759
1760     BEGIN_BCS_BATCH(batch, 6);
1761     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1762     OUT_BCS_BATCH(batch,
1763                   0 << 14 | /* FIXME: double ??? */
1764                   0 << 12 |
1765                   intensitycomp_single_fwd << 10 |
1766                   0 << 8 |
1767                   0 << 4 | /* FIXME: interlace mode */
1768                   0);
1769     OUT_BCS_BATCH(batch,
1770                   luma_shift1 << 16 |
1771                   luma_scale1 << 0);
1772     OUT_BCS_BATCH(batch, 0);
1773     OUT_BCS_BATCH(batch, 0);
1774     OUT_BCS_BATCH(batch, 0);
1775     ADVANCE_BCS_BATCH(batch);
1776 }
1777
1778 static void
1779 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1780                               struct decode_state *decode_state,
1781                               struct gen7_mfd_context *gen7_mfd_context)
1782 {
1783     struct i965_driver_data *i965 = i965_driver_data(ctx);
1784     VAPictureParameterBufferVC1 *pic_param;
1785     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1786     struct object_surface *obj_surface;
1787     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1788     int picture_type;
1789
1790     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1791     picture_type = pic_param->picture_fields.bits.picture_type;
1792
1793     if (picture_type == GEN7_VC1_P_PICTURE ||
1794         picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1795         obj_surface = decode_state->render_object;
1796         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1797     }
1798
1799     if (picture_type == GEN7_VC1_B_PICTURE) {
1800         obj_surface = decode_state->reference_objects[1];
1801         if (pic_param->backward_reference_picture != VA_INVALID_ID &&
1802             obj_surface) {
1803             dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1804         }
1805     }
1806
1807     BEGIN_BCS_BATCH(batch, 7);
1808     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1809
1810     if (dmv_write_buffer)
1811         OUT_BCS_RELOC64(batch, dmv_write_buffer,
1812                         I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1813                         0);
1814     else {
1815         OUT_BCS_BATCH(batch, 0);
1816         OUT_BCS_BATCH(batch, 0);
1817     }
1818
1819     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
1820
1821     if (dmv_read_buffer)
1822         OUT_BCS_RELOC64(batch, dmv_read_buffer,
1823                         I915_GEM_DOMAIN_INSTRUCTION, 0,
1824                         0);
1825     else {
1826         OUT_BCS_BATCH(batch, 0);
1827         OUT_BCS_BATCH(batch, 0);
1828     }
1829
1830     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
1831
1832     ADVANCE_BCS_BATCH(batch);
1833 }
1834
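     /*
      * VC-1 Advanced Profile slice data still contains start-code emulation
      * prevention bytes (0x00 0x00 0x03).  For that profile, walk the escaped
      * buffer and widen the incoming macroblock bit offset by every emulation
      * prevention byte found within the slice header, so the returned offset
      * indexes the escaped data the hardware parses directly.  Other profiles
      * are not escaped and the offset is passed through unchanged.
      */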
1835 static int
1836 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1837 {
1838     int out_slice_data_bit_offset;
1839     int slice_header_size = in_slice_data_bit_offset / 8;
1840     int i, j;
1841
1842     if (profile != 3)
1843         out_slice_data_bit_offset = in_slice_data_bit_offset;
1844     else {
1845         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1846             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1847                 if (i < slice_header_size - 1)
1848                     i++, j += 2;
1849                 else {
1850                     buf[j + 2] = buf[j + 1];
1851                     j++;
1852                 }
1853             }
1854         }
1855
1856         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1857     }
1858
1859     return out_slice_data_bit_offset;
1860 }
1861
1862 static void
1863 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1864                         VAPictureParameterBufferVC1 *pic_param,
1865                         VASliceParameterBufferVC1 *slice_param,
1866                         VASliceParameterBufferVC1 *next_slice_param,
1867                         dri_bo *slice_data_bo,
1868                         struct gen7_mfd_context *gen7_mfd_context)
1869 {
1870     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1871     int next_slice_start_vert_pos;
1872     int macroblock_offset;
1873     uint8_t *slice_data = NULL;
1874
1875     dri_bo_map(slice_data_bo, True);
1876     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1877     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1878                                                                slice_param->macroblock_offset,
1879                                                                pic_param->sequence_fields.bits.profile);
1880     dri_bo_unmap(slice_data_bo);
1881
1882     if (next_slice_param)
1883         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1884     else
1885         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1886
1887     BEGIN_BCS_BATCH(batch, 5);
1888     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1889     OUT_BCS_BATCH(batch,
1890                   slice_param->slice_data_size - (macroblock_offset >> 3));
1891     OUT_BCS_BATCH(batch,
1892                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1893     OUT_BCS_BATCH(batch,
1894                   slice_param->slice_vertical_position << 16 |
1895                   next_slice_start_vert_pos << 0);
1896     OUT_BCS_BATCH(batch,
1897                   (macroblock_offset & 0x7));
1898     ADVANCE_BCS_BATCH(batch);
1899 }
1900
1901 static void
1902 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1903                             struct decode_state *decode_state,
1904                             struct gen7_mfd_context *gen7_mfd_context)
1905 {
1906     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1907     VAPictureParameterBufferVC1 *pic_param;
1908     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1909     dri_bo *slice_data_bo;
1910     int i, j;
1911
1912     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1913     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1914
1915     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1916     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1917     intel_batchbuffer_emit_mi_flush(batch);
1918     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1919     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1920     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1921     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1922     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1923     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1924     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1925
1926     for (j = 0; j < decode_state->num_slice_params; j++) {
1927         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1928         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1929         slice_data_bo = decode_state->slice_datas[j]->bo;
1930         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1931
1932         if (j == decode_state->num_slice_params - 1)
1933             next_slice_group_param = NULL;
1934         else
1935             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1936
1937         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1938             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1939
1940             if (i < decode_state->slice_params[j]->num_elements - 1)
1941                 next_slice_param = slice_param + 1;
1942             else
1943                 next_slice_param = next_slice_group_param;
1944
1945             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1946             slice_param++;
1947         }
1948     }
1949
1950     intel_batchbuffer_end_atomic(batch);
1951     intel_batchbuffer_flush(batch);
1952 }
1953
1954 static void
1955 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1956                           struct decode_state *decode_state,
1957                           struct gen7_mfd_context *gen7_mfd_context)
1958 {
1959     struct object_surface *obj_surface;
1960     VAPictureParameterBufferJPEGBaseline *pic_param;
1961     int subsampling = SUBSAMPLE_YUV420;
1962     int fourcc = VA_FOURCC_IMC3;
1963
1964     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1965
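         /*
          * Pick the render target layout from the JPEG sampling factors: a
          * single component maps to Y800, while three components map to IMC3
          * (4:2:0), 422H, 444P, 411P or 422V depending on the h/v ratios
          * between component 0 and components 1/2.  Any other combination is
          * unsupported.
          */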
1966     if (pic_param->num_components == 1) {
1967         subsampling = SUBSAMPLE_YUV400;
1968         fourcc = VA_FOURCC_Y800;
1969     } else if (pic_param->num_components == 3) {
1970         int h1 = pic_param->components[0].h_sampling_factor;
1971         int h2 = pic_param->components[1].h_sampling_factor;
1972         int h3 = pic_param->components[2].h_sampling_factor;
1973         int v1 = pic_param->components[0].v_sampling_factor;
1974         int v2 = pic_param->components[1].v_sampling_factor;
1975         int v3 = pic_param->components[2].v_sampling_factor;
1976
1977         if (h1 == 2 * h2 && h2 == h3 &&
1978             v1 == 2 * v2 && v2 == v3) {
1979             subsampling = SUBSAMPLE_YUV420;
1980             fourcc = VA_FOURCC_IMC3;
1981         } else if (h1 == 2 * h2  && h2 == h3 &&
1982                    v1 == v2 && v2 == v3) {
1983             subsampling = SUBSAMPLE_YUV422H;
1984             fourcc = VA_FOURCC_422H;
1985         } else if (h1 == h2 && h2 == h3 &&
1986                    v1 == v2  && v2 == v3) {
1987             subsampling = SUBSAMPLE_YUV444;
1988             fourcc = VA_FOURCC_444P;
1989         } else if (h1 == 4 * h2 && h2 ==  h3 &&
1990                    v1 == v2 && v2 == v3) {
1991             subsampling = SUBSAMPLE_YUV411;
1992             fourcc = VA_FOURCC_411P;
1993         } else if (h1 == h2 && h2 == h3 &&
1994                    v1 == 2 * v2 && v2 == v3) {
1995             subsampling = SUBSAMPLE_YUV422V;
1996             fourcc = VA_FOURCC_422V;
1997         } else
1998             assert(0);
1999     } else {
2000         assert(0);
2001     }
2002
2003     /* Current decoded picture */
2004     obj_surface = decode_state->render_object;
2005     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
2006
2007     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2008     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2009     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2010     gen7_mfd_context->pre_deblocking_output.valid = 1;
2011
2012     gen7_mfd_context->post_deblocking_output.bo = NULL;
2013     gen7_mfd_context->post_deblocking_output.valid = 0;
2014
2015     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2016     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
2017
2018     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2019     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
2020
2021     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2022     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
2023
2024     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2025     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
2026
2027     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2028     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2029 }
2030
2031 static const int va_to_gen7_jpeg_rotation[4] = {
2032     GEN7_JPEG_ROTATION_0,
2033     GEN7_JPEG_ROTATION_90,
2034     GEN7_JPEG_ROTATION_180,
2035     GEN7_JPEG_ROTATION_270
2036 };
2037
2038 static void
2039 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
2040                         struct decode_state *decode_state,
2041                         struct gen7_mfd_context *gen7_mfd_context)
2042 {
2043     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2044     VAPictureParameterBufferJPEGBaseline *pic_param;
2045     int chroma_type = GEN7_YUV420;
2046     int frame_width_in_blks;
2047     int frame_height_in_blks;
2048
2049     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2050     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2051
2052     if (pic_param->num_components == 1)
2053         chroma_type = GEN7_YUV400;
2054     else if (pic_param->num_components == 3) {
2055         int h1 = pic_param->components[0].h_sampling_factor;
2056         int h2 = pic_param->components[1].h_sampling_factor;
2057         int h3 = pic_param->components[2].h_sampling_factor;
2058         int v1 = pic_param->components[0].v_sampling_factor;
2059         int v2 = pic_param->components[1].v_sampling_factor;
2060         int v3 = pic_param->components[2].v_sampling_factor;
2061
2062         if (h1 == 2 * h2 && h2 == h3 &&
2063             v1 == 2 * v2 && v2 == v3)
2064             chroma_type = GEN7_YUV420;
2065         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2066                  v1 == 1 && v2 == 1 && v3 == 1)
2067             chroma_type = GEN7_YUV422H_2Y;
2068         else if (h1 == h2 && h2 == h3 &&
2069                  v1 == v2 && v2 == v3)
2070             chroma_type = GEN7_YUV444;
2071         else if (h1 == 4 * h2 && h2 == h3 &&
2072                  v1 == v2 && v2 == v3)
2073             chroma_type = GEN7_YUV411;
2074         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2075                  v1 == 2 && v2 == 1 && v3 == 1)
2076             chroma_type = GEN7_YUV422V_2Y;
2077         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2078                  v1 == 2 && v2 == 2 && v3 == 2)
2079             chroma_type = GEN7_YUV422H_4Y;
2080         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2081                  v1 == 2 && v2 == 1 && v3 == 1)
2082             chroma_type = GEN7_YUV422V_4Y;
2083         else
2084             assert(0);
2085     }
2086
2087     if (chroma_type == GEN7_YUV400 ||
2088         chroma_type == GEN7_YUV444 ||
2089         chroma_type == GEN7_YUV422V_2Y) {
2090         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2091         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2092     } else if (chroma_type == GEN7_YUV411) {
2093         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2094         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2095     } else {
2096         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2097         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2098     }
2099
2100     BEGIN_BCS_BATCH(batch, 3);
2101     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2102     OUT_BCS_BATCH(batch,
2103                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2104                   (chroma_type << 0));
2105     OUT_BCS_BATCH(batch,
2106                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2107                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2108     ADVANCE_BCS_BATCH(batch);
2109 }
2110
2111 static const int va_to_gen7_jpeg_hufftable[2] = {
2112     MFX_HUFFTABLE_ID_Y,
2113     MFX_HUFFTABLE_ID_UV
2114 };
2115
2116 static void
2117 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2118                                struct decode_state *decode_state,
2119                                struct gen7_mfd_context *gen7_mfd_context,
2120                                int num_tables)
2121 {
2122     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2123     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2124     int index;
2125
2126     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2127         return;
2128
2129     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2130
2131     for (index = 0; index < num_tables; index++) {
2132         int id = va_to_gen7_jpeg_hufftable[index];
2133         if (!huffman_table->load_huffman_table[index])
2134             continue;
2135         BEGIN_BCS_BATCH(batch, 53);
2136         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2137         OUT_BCS_BATCH(batch, id);
2138         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2139         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2140         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2141         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2142         ADVANCE_BCS_BATCH(batch);
2143     }
2144 }
2145
2146 static const int va_to_gen7_jpeg_qm[5] = {
2147     -1,
2148     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2149     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2150     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2151     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2152 };
2153
2154 static void
2155 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2156                        struct decode_state *decode_state,
2157                        struct gen7_mfd_context *gen7_mfd_context)
2158 {
2159     VAPictureParameterBufferJPEGBaseline *pic_param;
2160     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2161     int index;
2162
2163     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2164         return;
2165
2166     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2167     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2168
2169     assert(pic_param->num_components <= 3);
2170
2171     for (index = 0; index < pic_param->num_components; index++) {
2172         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2173         int qm_type;
2174         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2175         unsigned char raster_qm[64];
2176         int j;
2177
2178         if (id > 4 || id < 1)
2179             continue;
2180
2181         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2182             continue;
2183
2184         qm_type = va_to_gen7_jpeg_qm[id];
2185
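             /*
              * JPEG DQT tables are delivered in zig-zag scan order; the MFX QM
              * state expects raster order, so reorder through zigzag_direct
              * before programming the matrix.
              */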
2186         for (j = 0; j < 64; j++)
2187             raster_qm[zigzag_direct[j]] = qm[j];
2188
2189         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2190     }
2191 }
2192
2193 static void
2194 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2195                          VAPictureParameterBufferJPEGBaseline *pic_param,
2196                          VASliceParameterBufferJPEGBaseline *slice_param,
2197                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2198                          dri_bo *slice_data_bo,
2199                          struct gen7_mfd_context *gen7_mfd_context)
2200 {
2201     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2202     int scan_component_mask = 0;
2203     int i;
2204
2205     assert(slice_param->num_components > 0);
2206     assert(slice_param->num_components < 4);
2207     assert(slice_param->num_components <= pic_param->num_components);
2208
2209     for (i = 0; i < slice_param->num_components; i++) {
2210         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2211         case 1:
2212             scan_component_mask |= (1 << 0);
2213             break;
2214         case 2:
2215             scan_component_mask |= (1 << 1);
2216             break;
2217         case 3:
2218             scan_component_mask |= (1 << 2);
2219             break;
2220         default:
2221             assert(0);
2222             break;
2223         }
2224     }
2225
2226     BEGIN_BCS_BATCH(batch, 6);
2227     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2228     OUT_BCS_BATCH(batch,
2229                   slice_param->slice_data_size);
2230     OUT_BCS_BATCH(batch,
2231                   slice_param->slice_data_offset);
2232     OUT_BCS_BATCH(batch,
2233                   slice_param->slice_horizontal_position << 16 |
2234                   slice_param->slice_vertical_position << 0);
2235     OUT_BCS_BATCH(batch,
2236                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2237                   (scan_component_mask << 27) |                 /* scan components */
2238                   (0 << 26) |   /* disable interrupt allowed */
2239                   (slice_param->num_mcus << 0));                /* MCU count */
2240     OUT_BCS_BATCH(batch,
2241                   (slice_param->restart_interval << 0));    /* RestartInterval */
2242     ADVANCE_BCS_BATCH(batch);
2243 }
2244
2245 /* Workaround for JPEG decoding on Ivybridge */
2246 #ifdef JPEG_WA
2247
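     /*
      * Hard-coded 16x16 AVC intra clip used for the JPEG decode workaround:
      * the raw slice bytes, their size, the bit offset of the macroblock data
      * and the slice QP are canned here and decoded through the AVC path
      * (see gen8_mfd_jpeg_wa) before the real JPEG picture is handled.
      */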
2248 static struct {
2249     int width;
2250     int height;
2251     unsigned char data[32];
2252     int data_size;
2253     int data_bit_offset;
2254     int qp;
2255 } gen7_jpeg_wa_clip = {
2256     16,
2257     16,
2258     {
2259         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2260         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2261     },
2262     14,
2263     40,
2264     28,
2265 };
2266
2267 static void
2268 gen8_jpeg_wa_init(VADriverContextP ctx,
2269                   struct gen7_mfd_context *gen7_mfd_context)
2270 {
2271     struct i965_driver_data *i965 = i965_driver_data(ctx);
2272     VAStatus status;
2273     struct object_surface *obj_surface;
2274
2275     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2276         i965_DestroySurfaces(ctx,
2277                              &gen7_mfd_context->jpeg_wa_surface_id,
2278                              1);
2279
2280     status = i965_CreateSurfaces(ctx,
2281                                  gen7_jpeg_wa_clip.width,
2282                                  gen7_jpeg_wa_clip.height,
2283                                  VA_RT_FORMAT_YUV420,
2284                                  1,
2285                                  &gen7_mfd_context->jpeg_wa_surface_id);
2286     assert(status == VA_STATUS_SUCCESS);
2287
2288     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2289     assert(obj_surface);
2290     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2291     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2292
2293     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2294         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2295                                                                "JPEG WA data",
2296                                                                0x1000,
2297                                                                0x1000);
2298         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2299                        0,
2300                        gen7_jpeg_wa_clip.data_size,
2301                        gen7_jpeg_wa_clip.data);
2302     }
2303 }
2304
2305 static void
2306 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2307                               struct gen7_mfd_context *gen7_mfd_context)
2308 {
2309     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2310
2311     BEGIN_BCS_BATCH(batch, 5);
2312     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2313     OUT_BCS_BATCH(batch,
2314                   (MFX_LONG_MODE << 17) | /* Currently only the long format is supported */
2315                   (MFD_MODE_VLD << 15) | /* VLD mode */
2316                   (0 << 10) | /* disable Stream-Out */
2317                   (0 << 9)  | /* Post Deblocking Output */
2318                   (1 << 8)  | /* Pre Deblocking Output */
2319                   (0 << 5)  | /* not in stitch mode */
2320                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2321                   (MFX_FORMAT_AVC << 0));
2322     OUT_BCS_BATCH(batch,
2323                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2324                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2325                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2326                   (0 << 1)  |
2327                   (0 << 0));
2328     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2329     OUT_BCS_BATCH(batch, 0); /* reserved */
2330     ADVANCE_BCS_BATCH(batch);
2331 }
2332
2333 static void
2334 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2335                            struct gen7_mfd_context *gen7_mfd_context)
2336 {
2337     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2338     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2339
2340     BEGIN_BCS_BATCH(batch, 6);
2341     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2342     OUT_BCS_BATCH(batch, 0);
2343     OUT_BCS_BATCH(batch,
2344                   ((obj_surface->orig_width - 1) << 18) |
2345                   ((obj_surface->orig_height - 1) << 4));
2346     OUT_BCS_BATCH(batch,
2347                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2348                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2349                   (0 << 22) | /* surface object control state, ignored */
2350                   ((obj_surface->width - 1) << 3) | /* pitch */
2351                   (0 << 2)  | /* must be 0 */
2352                   (1 << 1)  | /* must be tiled */
2353                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2354     OUT_BCS_BATCH(batch,
2355                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2356                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2357     OUT_BCS_BATCH(batch,
2358                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2359                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2360     ADVANCE_BCS_BATCH(batch);
2361 }
2362
2363 static void
2364 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2365                                  struct gen7_mfd_context *gen7_mfd_context)
2366 {
2367     struct i965_driver_data *i965 = i965_driver_data(ctx);
2368     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2369     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2370     dri_bo *intra_bo;
2371     int i;
2372
2373     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2374                             "intra row store",
2375                             128 * 64,
2376                             0x1000);
2377
2378     BEGIN_BCS_BATCH(batch, 61);
2379     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2380     OUT_BCS_RELOC64(batch,
2381                     obj_surface->bo,
2382                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2383                     0);
2384     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2385
2386
2387     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2388     OUT_BCS_BATCH(batch, 0);
2389     OUT_BCS_BATCH(batch, 0);
2390
2391     /* uncompressed-video & stream out 7-12 */
2392     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2393     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2394     OUT_BCS_BATCH(batch, 0);
2395     OUT_BCS_BATCH(batch, 0);
2396     OUT_BCS_BATCH(batch, 0);
2397     OUT_BCS_BATCH(batch, 0);
2398
2399     /* the DW 13-15 is for intra row store scratch */
2400     OUT_BCS_RELOC64(batch,
2401                     intra_bo,
2402                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2403                     0);
2404
2405     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2406
2407     /* the DW 16-18 is for deblocking filter */
2408     OUT_BCS_BATCH(batch, 0);
2409     OUT_BCS_BATCH(batch, 0);
2410     OUT_BCS_BATCH(batch, 0);
2411
2412     /* DW 19..50 */
2413     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2414         OUT_BCS_BATCH(batch, 0);
2415         OUT_BCS_BATCH(batch, 0);
2416     }
2417     OUT_BCS_BATCH(batch, 0);
2418
2419     /* the DW52-54 is for mb status address */
2420     OUT_BCS_BATCH(batch, 0);
2421     OUT_BCS_BATCH(batch, 0);
2422     OUT_BCS_BATCH(batch, 0);
2423     /* the DW56-60 is for ILDB & second ILDB address */
2424     OUT_BCS_BATCH(batch, 0);
2425     OUT_BCS_BATCH(batch, 0);
2426     OUT_BCS_BATCH(batch, 0);
2427     OUT_BCS_BATCH(batch, 0);
2428     OUT_BCS_BATCH(batch, 0);
2429     OUT_BCS_BATCH(batch, 0);
2430
2431     ADVANCE_BCS_BATCH(batch);
2432
2433     dri_bo_unreference(intra_bo);
2434 }
2435
2436 static void
2437 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2438                                      struct gen7_mfd_context *gen7_mfd_context)
2439 {
2440     struct i965_driver_data *i965 = i965_driver_data(ctx);
2441     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2442     dri_bo *bsd_mpc_bo, *mpr_bo;
2443
2444     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2445                               "bsd mpc row store",
2446                               11520, /* 1.5 * 120 * 64 */
2447                               0x1000);
2448
2449     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2450                           "mpr row store",
2451                           7680, /* 1.0 * 120 * 64 */
2452                           0x1000);
2453
2454     BEGIN_BCS_BATCH(batch, 10);
2455     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2456
2457     OUT_BCS_RELOC64(batch,
2458                     bsd_mpc_bo,
2459                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2460                     0);
2461
2462     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2463
2464     OUT_BCS_RELOC64(batch,
2465                     mpr_bo,
2466                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2467                     0);
2468     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2469
2470     OUT_BCS_BATCH(batch, 0);
2471     OUT_BCS_BATCH(batch, 0);
2472     OUT_BCS_BATCH(batch, 0);
2473
2474     ADVANCE_BCS_BATCH(batch);
2475
2476     dri_bo_unreference(bsd_mpc_bo);
2477     dri_bo_unreference(mpr_bo);
2478 }
2479
2480 static void
2481 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2482                           struct gen7_mfd_context *gen7_mfd_context)
2483 {
2484
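         /* Intentionally a no-op: no quantizer matrices are programmed for the
          * workaround clip; presumably the hardware defaults are sufficient. */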
2485 }
2486
2487 static void
2488 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2489                            struct gen7_mfd_context *gen7_mfd_context)
2490 {
2491     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2492     int img_struct = 0;
2493     int mbaff_frame_flag = 0;
2494     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2495
2496     BEGIN_BCS_BATCH(batch, 16);
2497     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2498     OUT_BCS_BATCH(batch,
2499                   width_in_mbs * height_in_mbs);
2500     OUT_BCS_BATCH(batch,
2501                   ((height_in_mbs - 1) << 16) |
2502                   ((width_in_mbs - 1) << 0));
2503     OUT_BCS_BATCH(batch,
2504                   (0 << 24) |
2505                   (0 << 16) |
2506                   (0 << 14) |
2507                   (0 << 13) |
2508                   (0 << 12) | /* differ from GEN6 */
2509                   (0 << 10) |
2510                   (img_struct << 8));
2511     OUT_BCS_BATCH(batch,
2512                   (1 << 10) | /* 4:2:0 */
2513                   (1 << 7) |  /* CABAC */
2514                   (0 << 6) |
2515                   (0 << 5) |
2516                   (0 << 4) |
2517                   (0 << 3) |
2518                   (1 << 2) |
2519                   (mbaff_frame_flag << 1) |
2520                   (0 << 0));
2521     OUT_BCS_BATCH(batch, 0);
2522     OUT_BCS_BATCH(batch, 0);
2523     OUT_BCS_BATCH(batch, 0);
2524     OUT_BCS_BATCH(batch, 0);
2525     OUT_BCS_BATCH(batch, 0);
2526     OUT_BCS_BATCH(batch, 0);
2527     OUT_BCS_BATCH(batch, 0);
2528     OUT_BCS_BATCH(batch, 0);
2529     OUT_BCS_BATCH(batch, 0);
2530     OUT_BCS_BATCH(batch, 0);
2531     OUT_BCS_BATCH(batch, 0);
2532     ADVANCE_BCS_BATCH(batch);
2533 }
2534
2535 static void
2536 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2537                                   struct gen7_mfd_context *gen7_mfd_context)
2538 {
2539     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2540     int i;
2541
2542     BEGIN_BCS_BATCH(batch, 71);
2543     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2544
2545     /* reference surfaces 0..15 */
2546     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2547         OUT_BCS_BATCH(batch, 0); /* top */
2548         OUT_BCS_BATCH(batch, 0); /* bottom */
2549     }
2550
2551     OUT_BCS_BATCH(batch, 0);
2552
2553     /* the current decoding frame/field */
2554     OUT_BCS_BATCH(batch, 0); /* top */
2555     OUT_BCS_BATCH(batch, 0);
2556     OUT_BCS_BATCH(batch, 0);
2557
2558     /* POC List */
2559     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2560         OUT_BCS_BATCH(batch, 0);
2561         OUT_BCS_BATCH(batch, 0);
2562     }
2563
2564     OUT_BCS_BATCH(batch, 0);
2565     OUT_BCS_BATCH(batch, 0);
2566
2567     ADVANCE_BCS_BATCH(batch);
2568 }
2569
2570 static void
2571 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2572                                      struct gen7_mfd_context *gen7_mfd_context)
2573 {
         struct i965_driver_data *i965 = i965_driver_data(ctx);
2574     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2575
2576     BEGIN_BCS_BATCH(batch, 11);
2577     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2578     OUT_BCS_RELOC64(batch,
2579                     gen7_mfd_context->jpeg_wa_slice_data_bo,
2580                     I915_GEM_DOMAIN_INSTRUCTION, 0,
2581                     0);
2582     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2583     OUT_BCS_BATCH(batch, 0);
2584     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2585     OUT_BCS_BATCH(batch, 0);
2586     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2587     OUT_BCS_BATCH(batch, 0);
2588     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2589     OUT_BCS_BATCH(batch, 0);
2590     ADVANCE_BCS_BATCH(batch);
2591 }
2592
2593 static void
2594 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2595                             struct gen7_mfd_context *gen7_mfd_context)
2596 {
2597     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2598
2599     /* the input bitstream format on GEN7 differs from GEN6 */
2600     BEGIN_BCS_BATCH(batch, 6);
2601     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2602     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2603     OUT_BCS_BATCH(batch, 0);
2604     OUT_BCS_BATCH(batch,
2605                   (0 << 31) |
2606                   (0 << 14) |
2607                   (0 << 12) |
2608                   (0 << 10) |
2609                   (0 << 8));
2610     OUT_BCS_BATCH(batch,
2611                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2612                   (0 << 5)  |
2613                   (0 << 4)  |
2614                   (1 << 3) | /* LastSlice Flag */
2615                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2616     OUT_BCS_BATCH(batch, 0);
2617     ADVANCE_BCS_BATCH(batch);
2618 }
2619
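     /* Program MFX_AVC_SLICE_STATE for one intra slice covering the dummy
        picture, with deblocking disabled. */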
2620 static void
2621 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2622                              struct gen7_mfd_context *gen7_mfd_context)
2623 {
2624     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2625     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2626     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2627     int first_mb_in_slice = 0;
2628     int slice_type = SLICE_TYPE_I;
2629
2630     BEGIN_BCS_BATCH(batch, 11);
2631     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2632     OUT_BCS_BATCH(batch, slice_type);
2633     OUT_BCS_BATCH(batch,
2634                   (num_ref_idx_l1 << 24) |
2635                   (num_ref_idx_l0 << 16) |
2636                   (0 << 8) |
2637                   (0 << 0));
2638     OUT_BCS_BATCH(batch,
2639                   (0 << 29) |
2640                   (1 << 27) |   /* disable Deblocking */
2641                   (0 << 24) |
2642                   (gen7_jpeg_wa_clip.qp << 16) |
2643                   (0 << 8) |
2644                   (0 << 0));
2645     OUT_BCS_BATCH(batch,
2646                   (slice_ver_pos << 24) |
2647                   (slice_hor_pos << 16) |
2648                   (first_mb_in_slice << 0));
2649     OUT_BCS_BATCH(batch,
2650                   (next_slice_ver_pos << 16) |
2651                   (next_slice_hor_pos << 0));
2652     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2653     OUT_BCS_BATCH(batch, 0);
2654     OUT_BCS_BATCH(batch, 0);
2655     OUT_BCS_BATCH(batch, 0);
2656     OUT_BCS_BATCH(batch, 0);
2657     ADVANCE_BCS_BATCH(batch);
2658 }
2659
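     /* JPEG hardware workaround: decode a tiny canned AVC clip first,
        presumably to put the MFX pipeline into a known-good state before
        the real JPEG decode is submitted. */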
2660 static void
2661 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2662                  struct gen7_mfd_context *gen7_mfd_context)
2663 {
2664     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2665     gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2666     intel_batchbuffer_emit_mi_flush(batch);
2667     gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2668     gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2669     gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2670     gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2671     gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2672     gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2673     gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2674
2675     gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2676     gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2677     gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2678 }
2679
2680 #endif
2681
2682 void
2683 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2684                              struct decode_state *decode_state,
2685                              struct gen7_mfd_context *gen7_mfd_context)
2686 {
2687     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2688     VAPictureParameterBufferJPEGBaseline *pic_param;
2689     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2690     dri_bo *slice_data_bo;
2691     int i, j, max_selector = 0;
2692
2693     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2694     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2695
2696     /* Currently only Baseline DCT is supported */
2697     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2698     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2699 #ifdef JPEG_WA
2700     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2701 #endif
2702     intel_batchbuffer_emit_mi_flush(batch);
2703     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2704     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2705     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2706     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2707     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2708
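         /* First pass: scan every slice (scan) parameter only to find the
            largest DC/AC Huffman table selector in use, so that just the
            needed tables are loaded below. */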
2709     for (j = 0; j < decode_state->num_slice_params; j++) {
2710         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2711         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2712         slice_data_bo = decode_state->slice_datas[j]->bo;
2713         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2714
2715         if (j == decode_state->num_slice_params - 1)
2716             next_slice_group_param = NULL;
2717         else
2718             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2719
2720         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2721             int component;
2722
2723             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2724
2725             if (i < decode_state->slice_params[j]->num_elements - 1)
2726                 next_slice_param = slice_param + 1;
2727             else
2728                 next_slice_param = next_slice_group_param;
2729
2730             for (component = 0; component < slice_param->num_components; component++) {
2731                 if (max_selector < slice_param->components[component].dc_table_selector)
2732                     max_selector = slice_param->components[component].dc_table_selector;
2733
2734                 if (max_selector < slice_param->components[component].ac_table_selector)
2735                     max_selector = slice_param->components[component].ac_table_selector;
2736             }
2737
2738             slice_param++;
2739         }
2740     }
2741
2742     assert(max_selector < 2);
2743     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2744
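         /* Second pass: emit an MFD_JPEG_BSD_OBJECT for every scan. */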
2745     for (j = 0; j < decode_state->num_slice_params; j++) {
2746         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2747         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2748         slice_data_bo = decode_state->slice_datas[j]->bo;
2749         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2750
2751         if (j == decode_state->num_slice_params - 1)
2752             next_slice_group_param = NULL;
2753         else
2754             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2755
2756         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2757             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2758
2759             if (i < decode_state->slice_params[j]->num_elements - 1)
2760                 next_slice_param = slice_param + 1;
2761             else
2762                 next_slice_param = next_slice_group_param;
2763
2764             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2765             slice_param++;
2766         }
2767     }
2768
2769     intel_batchbuffer_end_atomic(batch);
2770     intel_batchbuffer_flush(batch);
2771 }
2772
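     /* DC/AC dequantization lookup tables from the VP8 specification,
        indexed by the clipped quantization index (0..127). */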
2773 static const int vp8_dc_qlookup[128] = {
2774     4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
2775     18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
2776     29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
2777     44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
2778     59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
2779     75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
2780     91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2781     122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
2782 };
2783
2784 static const int vp8_ac_qlookup[128] = {
2785     4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
2786     20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
2787     36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
2788     52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
2789     78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
2790     110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2791     155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2792     213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2793 };
2794
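     /* Clamp a quantization index into the valid 0..127 range of the
        lookup tables above. */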
2795 static inline unsigned int vp8_clip_quantization_index(int index)
2796 {
2797     if (index > 127)
2798         return 127;
2799     else if (index < 0)
2800         return 0;
2801
2802     return index;
2803 }
2804
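     /* Per-frame VP8 decode setup: update the reference frame store, bind
        the render target as pre-/post-deblocking output depending on
        loop_filter_disable, and (re)allocate the row-store and
        segmentation scratch buffers. */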
2805 static void
2806 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2807                          struct decode_state *decode_state,
2808                          struct gen7_mfd_context *gen7_mfd_context)
2809 {
2810     struct object_surface *obj_surface;
2811     struct i965_driver_data *i965 = i965_driver_data(ctx);
2812     dri_bo *bo;
2813     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2814     int width_in_mbs = (pic_param->frame_width + 15) / 16;
2815     int height_in_mbs = (pic_param->frame_height + 15) / 16;
2816
2817     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2818     assert(height_in_mbs > 0 && height_in_mbs <= 256);
2819
2820     intel_update_vp8_frame_store_index(ctx,
2821                                        decode_state,
2822                                        pic_param,
2823                                        gen7_mfd_context->reference_surface);
2824
2825     /* Current decoded picture */
2826     obj_surface = decode_state->render_object;
2827     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2828
2829     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2830     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2831     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2832     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2833
2834     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2835     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2836     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2837     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2838
2839     intel_ensure_vp8_segmentation_buffer(ctx,
2840                                          &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2841
2842     /* The same as AVC */
2843     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2844     bo = dri_bo_alloc(i965->intel.bufmgr,
2845                       "intra row store",
2846                       width_in_mbs * 64,
2847                       0x1000);
2848     assert(bo);
2849     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2850     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2851
2852     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2853     bo = dri_bo_alloc(i965->intel.bufmgr,
2854                       "deblocking filter row store",
2855                       width_in_mbs * 64 * 4,
2856                       0x1000);
2857     assert(bo);
2858     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2859     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2860
2861     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2862     bo = dri_bo_alloc(i965->intel.bufmgr,
2863                       "bsd mpc row store",
2864                       width_in_mbs * 64 * 2,
2865                       0x1000);
2866     assert(bo);
2867     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2868     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2869
2870     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2871     bo = dri_bo_alloc(i965->intel.bufmgr,
2872                       "mpr row store",
2873                       width_in_mbs * 64 * 2,
2874                       0x1000);
2875     assert(bo);
2876     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2877     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2878
2879     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2880 }
2881
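     /* Program MFX_VP8_PIC_STATE (38 dwords): frame size, frame-level
        flags, per-segment quantizer values derived from the lookup tables,
        the coefficient probability buffer and the segmentation map stream
        address. */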
2882 static void
2883 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2884                        struct decode_state *decode_state,
2885                        struct gen7_mfd_context *gen7_mfd_context)
2886 {
2887     struct i965_driver_data *i965 = i965_driver_data(ctx);
2888     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2889     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2890     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2891     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2892     dri_bo *probs_bo = decode_state->probability_data->bo;
2893     int i, j, log2num;
2894     unsigned int quantization_value[4][6];
2895
2896     /* There is no safe way to error out if the segmentation buffer
2897        could not be allocated. So, instead of aborting, simply decode
2898        something even if the result may look totally inaccurate */
2899     const unsigned int enable_segmentation =
2900         pic_param->pic_fields.bits.segmentation_enabled &&
2901         gen7_mfd_context->segmentation_buffer.valid;
2902
2903     log2num = (int)log2(slice_param->num_of_partitions - 1);
2904
2905     BEGIN_BCS_BATCH(batch, 38);
2906     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2907     OUT_BCS_BATCH(batch,
2908                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2909                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2910     OUT_BCS_BATCH(batch,
2911                   log2num << 24 |
2912                   pic_param->pic_fields.bits.sharpness_level << 16 |
2913                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2914                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2915                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2916                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2917                   (enable_segmentation &&
2918                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
2919                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2920                   (enable_segmentation &&
2921                    !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2922                   (enable_segmentation &&
2923                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2924                   (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicates an intra frame in the VP8 stream/spec (section 9.1) */
2925                   pic_param->pic_fields.bits.filter_type << 4 |
2926                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2927                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2928
2929     OUT_BCS_BATCH(batch,
2930                   pic_param->loop_filter_level[3] << 24 |
2931                   pic_param->loop_filter_level[2] << 16 |
2932                   pic_param->loop_filter_level[1] <<  8 |
2933                   pic_param->loop_filter_level[0] <<  0);
2934
2935     /* Quantizer values for 4 segments, DW4-DW15 */
2936     for (i = 0; i < 4; i++) {
2937         quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2938         quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2939         quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /*y2dc*/
2940         /* multiplying by 101581 and shifting right by 16 approximates scaling by 155/100 */
2941         quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /*y2ac*/
2942         quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2943         quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2944
2945         quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2946         quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2947
2948         OUT_BCS_BATCH(batch,
2949                       quantization_value[i][0] << 16 | /* Y1AC */
2950                       quantization_value[i][1] <<  0); /* Y1DC */
2951         OUT_BCS_BATCH(batch,
2952                       quantization_value[i][5] << 16 | /* UVAC */
2953                       quantization_value[i][4] <<  0); /* UVDC */
2954         OUT_BCS_BATCH(batch,
2955                       quantization_value[i][3] << 16 | /* Y2AC */
2956                       quantization_value[i][2] <<  0); /* Y2DC */
2957     }
2958
2959     /* CoeffProbability table for non-key frame, DW16-DW18 */
2960     if (probs_bo) {
2961         OUT_BCS_RELOC64(batch, probs_bo,
2962                         0, I915_GEM_DOMAIN_INSTRUCTION,
2963                         0);
2964         OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2965     } else {
2966         OUT_BCS_BATCH(batch, 0);
2967         OUT_BCS_BATCH(batch, 0);
2968         OUT_BCS_BATCH(batch, 0);
2969     }
2970
2971     OUT_BCS_BATCH(batch,
2972                   pic_param->mb_segment_tree_probs[2] << 16 |
2973                   pic_param->mb_segment_tree_probs[1] <<  8 |
2974                   pic_param->mb_segment_tree_probs[0] <<  0);
2975
2976     OUT_BCS_BATCH(batch,
2977                   pic_param->prob_skip_false << 24 |
2978                   pic_param->prob_intra      << 16 |
2979                   pic_param->prob_last       <<  8 |
2980                   pic_param->prob_gf         <<  0);
2981
2982     OUT_BCS_BATCH(batch,
2983                   pic_param->y_mode_probs[3] << 24 |
2984                   pic_param->y_mode_probs[2] << 16 |
2985                   pic_param->y_mode_probs[1] <<  8 |
2986                   pic_param->y_mode_probs[0] <<  0);
2987
2988     OUT_BCS_BATCH(batch,
2989                   pic_param->uv_mode_probs[2] << 16 |
2990                   pic_param->uv_mode_probs[1] <<  8 |
2991                   pic_param->uv_mode_probs[0] <<  0);
2992
2993     /* MV update value, DW23-DW32 */
2994     for (i = 0; i < 2; i++) {
2995         for (j = 0; j < 20; j += 4) {
2996             OUT_BCS_BATCH(batch,
2997                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2998                           pic_param->mv_probs[i][j + 2] << 16 |
2999                           pic_param->mv_probs[i][j + 1] <<  8 |
3000                           pic_param->mv_probs[i][j + 0] <<  0);
3001         }
3002     }
3003
3004     OUT_BCS_BATCH(batch,
3005                   (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
3006                   (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
3007                   (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
3008                   (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);
3009
3010     OUT_BCS_BATCH(batch,
3011                   (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
3012                   (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
3013                   (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
3014                   (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);
3015
3016     /* segmentation id stream base address, DW35-DW37 */
3017     if (enable_segmentation) {
3018         OUT_BCS_RELOC64(batch, gen7_mfd_context->segmentation_buffer.bo,
3019                         0, I915_GEM_DOMAIN_INSTRUCTION,
3020                         0);
3021         OUT_BCS_BATCH(batch, i965->intel.mocs_state);
3022     } else {
3023         OUT_BCS_BATCH(batch, 0);
3024         OUT_BCS_BATCH(batch, 0);
3025         OUT_BCS_BATCH(batch, 0);
3026     }
3027     ADVANCE_BCS_BATCH(batch);
3028 }
3029
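     /* Emit MFD_VP8_BSD_OBJECT: partition 0 bool-coder state plus the byte
        offset and size of every token partition, with offsets accumulated
        from the per-partition sizes. */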
3030 static void
3031 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
3032                         VAPictureParameterBufferVP8 *pic_param,
3033                         VASliceParameterBufferVP8 *slice_param,
3034                         dri_bo *slice_data_bo,
3035                         struct gen7_mfd_context *gen7_mfd_context)
3036 {
3037     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3038     int i, log2num;
3039     unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
3040     unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
3041     unsigned int partition_size_0 = slice_param->partition_size[0];
3042
3043     assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
3044     if (used_bits == 8) {
3045         used_bits = 0;
3046         offset += 1;
3047         partition_size_0 -= 1;
3048     }
3049
3050     assert(slice_param->num_of_partitions >= 2);
3051     assert(slice_param->num_of_partitions <= 9);
3052
3053     log2num = (int)log2(slice_param->num_of_partitions - 1);
3054
3055     BEGIN_BCS_BATCH(batch, 22);
3056     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
3057     OUT_BCS_BATCH(batch,
3058                   used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
3059                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
3060                   log2num << 4 |
3061                   (slice_param->macroblock_offset & 0x7));
3062     OUT_BCS_BATCH(batch,
3063                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
3064                   0);
3065
3066     OUT_BCS_BATCH(batch, partition_size_0 + 1);
3067     OUT_BCS_BATCH(batch, offset);
3068     // partition sizes in bytes are present after the above first partition when there is more than one token partition
3069     offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
3070     for (i = 1; i < 9; i++) {
3071         if (i < slice_param->num_of_partitions) {
3072             OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
3073             OUT_BCS_BATCH(batch, offset);
3074         } else {
3075             OUT_BCS_BATCH(batch, 0);
3076             OUT_BCS_BATCH(batch, 0);
3077         }
3078
3079         offset += slice_param->partition_size[i];
3080     }
3081
3082     OUT_BCS_BATCH(batch, 0); /* concealment method */
3083
3084     ADVANCE_BCS_BATCH(batch);
3085 }
3086
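     /* Top-level VP8 decode: validate that exactly one slice with
        probability data is present, then emit the full MFX command
        sequence for the frame. */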
3087 void
3088 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3089                             struct decode_state *decode_state,
3090                             struct gen7_mfd_context *gen7_mfd_context)
3091 {
3092     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3093     VAPictureParameterBufferVP8 *pic_param;
3094     VASliceParameterBufferVP8 *slice_param;
3095     dri_bo *slice_data_bo;
3096
3097     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3098     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3099
3100     /* one slice per frame */
3101     if (decode_state->num_slice_params != 1 ||
3102         (!decode_state->slice_params ||
3103          !decode_state->slice_params[0] ||
3104          (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3105         (!decode_state->slice_datas ||
3106          !decode_state->slice_datas[0] ||
3107          !decode_state->slice_datas[0]->bo) ||
3108         !decode_state->probability_data) {
3109         WARN_ONCE("Wrong parameters for VP8 decoding\n");
3110
3111         return;
3112     }
3113
3114     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3115     slice_data_bo = decode_state->slice_datas[0]->bo;
3116
3117     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3118     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3119     intel_batchbuffer_emit_mi_flush(batch);
3120     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3121     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3122     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3123     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3124     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3125     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3126     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3127     intel_batchbuffer_end_atomic(batch);
3128     intel_batchbuffer_flush(batch);
3129 }
3130
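     /* hw_context::run entry point: sanity-check the input buffers and
        dispatch to the per-codec decode routine according to the VA
        profile. */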
3131 static VAStatus
3132 gen8_mfd_decode_picture(VADriverContextP ctx,
3133                         VAProfile profile,
3134                         union codec_state *codec_state,
3135                         struct hw_context *hw_context)
3136
3137 {
3138     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3139     struct decode_state *decode_state = &codec_state->decode;
3140     VAStatus vaStatus;
3141
3142     assert(gen7_mfd_context);
3143
3144     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3145
3146     if (vaStatus != VA_STATUS_SUCCESS)
3147         goto out;
3148
3149     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3150
3151     switch (profile) {
3152     case VAProfileMPEG2Simple:
3153     case VAProfileMPEG2Main:
3154         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3155         break;
3156
3157     case VAProfileH264ConstrainedBaseline:
3158     case VAProfileH264Main:
3159     case VAProfileH264High:
3160     case VAProfileH264StereoHigh:
3161     case VAProfileH264MultiviewHigh:
3162         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3163         break;
3164
3165     case VAProfileVC1Simple:
3166     case VAProfileVC1Main:
3167     case VAProfileVC1Advanced:
3168         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3169         break;
3170
3171     case VAProfileJPEGBaseline:
3172         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3173         break;
3174
3175     case VAProfileVP8Version0_3:
3176         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3177         break;
3178
3179     default:
3180         assert(0);
3181         break;
3182     }
3183
3184     vaStatus = VA_STATUS_SUCCESS;
3185
3186 out:
3187     return vaStatus;
3188 }
3189
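     /* Release all per-context scratch buffers, the JPEG workaround
        surface and the batch buffer. */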
3190 static void
3191 gen8_mfd_context_destroy(void *hw_context)
3192 {
3193     VADriverContextP ctx;
3194     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3195
3196     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3197
3198     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3199     gen7_mfd_context->post_deblocking_output.bo = NULL;
3200
3201     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3202     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3203
3204     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3205     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3206
3207     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3208     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3209
3210     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3211     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3212
3213     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3214     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3215
3216     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3217     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3218
3219     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3220     gen7_mfd_context->segmentation_buffer.bo = NULL;
3221
3222     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3223
3224     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3225         i965_DestroySurfaces(ctx,
3226                              &gen7_mfd_context->jpeg_wa_surface_id,
3227                              1);
3228         gen7_mfd_context->jpeg_wa_surface_object = NULL;
3229     }
3230
3231     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3232     free(gen7_mfd_context);
3233 }
3234
3235 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3236                                         struct gen7_mfd_context *gen7_mfd_context)
3237 {
3238     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3239     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3240     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3241     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3242 }
3243
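     /* Allocate and initialize the Gen8 MFD decoder hardware context for
        the given configuration. */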
3244 struct hw_context *
3245 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3246 {
3247     struct intel_driver_data *intel = intel_driver_data(ctx);
3248     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3249     int i;
3250
3251     if (!gen7_mfd_context)
3252         return NULL;
3253
3254     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3255     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3256     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3257
3258     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3259         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3260         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3261     }
3262
3263     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3264     gen7_mfd_context->segmentation_buffer.valid = 0;
3265
3266     switch (obj_config->profile) {
3267     case VAProfileMPEG2Simple:
3268     case VAProfileMPEG2Main:
3269         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3270         break;
3271
3272     case VAProfileH264ConstrainedBaseline:
3273     case VAProfileH264Main:
3274     case VAProfileH264High:
3275     case VAProfileH264StereoHigh:
3276     case VAProfileH264MultiviewHigh:
3277         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3278         break;
3279     default:
3280         break;
3281     }
3282
3283     gen7_mfd_context->driver_context = ctx;
3284     return (struct hw_context *)gen7_mfd_context;
3285 }