2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
/* True when the GPU revision is stepping B0 or later; used to gate
 * stepping-specific workarounds/features. */
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/* Zig-zag scan order for an 8x8 coefficient block: entry i is the raster
 * index of the i-th coefficient in zig-zag scan. Used to reorder
 * quantization matrices for the hardware.
 * NOTE(review): the closing "};" is outside this visible fragment. */
51 static const uint32_t zigzag_direct[64] = {
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
54 12, 19, 26, 33, 40, 48, 41, 34,
55 27, 20, 13, 6, 7, 14, 21, 28,
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
/* Attach AVC-specific private data (GenAvcSurface) to a decode surface and
 * lazily allocate its direct-MV scratch buffer, sized from the picture
 * dimensions in the H.264 picture parameters.
 * NOTE(review): several interior lines (allocation flags, any dmv_bottom
 * handling, closing braces) are missing from this fragment; comments below
 * describe only the visible statements. */
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64 VAPictureParameterBufferH264 *pic_param,
65 struct object_surface *obj_surface)
67 struct i965_driver_data *i965 = i965_driver_data(ctx);
68 GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69 int width_in_mbs, height_in_mbs;
/* Destructor for the private data; the surface owns it from here on. */
71 obj_surface->free_private_data = gen_free_avc_surface;
72 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
/* First use of this surface: create the zero-initialized private record. */
75 if (!gen7_avc_surface) {
76 gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77 assert((obj_surface->size & 0x3f) == 0);
78 obj_surface->private_data = gen7_avc_surface;
81 /* DMV buffers now relate to the whole frame, irrespective of
/* Allocate the direct-MV read/write buffer once (128 bytes per MB). */
83 if (gen7_avc_surface->dmv_top == NULL) {
84 gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
85 "direct mv w/r buffer",
86 width_in_mbs * height_in_mbs * 128,
88 assert(gen7_avc_surface->dmv_top);
/* Emit the MFX_PIPE_MODE_SELECT command: selects codec (standard_select),
 * VLD decode mode, and whether pre-/post-deblocking output is written.
 * NOTE(review): some interior lines (signature tail, OUT_BCS_BATCH headers
 * for dwords 2-3, braces) are missing from this fragment. */
93 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
94 struct decode_state *decode_state,
96 struct gen7_mfd_context *gen7_mfd_context)
98 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Only these codec formats are supported by this decoder path. */
100 assert(standard_select == MFX_FORMAT_MPEG2 ||
101 standard_select == MFX_FORMAT_AVC ||
102 standard_select == MFX_FORMAT_VC1 ||
103 standard_select == MFX_FORMAT_JPEG ||
104 standard_select == MFX_FORMAT_VP8);
106 BEGIN_BCS_BATCH(batch, 5);
107 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
109 (MFX_LONG_MODE << 17) | /* Currently only support long format */
110 (MFD_MODE_VLD << 15) | /* VLD mode */
111 (0 << 10) | /* disable Stream-Out */
112 (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
113 (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
114 (0 << 5) | /* not in stitch mode */
115 (MFX_CODEC_DECODE << 4) | /* decoding mode */
116 (standard_select << 0));
118 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
119 (0 << 3) | /* terminate if AVC mbdata error occurs */
120 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
123 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
124 OUT_BCS_BATCH(batch, 0); /* reserved */
125 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_SURFACE_STATE for the render target: dimensions, pitch, tiling,
 * pixel format (monochrome for Y800, otherwise planar 4:2:0), and the
 * Cb/Cr plane Y-offsets of the NV12-style surface.
 * NOTE(review): a few interior lines (braces, some OUT_BCS_BATCH headers)
 * are missing from this fragment. */
129 gen8_mfd_surface_state(VADriverContextP ctx,
130 struct decode_state *decode_state,
132 struct gen7_mfd_context *gen7_mfd_context)
134 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
135 struct object_surface *obj_surface = decode_state->render_object;
136 unsigned int y_cb_offset;
137 unsigned int y_cr_offset;
138 unsigned int surface_format;
142 y_cb_offset = obj_surface->y_cb_offset;
143 y_cr_offset = obj_surface->y_cr_offset;
/* Y800 (grayscale) surfaces carry no chroma; everything else is 4:2:0. */
145 surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
146 MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
148 BEGIN_BCS_BATCH(batch, 6);
149 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
150 OUT_BCS_BATCH(batch, 0);
152 ((obj_surface->orig_height - 1) << 18) |
153 ((obj_surface->orig_width - 1) << 4));
155 (surface_format << 28) | /* 420 planar YUV surface */
156 ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
157 (0 << 22) | /* surface object control state, ignored */
158 ((obj_surface->width - 1) << 3) | /* pitch */
159 (0 << 2) | /* must be 0 */
160 (1 << 1) | /* must be tiled */
161 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
163 (0 << 16) | /* X offset for U(Cb), must be 0 */
164 (y_cb_offset << 0)); /* Y offset for U(Cb) */
166 (0 << 16) | /* X offset for V(Cr), must be 0 */
167 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
168 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): relocations for pre/post
 * deblocking outputs, row-store scratch buffers, and the reference picture
 * addresses; unused slots are written as zero dwords.
 * NOTE(review): interior lines are missing from this fragment (reloc offset
 * arguments, else-branch structure, loop braces), so the exact dword
 * pairing of each if/else cannot be confirmed from here. */
172 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
173 struct decode_state *decode_state,
175 struct gen7_mfd_context *gen7_mfd_context)
177 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
180 BEGIN_BCS_BATCH(batch, 61);
181 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
182 /* Pre-deblock 1-3 */
183 if (gen7_mfd_context->pre_deblocking_output.valid)
184 OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
185 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
188 OUT_BCS_BATCH(batch, 0);
190 OUT_BCS_BATCH(batch, 0);
191 OUT_BCS_BATCH(batch, 0);
192 /* Post-deblocking 4-6 */
193 if (gen7_mfd_context->post_deblocking_output.valid)
194 OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
195 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
198 OUT_BCS_BATCH(batch, 0);
200 OUT_BCS_BATCH(batch, 0);
201 OUT_BCS_BATCH(batch, 0);
203 /* uncompressed-video & stream out 7-12 */
204 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
205 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
206 OUT_BCS_BATCH(batch, 0);
207 OUT_BCS_BATCH(batch, 0);
208 OUT_BCS_BATCH(batch, 0);
209 OUT_BCS_BATCH(batch, 0);
211 /* intra row-store scratch 13-15 */
212 if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
213 OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
214 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
217 OUT_BCS_BATCH(batch, 0);
219 OUT_BCS_BATCH(batch, 0);
220 OUT_BCS_BATCH(batch, 0);
221 /* deblocking-filter-row-store 16-18 */
222 if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
223 OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
224 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
227 OUT_BCS_BATCH(batch, 0);
228 OUT_BCS_BATCH(batch, 0);
229 OUT_BCS_BATCH(batch, 0);
/* Reference pictures: emit a read-domain reloc per valid entry, zero
 * dwords for empty slots. */
232 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
233 struct object_surface *obj_surface;
235 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
236 gen7_mfd_context->reference_surface[i].obj_surface &&
237 gen7_mfd_context->reference_surface[i].obj_surface->bo) {
238 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
240 OUT_BCS_RELOC(batch, obj_surface->bo,
241 I915_GEM_DOMAIN_INSTRUCTION, 0,
244 OUT_BCS_BATCH(batch, 0);
247 OUT_BCS_BATCH(batch, 0);
250 /* reference property 51 */
251 OUT_BCS_BATCH(batch, 0);
253 /* Macroblock status & ILDB 52-57 */
254 OUT_BCS_BATCH(batch, 0);
255 OUT_BCS_BATCH(batch, 0);
256 OUT_BCS_BATCH(batch, 0);
257 OUT_BCS_BATCH(batch, 0);
258 OUT_BCS_BATCH(batch, 0);
259 OUT_BCS_BATCH(batch, 0);
261 /* the second Macroblock status 58-60 */
262 OUT_BCS_BATCH(batch, 0);
263 OUT_BCS_BATCH(batch, 0);
264 OUT_BCS_BATCH(batch, 0);
266 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): points the bitstream
 * indirect object at the slice-data buffer; the MV, IT_COFF, IT_DBLK and
 * PAK_BSE sections are zeroed (unused for VLD decode). */
270 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
271 dri_bo *slice_data_bo,
273 struct gen7_mfd_context *gen7_mfd_context)
275 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
277 BEGIN_BCS_BATCH(batch, 26);
278 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
280 OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
281 OUT_BCS_BATCH(batch, 0);
282 OUT_BCS_BATCH(batch, 0);
283 /* Upper bound 4-5 */
284 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
285 OUT_BCS_BATCH(batch, 0);
287 /* MFX indirect MV 6-10 */
288 OUT_BCS_BATCH(batch, 0);
289 OUT_BCS_BATCH(batch, 0);
290 OUT_BCS_BATCH(batch, 0);
291 OUT_BCS_BATCH(batch, 0);
292 OUT_BCS_BATCH(batch, 0);
294 /* MFX IT_COFF 11-15 */
295 OUT_BCS_BATCH(batch, 0);
296 OUT_BCS_BATCH(batch, 0);
297 OUT_BCS_BATCH(batch, 0);
298 OUT_BCS_BATCH(batch, 0);
299 OUT_BCS_BATCH(batch, 0);
301 /* MFX IT_DBLK 16-20 */
302 OUT_BCS_BATCH(batch, 0);
303 OUT_BCS_BATCH(batch, 0);
304 OUT_BCS_BATCH(batch, 0);
305 OUT_BCS_BATCH(batch, 0);
306 OUT_BCS_BATCH(batch, 0);
308 /* MFX PAK_BSE object for encoder 21-25 */
309 OUT_BCS_BATCH(batch, 0);
310 OUT_BCS_BATCH(batch, 0);
311 OUT_BCS_BATCH(batch, 0);
312 OUT_BCS_BATCH(batch, 0);
313 OUT_BCS_BATCH(batch, 0);
315 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): BSD/MPC and MPR row-store
 * scratch buffers plus the VC-1 bitplane read buffer, each emitted as a
 * reloc when valid.
 * NOTE(review): reloc offset arguments and else-branch lines are missing
 * from this fragment. */
319 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
320 struct decode_state *decode_state,
322 struct gen7_mfd_context *gen7_mfd_context)
324 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
326 BEGIN_BCS_BATCH(batch, 10);
327 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
329 if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
330 OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
331 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
334 OUT_BCS_BATCH(batch, 0);
336 OUT_BCS_BATCH(batch, 0);
337 OUT_BCS_BATCH(batch, 0);
338 /* MPR Row Store Scratch buffer 4-6 */
339 if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
340 OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
341 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
344 OUT_BCS_BATCH(batch, 0);
346 OUT_BCS_BATCH(batch, 0);
347 OUT_BCS_BATCH(batch, 0);
/* Bitplane buffer (read-only domain; used by VC-1). */
350 if (gen7_mfd_context->bitplane_read_buffer.valid)
351 OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
352 I915_GEM_DOMAIN_INSTRUCTION, 0,
355 OUT_BCS_BATCH(batch, 0);
356 OUT_BCS_BATCH(batch, 0);
357 OUT_BCS_BATCH(batch, 0);
358 ADVANCE_BCS_BATCH(batch);
/* Emit one MFX_QM_STATE command carrying a quantization matrix of the
 * given type. The matrix is copied into a fixed 64-byte staging buffer so
 * the command payload is always 16 dwords, regardless of qm_length.
 * NOTE(review): the qm/qm_length/qm_type parameter lines are missing from
 * this fragment; the staging buffer is NOT zero-initialized before the
 * memcpy, so trailing bytes beyond qm_length are indeterminate — worth
 * confirming against the full source. */
362 gen8_mfd_qm_state(VADriverContextP ctx,
366 struct gen7_mfd_context *gen7_mfd_context)
368 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
369 unsigned int qm_buffer[16];
371 assert(qm_length <= 16 * 4);
372 memcpy(qm_buffer, qm, qm_length);
374 BEGIN_BCS_BATCH(batch, 18);
375 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
376 OUT_BCS_BATCH(batch, qm_type << 0);
377 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
378 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_IMG_STATE (17 dwords) from the H.264 picture parameters:
 * frame size in MBs, QP offsets, prediction flags, and the field/MBAFF
 * picture structure bits.
 * NOTE(review): the img_struct derivation lines (assignments for
 * top/bottom field) are partially missing from this fragment. */
382 gen8_mfd_avc_img_state(VADriverContextP ctx,
383 struct decode_state *decode_state,
384 struct gen7_mfd_context *gen7_mfd_context)
386 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
388 int mbaff_frame_flag;
389 unsigned int width_in_mbs, height_in_mbs;
390 VAPictureParameterBufferH264 *pic_param;
392 assert(decode_state->pic_param && decode_state->pic_param->buffer);
393 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
394 assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
/* Derive the hardware picture-structure code from the CurrPic flags. */
396 if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
398 else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
/* Field pictures (odd img_struct) must agree with field_pic_flag. */
403 if ((img_struct & 0x1) == 0x1) {
404 assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
406 assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
409 if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
410 assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
411 assert(pic_param->pic_fields.bits.field_pic_flag == 0);
413 assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
/* MBAFF applies only to frame pictures of an MBAFF sequence. */
416 mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
417 !pic_param->pic_fields.bits.field_pic_flag);
419 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
420 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
422 /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
423 assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
424 pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
425 assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
427 BEGIN_BCS_BATCH(batch, 17);
428 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
430 (width_in_mbs * height_in_mbs - 1));
432 ((height_in_mbs - 1) << 16) |
433 ((width_in_mbs - 1) << 0));
435 ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
436 ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
437 (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
438 (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
439 (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
440 (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
443 (pic_param->seq_fields.bits.chroma_format_idc << 10) |
444 (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
445 ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
446 (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
447 (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
448 (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
449 (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
450 (mbaff_frame_flag << 1) |
451 (pic_param->pic_fields.bits.field_pic_flag << 0));
/* Remaining dwords are reserved/unused for decode — zero them. */
452 OUT_BCS_BATCH(batch, 0);
453 OUT_BCS_BATCH(batch, 0);
454 OUT_BCS_BATCH(batch, 0);
455 OUT_BCS_BATCH(batch, 0);
456 OUT_BCS_BATCH(batch, 0);
457 OUT_BCS_BATCH(batch, 0);
458 OUT_BCS_BATCH(batch, 0);
459 OUT_BCS_BATCH(batch, 0);
460 OUT_BCS_BATCH(batch, 0);
461 OUT_BCS_BATCH(batch, 0);
462 OUT_BCS_BATCH(batch, 0);
463 OUT_BCS_BATCH(batch, 0);
464 ADVANCE_BCS_BATCH(batch);
/* Program the AVC quantization matrices: the 4x4 intra/inter lists always,
 * and the 8x8 lists only when transform_8x8_mode_flag is set. Falls back
 * to the context's default (flat) IQ matrix when the app supplied none. */
468 gen8_mfd_avc_qm_state(VADriverContextP ctx,
469 struct decode_state *decode_state,
470 struct gen7_mfd_context *gen7_mfd_context)
472 VAIQMatrixBufferH264 *iq_matrix;
473 VAPictureParameterBufferH264 *pic_param;
475 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
476 iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
478 iq_matrix = &gen7_mfd_context->iq_matrix.h264;
480 assert(decode_state->pic_param && decode_state->pic_param->buffer);
481 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
/* Lists 0-2 are intra 4x4, lists 3-5 are inter 4x4 (3 * 16 bytes each). */
483 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
484 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
486 if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
487 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
488 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
/* Emit MFD_AVC_PICID_STATE with picture-ID remapping disabled (dword 1
 * bit set); the remap table dwords are left at zero. */
493 gen8_mfd_avc_picid_state(VADriverContextP ctx,
494 struct decode_state *decode_state,
495 struct gen7_mfd_context *gen7_mfd_context)
497 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
499 BEGIN_BCS_BATCH(batch, 10);
500 OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
501 OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
502 OUT_BCS_BATCH(batch, 0);
503 OUT_BCS_BATCH(batch, 0);
504 OUT_BCS_BATCH(batch, 0);
505 OUT_BCS_BATCH(batch, 0);
506 OUT_BCS_BATCH(batch, 0);
507 OUT_BCS_BATCH(batch, 0);
508 OUT_BCS_BATCH(batch, 0);
509 OUT_BCS_BATCH(batch, 0);
510 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): direct-MV buffer addresses
 * for each reference surface and the current picture, followed by the
 * top/bottom picture-order-count (POC) pairs used by B-slice direct mode.
 * NOTE(review): several interior lines (reloc offsets, else branches, loop
 * braces) are missing from this fragment. */
514 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
515 struct decode_state *decode_state,
516 VAPictureParameterBufferH264 *pic_param,
517 VASliceParameterBufferH264 *slice_param,
518 struct gen7_mfd_context *gen7_mfd_context)
520 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
521 struct object_surface *obj_surface;
522 GenAvcSurface *gen7_avc_surface;
523 VAPictureH264 *va_pic;
526 BEGIN_BCS_BATCH(batch, 71);
527 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
529 /* reference surfaces 0..15 */
530 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
531 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
532 gen7_mfd_context->reference_surface[i].obj_surface &&
533 gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
535 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
536 gen7_avc_surface = obj_surface->private_data;
538 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
539 I915_GEM_DOMAIN_INSTRUCTION, 0,
541 OUT_BCS_BATCH(batch, 0);
543 OUT_BCS_BATCH(batch, 0);
544 OUT_BCS_BATCH(batch, 0);
548 OUT_BCS_BATCH(batch, 0);
550 /* the current decoding frame/field */
551 va_pic = &pic_param->CurrPic;
552 obj_surface = decode_state->render_object;
553 assert(obj_surface->bo && obj_surface->private_data);
554 gen7_avc_surface = obj_surface->private_data;
/* Current picture's DMV buffer is written by the hardware, hence the
 * write domain on this reloc. */
556 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
557 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
560 OUT_BCS_BATCH(batch, 0);
561 OUT_BCS_BATCH(batch, 0);
/* POC list: look up each reference in ReferenceFrames by surface id. */
564 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
565 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
568 const VAPictureH264 * const va_pic = avc_find_picture(
569 obj_surface->base.id, pic_param->ReferenceFrames,
570 ARRAY_ELEMS(pic_param->ReferenceFrames));
572 assert(va_pic != NULL);
573 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
574 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
576 OUT_BCS_BATCH(batch, 0);
577 OUT_BCS_BATCH(batch, 0);
/* Finally the current picture's POC pair. */
581 va_pic = &pic_param->CurrPic;
582 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
583 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
585 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_SLICE_STATE for one slice: normalized slice type, active
 * reference counts, QP/deblocking parameters, and the MB coordinates of
 * this slice and the next (or the frame end for the last slice). */
589 gen8_mfd_avc_slice_state(VADriverContextP ctx,
590 VAPictureParameterBufferH264 *pic_param,
591 VASliceParameterBufferH264 *slice_param,
592 VASliceParameterBufferH264 *next_slice_param,
593 struct gen7_mfd_context *gen7_mfd_context)
595 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
596 int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
597 int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
598 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
599 int num_ref_idx_l0, num_ref_idx_l1;
600 int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
601 pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
602 int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
/* Collapse SI/SP onto I/P; the hardware only knows I, P and B. */
605 if (slice_param->slice_type == SLICE_TYPE_I ||
606 slice_param->slice_type == SLICE_TYPE_SI) {
607 slice_type = SLICE_TYPE_I;
608 } else if (slice_param->slice_type == SLICE_TYPE_P ||
609 slice_param->slice_type == SLICE_TYPE_SP) {
610 slice_type = SLICE_TYPE_P;
612 assert(slice_param->slice_type == SLICE_TYPE_B);
613 slice_type = SLICE_TYPE_B;
/* Reference-list sizes: I uses none, P uses list 0, B uses both. */
616 if (slice_type == SLICE_TYPE_I) {
617 assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
618 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
621 } else if (slice_type == SLICE_TYPE_P) {
622 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
623 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
626 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
627 num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
/* In MBAFF frames each address covers an MB pair, hence the shift. */
630 first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
631 slice_hor_pos = first_mb_in_slice % width_in_mbs;
632 slice_ver_pos = first_mb_in_slice / width_in_mbs;
634 if (next_slice_param) {
635 first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
636 next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
637 next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
/* Last slice: the "next slice" position is the bottom of the picture
 * (half height for a field picture). */
639 next_slice_hor_pos = 0;
640 next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
643 BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
644 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
645 OUT_BCS_BATCH(batch, slice_type);
647 (num_ref_idx_l1 << 24) |
648 (num_ref_idx_l0 << 16) |
649 (slice_param->chroma_log2_weight_denom << 8) |
650 (slice_param->luma_log2_weight_denom << 0));
652 (slice_param->direct_spatial_mv_pred_flag << 29) |
653 (slice_param->disable_deblocking_filter_idc << 27) |
654 (slice_param->cabac_init_idc << 24) |
655 ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
656 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
657 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
659 (slice_ver_pos << 24) |
660 (slice_hor_pos << 16) |
661 (first_mb_in_slice << 0));
663 (next_slice_ver_pos << 16) |
664 (next_slice_hor_pos << 0));
666 (next_slice_param == NULL) << 19); /* last slice flag */
667 OUT_BCS_BATCH(batch, 0);
668 OUT_BCS_BATCH(batch, 0);
669 OUT_BCS_BATCH(batch, 0);
670 OUT_BCS_BATCH(batch, 0);
671 ADVANCE_BCS_BATCH(batch);
/* Thin wrapper: delegates reference-index-list programming to the shared
 * gen6 helper using this context's batch and frame store.
 * NOTE(review): some argument lines of the call are missing from this
 * fragment. */
675 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
676 VAPictureParameterBufferH264 *pic_param,
677 VASliceParameterBufferH264 *slice_param,
678 struct gen7_mfd_context *gen7_mfd_context)
680 gen6_send_avc_ref_idx_state(
681 gen7_mfd_context->base.batch,
683 gen7_mfd_context->reference_surface
/* Emit MFX_AVC_WEIGHTOFFSET_STATE when explicit weighted prediction is in
 * use: one table (list 0) for weighted P slices, two tables (lists 0 and
 * 1) for B slices with weighted_bipred_idc == 1. Each table packs
 * luma/chroma weight+offset pairs for 32 reference entries.
 * NOTE(review): the branch selecting the l0 vs l1 copy loop (presumably on
 * i) is missing from this fragment. */
688 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
689 VAPictureParameterBufferH264 *pic_param,
690 VASliceParameterBufferH264 *slice_param,
691 struct gen7_mfd_context *gen7_mfd_context)
693 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
694 int i, j, num_weight_offset_table = 0;
695 short weightoffsets[32 * 6];
697 if ((slice_param->slice_type == SLICE_TYPE_P ||
698 slice_param->slice_type == SLICE_TYPE_SP) &&
699 (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
700 num_weight_offset_table = 1;
703 if ((slice_param->slice_type == SLICE_TYPE_B) &&
704 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
705 num_weight_offset_table = 2;
708 for (i = 0; i < num_weight_offset_table; i++) {
709 BEGIN_BCS_BATCH(batch, 98);
710 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
711 OUT_BCS_BATCH(batch, i);
/* List 0 table: 6 shorts per entry (luma w/o, Cb w/o, Cr w/o). */
714 for (j = 0; j < 32; j++) {
715 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
716 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
717 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
718 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
719 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
720 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
/* List 1 table, same layout. */
723 for (j = 0; j < 32; j++) {
724 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
725 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
726 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
727 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
728 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
729 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
733 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
734 ADVANCE_BCS_BATCH(batch);
/* Emit MFD_AVC_BSD_OBJECT for one slice: slice-data size/offset plus the
 * bit offset of the first macroblock (skipping the slice header), and the
 * last-slice flag.
 * NOTE(review): dwords 3-4 are only partially visible in this fragment. */
739 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
740 VAPictureParameterBufferH264 *pic_param,
741 VASliceParameterBufferH264 *slice_param,
742 dri_bo *slice_data_bo,
743 VASliceParameterBufferH264 *next_slice_param,
744 struct gen7_mfd_context *gen7_mfd_context)
746 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Locate the first MB's bit position in the slice data (depends on
 * CABAC vs CAVLC entropy coding). */
747 int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
749 pic_param->pic_fields.bits.entropy_coding_mode_flag);
751 /* the input bitstream format on GEN7 differs from GEN6 */
752 BEGIN_BCS_BATCH(batch, 6);
753 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
755 (slice_param->slice_data_size));
756 OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
764 ((slice_data_bit_offset >> 3) << 16) |
768 ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
769 (slice_data_bit_offset & 0x7));
770 OUT_BCS_BATCH(batch, 0);
771 ADVANCE_BCS_BATCH(batch);
/* One-time AVC context setup: seed the context's fallback IQ matrix with
 * the default (flat) scaling lists. */
775 gen8_mfd_avc_context_init(
776 VADriverContextP ctx,
777 struct gen7_mfd_context *gen7_mfd_context
780 /* Initialize flat scaling lists */
781 avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/* Per-picture AVC setup: detect whether in-loop deblocking (ILDB) is
 * needed by scanning the slices, refresh the frame-store index, bind the
 * render surface as pre- OR post-deblocking output, and (re)allocate the
 * row-store scratch buffers sized by picture width.
 * NOTE(review): interior lines are missing (loop-advance of slice_param,
 * the enable_avc_ildb assignment inside the disable_deblocking check,
 * dri_bo_alloc size/alignment arguments); comments cover only what is
 * visible. */
785 gen8_mfd_avc_decode_init(VADriverContextP ctx,
786 struct decode_state *decode_state,
787 struct gen7_mfd_context *gen7_mfd_context)
789 VAPictureParameterBufferH264 *pic_param;
790 VASliceParameterBufferH264 *slice_param;
791 struct i965_driver_data *i965 = i965_driver_data(ctx);
792 struct object_surface *obj_surface;
794 int i, j, enable_avc_ildb = 0;
795 unsigned int width_in_mbs, height_in_mbs;
/* Scan slices until one requests deblocking; then ILDB must be on for
 * the whole picture. */
797 for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
798 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
799 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
801 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
802 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
803 assert((slice_param->slice_type == SLICE_TYPE_I) ||
804 (slice_param->slice_type == SLICE_TYPE_SI) ||
805 (slice_param->slice_type == SLICE_TYPE_P) ||
806 (slice_param->slice_type == SLICE_TYPE_SP) ||
807 (slice_param->slice_type == SLICE_TYPE_B));
809 if (slice_param->disable_deblocking_filter_idc != 1) {
818 assert(decode_state->pic_param && decode_state->pic_param->buffer);
819 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
820 intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
821 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
822 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
823 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
824 assert(height_in_mbs > 0 && height_in_mbs <= 256);
826 /* Current decoded picture */
827 obj_surface = decode_state->render_object;
828 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
829 obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
831 avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
832 gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
/* The render surface is the post-deblocking output when ILDB is on,
 * otherwise the pre-deblocking output; exactly one is marked valid. */
834 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
835 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
836 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
837 gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
839 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
840 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
841 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
842 gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
/* Scratch buffers are re-created each picture (sized per width). */
844 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
845 bo = dri_bo_alloc(i965->intel.bufmgr,
850 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
851 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
853 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
854 bo = dri_bo_alloc(i965->intel.bufmgr,
855 "deblocking filter row store",
856 width_in_mbs * 64 * 4,
859 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
860 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
862 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
863 bo = dri_bo_alloc(i965->intel.bufmgr,
865 width_in_mbs * 64 * 2,
868 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
869 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
871 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
872 bo = dri_bo_alloc(i965->intel.bufmgr,
874 width_in_mbs * 64 * 2,
877 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
878 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* AVC has no bitplane data (VC-1 only). */
880 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Top-level AVC decode: run per-picture init, then emit the full command
 * sequence (pipe mode, surface, buffer addresses, QM, IMG, PICID) followed
 * by per-slice state + BSD objects, and flush the batch.
 * NOTE(review): loop braces and the slice_param advance between elements
 * are missing from this fragment. */
884 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
885 struct decode_state *decode_state,
886 struct gen7_mfd_context *gen7_mfd_context)
888 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
889 VAPictureParameterBufferH264 *pic_param;
890 VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
891 dri_bo *slice_data_bo;
894 assert(decode_state->pic_param && decode_state->pic_param->buffer);
895 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
896 gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
898 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
899 intel_batchbuffer_emit_mi_flush(batch);
900 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
901 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
902 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
903 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
904 gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
905 gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
906 gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
/* Outer loop: one slice-parameter buffer (slice group) per iteration. */
908 for (j = 0; j < decode_state->num_slice_params; j++) {
909 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
910 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
911 slice_data_bo = decode_state->slice_datas[j]->bo;
912 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
/* Peek at the next group's first slice so the last slice of this
 * group can point at it (or NULL for the picture's final slice). */
914 if (j == decode_state->num_slice_params - 1)
915 next_slice_group_param = NULL;
917 next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
919 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
920 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
921 assert((slice_param->slice_type == SLICE_TYPE_I) ||
922 (slice_param->slice_type == SLICE_TYPE_SI) ||
923 (slice_param->slice_type == SLICE_TYPE_P) ||
924 (slice_param->slice_type == SLICE_TYPE_SP) ||
925 (slice_param->slice_type == SLICE_TYPE_B));
927 if (i < decode_state->slice_params[j]->num_elements - 1)
928 next_slice_param = slice_param + 1;
930 next_slice_param = next_slice_group_param;
932 gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
933 gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
934 gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
935 gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
936 gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
941 intel_batchbuffer_end_atomic(batch);
942 intel_batchbuffer_flush(batch);
/* Per-picture MPEG-2 setup: refresh reference surfaces, ensure the render
 * surface has an NV12 BO, bind it as the pre-deblocking output (MPEG-2
 * uses no ILDB here), and allocate the BSD/MPC row-store scratch buffer.
 * NOTE(review): dri_bo_alloc name/size/alignment arguments and some call
 * arguments are missing from this fragment. */
946 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
947 struct decode_state *decode_state,
948 struct gen7_mfd_context *gen7_mfd_context)
950 VAPictureParameterBufferMPEG2 *pic_param;
951 struct i965_driver_data *i965 = i965_driver_data(ctx);
952 struct object_surface *obj_surface;
954 unsigned int width_in_mbs;
956 assert(decode_state->pic_param && decode_state->pic_param->buffer);
957 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
958 width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
960 mpeg2_set_reference_surfaces(
962 gen7_mfd_context->reference_surface,
967 /* Current decoded picture */
968 obj_surface = decode_state->render_object;
969 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
971 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
972 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
973 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
974 gen7_mfd_context->pre_deblocking_output.valid = 1;
976 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
977 bo = dri_bo_alloc(i965->intel.bufmgr,
982 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
983 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
/* Remaining auxiliary buffers are unused for MPEG-2 decode. */
985 gen7_mfd_context->post_deblocking_output.valid = 0;
986 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
987 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
988 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
989 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Emit MFX_MPEG2_PIC_STATE (13 dwords): packs the four f_code nibbles,
 * the picture coding extension flags and the frame dimensions in
 * macroblocks into the hardware picture-state command.
 */
gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int slice_concealment_disable_bit = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    /* NOTE(review): the condition guarding this assignment is not visible
     * here — presumably a hardware stepping / workaround check. Confirm. */
    slice_concealment_disable_bit = 1;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    /* VA packs f_code as four nibbles: [0][0] in the top bits. */
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    /* Frame size in macroblocks, minus one, plus concealment control. */
    OUT_BCS_BATCH(batch,
                  (slice_concealment_disable_bit << 31) |
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
    /* Remaining dwords are unused for decode and left zero. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * Update the driver-side MPEG-2 quantiser-matrix cache from the incoming
 * VAIQMatrixBufferMPEG2 (de-zigzagging the coefficients) and emit both the
 * intra and non-intra matrices to the hardware.
 *
 * A cached load_* value of -1 means "never loaded", which forces the first
 * buffer received to be taken even if its own load flag is clear.
 */
gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        struct gen7_mfd_context *gen7_mfd_context)
    VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;

    /* Update internal QM state */
    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
        VAIQMatrixBufferMPEG2 * const iq_matrix =
            (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;

        if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
            iq_matrix->load_intra_quantiser_matrix) {
            gen_iq_matrix->load_intra_quantiser_matrix =
                iq_matrix->load_intra_quantiser_matrix;
            if (iq_matrix->load_intra_quantiser_matrix) {
                /* VA supplies the matrix in zigzag order; store it in
                 * raster order for the hardware. */
                for (j = 0; j < 64; j++)
                    gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
                        iq_matrix->intra_quantiser_matrix[j];

        if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
            iq_matrix->load_non_intra_quantiser_matrix) {
            gen_iq_matrix->load_non_intra_quantiser_matrix =
                iq_matrix->load_non_intra_quantiser_matrix;
            if (iq_matrix->load_non_intra_quantiser_matrix) {
                for (j = 0; j < 64; j++)
                    gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
                        iq_matrix->non_intra_quantiser_matrix[j];

    /* Commit QM state to HW */
    /* Two passes: intra matrix first, then non-intra.  NOTE(review): the
     * branches below are presumably selected by the loop index `i`; the
     * selection lines are not visible here — confirm against upstream. */
    for (i = 0; i < 2; i++) {
        unsigned char *qm = NULL;

        if (gen_iq_matrix->load_intra_quantiser_matrix) {
            qm = gen_iq_matrix->intra_quantiser_matrix;
            qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;

        if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
            qm = gen_iq_matrix->non_intra_quantiser_matrix;
            qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;

        gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
/*
 * Emit MFD_MPEG2_BSD_OBJECT (5 dwords) for one slice: bitstream size and
 * offset (adjusted by the byte part of macroblock_offset), macroblock
 * count and the resume position.
 */
gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)

    /* Some apps report slice_vertical_position in frame coordinates even
     * for field pictures; wa_mpeg2_slice_vertical_position > 0 says the
     * positions must be halved. */
    is_field_pic_wa = is_field_pic &&
                      gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    if (next_slice_param == NULL) {
        /* Last slice: run to the bottom of the (field) picture. */
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;

    /* Number of macroblocks covered by this slice. */
    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    /* macroblock_offset is in bits; >> 3 yields whole bytes consumed by
     * the slice header, the low 3 bits are programmed separately below. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (vpos1 << 8 | hpos1));
    ADVANCE_BCS_BATCH(batch);
/*
 * Decode one MPEG-2 picture: program the common MFX pipeline state, the
 * MPEG-2 picture/QM state, then emit one BSD object per slice, chaining
 * each slice to the next so the hardware knows each slice's extent.
 */
gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Lazily detect (once, < 0 means undetermined) whether the app passes
     * field-picture slice positions in frame units. */
    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);

        /* First slice of the next parameter group, or NULL on the last. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
                next_slice_param = next_slice_group_param;

            gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
/* Map VAPictureParameterBufferVC1 picture_type to GEN7 picture-type codes. */
static const int va_to_gen7_vc1_pic_type[5] = {
    GEN7_VC1_BI_PICTURE,

/* Map VC-1 MV modes (VAMvMode*) to the hardware's unified MV mode field. */
static const int va_to_gen7_vc1_mv[4] = {
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */

/* B-fraction scale factors (VC-1 spec table, indexed by b_picture_fraction)
 * used to derive the backward reference frame distance (BRFD). */
static const int b_picture_scale_factor[21] = {
    128, 85, 170, 64, 192,
    51, 102, 153, 204, 43,
    215, 37, 74, 111, 148,
    185, 222, 32, 96, 160,

/* Map conditional_overlap_flag to the hardware CONDOVER field. */
static const int va_to_gen7_vc1_condover[3] = {

/* Map sequence profile (0..3) to GEN7 profile codes; index 2 is reserved. */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
/*
 * Destructor for the per-surface VC-1 private data (installed as
 * obj_surface->free_private_data): releases the direct-MV buffer and the
 * tracking struct itself.  Safe to call when no private data was attached.
 */
gen8_mfd_free_vc1_surface(void **data)
    struct gen7_vc1_surface *gen7_vc1_surface = *data;

    if (!gen7_vc1_surface)

    dri_bo_unreference(gen7_vc1_surface->dmv);
    free(gen7_vc1_surface);
/*
 * Attach (or refresh) the VC-1 private data on the decode surface: record
 * this picture's type and lazily allocate the direct-MV read/write buffer
 * (64 bytes per macroblock) used for B-picture direct-mode prediction.
 */
gen8_mfd_init_vc1_surface(VADriverContextP ctx,
                          VAPictureParameterBufferVC1 *pic_param,
                          struct object_surface *obj_surface)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
    int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;

    obj_surface->free_private_data = gen8_mfd_free_vc1_surface;

    if (!gen7_vc1_surface) {
        gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
        assert((obj_surface->size & 0x3f) == 0);
        obj_surface->private_data = gen7_vc1_surface;

    gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;

    if (gen7_vc1_surface->dmv == NULL) {
        gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
                                             "direct mv w/r buffer",
                                             width_in_mbs * height_in_mbs * 64,
/*
 * Per-picture setup for VC-1 decoding: reference list update, output
 * binding (pre- vs post-deblocking selected by the loop-filter flag),
 * scratch-buffer allocation, and repacking of the VA bitplane buffer into
 * the hardware layout.
 */
gen8_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    picture_type = pic_param->picture_fields.bits.picture_type;

    intel_update_vc1_frame_store_index(ctx,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* Exactly one of the two outputs is valid: post-deblocking when the
     * in-loop filter is enabled, pre-deblocking otherwise. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 7 * 64,
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);

    if (gen7_mfd_context->bitplane_read_buffer.valid) {
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        /* Hardware packs two macroblocks (one nibble each) per byte. */
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          bitplane_width * height_in_mbs,
        gen7_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);

        /* Repack VA's nibble-per-MB bitplane into the hardware order:
         * read each source nibble, then pair nibbles per destination byte. */
        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for(src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;

                src_index = (src_h * width_in_mbs + src_w) / 2;
                /* Even MBs sit in the high nibble of the source byte. */
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                /* NOTE(review): for skipped pictures the bitplane value is
                 * presumably forced to a constant here — confirm upstream. */
                if (picture_type == GEN7_VC1_SKIPPED_PICTURE){

                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));

            /* Odd-width rows: shift the final half-filled byte into place. */
            dst[src_w / 2] >>= 4;

            dst += bitplane_width;

        gen7_mfd_context->bitplane_read_buffer.bo = NULL;
/*
 * Emit MFD_VC1_LONG_PIC_STATE (6 dwords).  Derives every hardware field
 * from the VA picture parameters: ALTPQUANT configuration/edge mask from
 * the DQUANT syntax, the unified MV mode, B-fraction scale factor, BRFD,
 * CONDOVER, interpolation mode and the bitplane/table selections.
 * Statement order matters here; the packing mirrors the hardware layout.
 */
gen8_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int dmv_surface_valid = 0;
    int interpolation_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /* Decode the VC-1 DQUANT syntax (VC-1 spec 7.1.1.31) into the hardware
     * ALTPQUANT config and per-edge mask. */
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                alt_pquant_edge_mask = 0xf;
                alt_pquant_edge_mask = 0x9;
                alt_pquant_edge_mask = (0x3 << dqdbedge);
                alt_pquant_edge_mask = (0x1 << dqsbedge);

    /* Under intensity compensation the real MV mode is in mv_mode2. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        ref_field_pic_polarity = 0;

    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];

    /* Advanced-profile I pictures are handled as BI by the hardware. */
    if (profile == GEN7_VC1_ADVANCED_PROFILE &&
        picture_type == GEN7_VC1_I_PICTURE)
        picture_type = GEN7_VC1_BI_PICTURE;

    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;

    /*
     * 8.3.6.2.1 Transform Type Selection
     * If variable-sized transform coding is not enabled,
     * then the 8x8 transform shall be used for all blocks.
     * it is also MFX_VC1_PIC_STATE requirement.
     */
    if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
        pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
        pic_param->transform_fields.bits.frame_level_transform_type = 0;

    /* Direct-mode MVs are read from the backward reference; they are only
     * valid when that reference is a P picture. */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        struct gen7_vc1_surface *gen7_vc1_surface = NULL;

        obj_surface = decode_state->reference_objects[1];

            gen7_vc1_surface = obj_surface->private_data;

        if (!gen7_vc1_surface ||
            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
            dmv_surface_valid = 1;

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
        if (pic_param->picture_fields.bits.top_field_first)

    /* Backward reference frame distance, derived per VC-1 spec 8.4.5.4. */
    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

    /* Overlap smoothing rules differ between simple/main and advanced. */
    if (profile != GEN7_VC1_ADVANCED_PROFILE){
        if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
            pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
            pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
            pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
            if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
            } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                       va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 9; /* Half-pel bilinear */
    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
        interpolation_mode = 1; /* Half-pel bicubic */
        interpolation_mode = 0; /* Quarter-pel bicubic */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
                  !pic_param->picture_fields.bits.is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  picture_type << 26 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  pic_param->reference_fields.bits.reference_distance << 20 |
                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    /* Bitplane "raw mode" bits are inverted: 1 = coded in the bitstream. */
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 31 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
1659 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1660 struct decode_state *decode_state,
1661 struct gen7_mfd_context *gen7_mfd_context)
1663 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1664 VAPictureParameterBufferVC1 *pic_param;
1665 int intensitycomp_single;
1667 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1668 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1670 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1671 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1672 intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1674 BEGIN_BCS_BATCH(batch, 6);
1675 OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1676 OUT_BCS_BATCH(batch,
1677 0 << 14 | /* FIXME: double ??? */
1679 intensitycomp_single << 10 |
1680 intensitycomp_single << 8 |
1681 0 << 4 | /* FIXME: interlace mode */
1683 OUT_BCS_BATCH(batch,
1684 pic_param->luma_shift << 16 |
1685 pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1686 OUT_BCS_BATCH(batch, 0);
1687 OUT_BCS_BATCH(batch, 0);
1688 OUT_BCS_BATCH(batch, 0);
1689 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_VC1_DIRECTMODE_STATE (7 dwords): program the direct-MV write
 * buffer of the current picture and the direct-MV read buffer of the
 * backward reference.  Either relocation is replaced by zeros when the
 * corresponding surface has no DMV buffer attached.
 */
gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;

    /* Current picture: MVs are written here for future B pictures. */
    obj_surface = decode_state->render_object;

    if (obj_surface && obj_surface->private_data) {
        dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;

    /* Backward reference: MVs are read from here in direct mode. */
    obj_surface = decode_state->reference_objects[1];

    if (obj_surface && obj_surface->private_data) {
        dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;

    BEGIN_BCS_BATCH(batch, 7);
    OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));

    if (dmv_write_buffer)
        OUT_BCS_RELOC(batch, dmv_write_buffer,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    if (dmv_read_buffer)
        OUT_BCS_RELOC(batch, dmv_read_buffer,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * Translate a slice's macroblock bit offset from the escaped bitstream to
 * the raw one.
 *
 * For the advanced profile (profile == 3) the slice header in `buf` may
 * contain start-code-emulation prevention bytes (0x00 0x00 0x03 followed
 * by a byte < 4); the hardware wants the offset counted over the bytes as
 * stored, so every emulation pattern crossed by the header widens the
 * offset by one byte.  Other profiles have no emulation bytes and the
 * offset is returned unchanged.
 *
 * `in_slice_data_bit_offset` is the bit offset reported by the app;
 * returns the adjusted bit offset.
 */
static int
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_bytes = in_slice_data_bit_offset / 8;
    int adjusted_bit_offset = in_slice_data_bit_offset;

    if (profile == 3) { /* advanced profile only */
        int consumed = 0;   /* header bytes accounted for (unescaped view) */
        int raw_pos = 0;    /* position in the escaped byte stream */

        while (consumed < header_bytes) {
            if (buf[raw_pos] == 0 && buf[raw_pos + 1] == 0 &&
                buf[raw_pos + 2] == 3 && buf[raw_pos + 3] < 4) {
                /* 0x00 0x00 0x03 xx: the 0x03 is an emulation byte —
                 * advance past it without counting it as header payload. */
                consumed += 2;
                raw_pos += 3;
            } else {
                consumed += 1;
                raw_pos += 1;
            }
        }

        adjusted_bit_offset = 8 * raw_pos + in_slice_data_bit_offset % 8;
    }

    return adjusted_bit_offset;
}
/*
 * Emit MFD_VC1_BSD_OBJECT (5 dwords) for one VC-1 slice.  The macroblock
 * bit offset is first corrected for start-code emulation bytes by scanning
 * the mapped slice data (CPU map/unmap around the scan).
 */
gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVC1 *pic_param,
                        VASliceParameterBufferVC1 *slice_param,
                        VASliceParameterBufferVC1 *next_slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int next_slice_start_vert_pos;
    int macroblock_offset;
    uint8_t *slice_data = NULL;

    /* Map read-only to inspect the slice header bytes on the CPU. */
    dri_bo_map(slice_data_bo, 0);
    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
    macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
                                                               slice_param->macroblock_offset,
                                                               pic_param->sequence_fields.bits.profile);
    dri_bo_unmap(slice_data_bo);

    if (next_slice_param)
        next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
        /* Last slice: extends to the bottom row of macroblocks. */
        next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
    /* macroblock_offset is in bits; whole bytes go into size/offset, the
     * residual bit count is programmed in the last dword. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_vertical_position << 16 |
                  next_slice_start_vert_pos << 0);
    OUT_BCS_BATCH(batch,
                  (macroblock_offset & 0x7));
    ADVANCE_BCS_BATCH(batch);
/*
 * Decode one VC-1 picture: program common MFX state, the VC-1 picture,
 * prediction-pipe and direct-mode state, then emit one BSD object per
 * slice, chaining each slice to its successor.
 */
gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);

        /* First slice of the next parameter group, or NULL on the last. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
                next_slice_param = next_slice_group_param;

            gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
1854 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1855 struct decode_state *decode_state,
1856 struct gen7_mfd_context *gen7_mfd_context)
1858 struct object_surface *obj_surface;
1859 VAPictureParameterBufferJPEGBaseline *pic_param;
1860 int subsampling = SUBSAMPLE_YUV420;
1861 int fourcc = VA_FOURCC_IMC3;
1863 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1865 if (pic_param->num_components == 1)
1866 subsampling = SUBSAMPLE_YUV400;
1867 else if (pic_param->num_components == 3) {
1868 int h1 = pic_param->components[0].h_sampling_factor;
1869 int h2 = pic_param->components[1].h_sampling_factor;
1870 int h3 = pic_param->components[2].h_sampling_factor;
1871 int v1 = pic_param->components[0].v_sampling_factor;
1872 int v2 = pic_param->components[1].v_sampling_factor;
1873 int v3 = pic_param->components[2].v_sampling_factor;
1875 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1876 v1 == 2 && v2 == 1 && v3 == 1) {
1877 subsampling = SUBSAMPLE_YUV420;
1878 fourcc = VA_FOURCC_IMC3;
1879 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1880 v1 == 1 && v2 == 1 && v3 == 1) {
1881 subsampling = SUBSAMPLE_YUV422H;
1882 fourcc = VA_FOURCC_422H;
1883 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1884 v1 == 1 && v2 == 1 && v3 == 1) {
1885 subsampling = SUBSAMPLE_YUV444;
1886 fourcc = VA_FOURCC_444P;
1887 } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1888 v1 == 1 && v2 == 1 && v3 == 1) {
1889 subsampling = SUBSAMPLE_YUV411;
1890 fourcc = VA_FOURCC_411P;
1891 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1892 v1 == 2 && v2 == 1 && v3 == 1) {
1893 subsampling = SUBSAMPLE_YUV422V;
1894 fourcc = VA_FOURCC_422V;
1895 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1896 v1 == 2 && v2 == 2 && v3 == 2) {
1897 subsampling = SUBSAMPLE_YUV422H;
1898 fourcc = VA_FOURCC_422H;
1899 } else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1900 v1 == 2 && v2 == 1 && v3 == 1) {
1901 subsampling = SUBSAMPLE_YUV422V;
1902 fourcc = VA_FOURCC_422V;
1910 /* Current decoded picture */
1911 obj_surface = decode_state->render_object;
1912 i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1914 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1915 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1916 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1917 gen7_mfd_context->pre_deblocking_output.valid = 1;
1919 gen7_mfd_context->post_deblocking_output.bo = NULL;
1920 gen7_mfd_context->post_deblocking_output.valid = 0;
1922 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1923 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1925 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1926 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1928 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1929 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1931 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1932 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1934 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1935 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Maps VA rotation attribute values (index 0..3 = 0/90/180/270 degrees)
 * to the MFX_JPEG_PIC_STATE rotation field encodings.  Only entry 0
 * (no rotation) is used by gen8_mfd_jpeg_pic_state below. */
1938 static const int va_to_gen7_jpeg_rotation[4] = {
1939 GEN7_JPEG_ROTATION_0,
1940 GEN7_JPEG_ROTATION_90,
1941 GEN7_JPEG_ROTATION_180,
1942 GEN7_JPEG_ROTATION_270
1946 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1947 struct decode_state *decode_state,
1948 struct gen7_mfd_context *gen7_mfd_context)
1950 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1951 VAPictureParameterBufferJPEGBaseline *pic_param;
1952 int chroma_type = GEN7_YUV420;
1953 int frame_width_in_blks;
1954 int frame_height_in_blks;
1956 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1957 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1959 if (pic_param->num_components == 1)
1960 chroma_type = GEN7_YUV400;
1961 else if (pic_param->num_components == 3) {
1962 int h1 = pic_param->components[0].h_sampling_factor;
1963 int h2 = pic_param->components[1].h_sampling_factor;
1964 int h3 = pic_param->components[2].h_sampling_factor;
1965 int v1 = pic_param->components[0].v_sampling_factor;
1966 int v2 = pic_param->components[1].v_sampling_factor;
1967 int v3 = pic_param->components[2].v_sampling_factor;
1969 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1970 v1 == 2 && v2 == 1 && v3 == 1)
1971 chroma_type = GEN7_YUV420;
1972 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1973 v1 == 1 && v2 == 1 && v3 == 1)
1974 chroma_type = GEN7_YUV422H_2Y;
1975 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1976 v1 == 1 && v2 == 1 && v3 == 1)
1977 chroma_type = GEN7_YUV444;
1978 else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1979 v1 == 1 && v2 == 1 && v3 == 1)
1980 chroma_type = GEN7_YUV411;
1981 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1982 v1 == 2 && v2 == 1 && v3 == 1)
1983 chroma_type = GEN7_YUV422V_2Y;
1984 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1985 v1 == 2 && v2 == 2 && v3 == 2)
1986 chroma_type = GEN7_YUV422H_4Y;
1987 else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1988 v1 == 2 && v2 == 1 && v3 == 1)
1989 chroma_type = GEN7_YUV422V_4Y;
1994 if (chroma_type == GEN7_YUV400 ||
1995 chroma_type == GEN7_YUV444 ||
1996 chroma_type == GEN7_YUV422V_2Y) {
1997 frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
1998 frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
1999 } else if (chroma_type == GEN7_YUV411) {
2000 frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2001 frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2003 frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2004 frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2007 BEGIN_BCS_BATCH(batch, 3);
2008 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2009 OUT_BCS_BATCH(batch,
2010 (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
2011 (chroma_type << 0));
2012 OUT_BCS_BATCH(batch,
2013 ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
2014 ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
2015 ADVANCE_BCS_BATCH(batch);
/* Maps a Huffman table slot index (0 or 1) to the MFX huffman table id;
 * initializer entries are not visible in this chunk — presumably the
 * MFX_HUFFMAN_TABLE_ID_* values, verify against i965_defines.h. */
2018 static const int va_to_gen7_jpeg_hufftable[2] = {
/* Emit one MFX_JPEG_HUFF_TABLE_STATE command per Huffman table that the
 * application asked to load.  Each command is 53 DWORDs: 2 header DWORDs
 * plus 204 payload bytes (12 + 12 + 16 + 164 below).  The 164-byte AC
 * blob covers the 162 ac_values of VAHuffmanTableBufferJPEGBaseline —
 * assumes the struct's 2 trailing pad bytes directly follow ac_values;
 * TODO confirm against va_dec_jpeg.h. */
2024 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2025 struct decode_state *decode_state,
2026 struct gen7_mfd_context *gen7_mfd_context,
2029 VAHuffmanTableBufferJPEGBaseline *huffman_table;
2030 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Nothing to program if no Huffman table buffer was submitted. */
2033 if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2036 huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2038 for (index = 0; index < num_tables; index++) {
2039 int id = va_to_gen7_jpeg_hufftable[index];
/* Skip tables the app did not flag for loading. */
2040 if (!huffman_table->load_huffman_table[index])
2042 BEGIN_BCS_BATCH(batch, 53);
2043 OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2044 OUT_BCS_BATCH(batch, id);
2045 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2046 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2047 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2048 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2049 ADVANCE_BCS_BATCH(batch);
/* Maps a 1-based component id (luma Y = 1, Cb = 2, Cr = 3, alpha = 4)
 * to the MFX quantizer-matrix type.  Entry 0 of the 5-entry array is
 * not visible in this chunk and is never indexed (callers reject
 * id < 1). */
2053 static const int va_to_gen7_jpeg_qm[5] = {
2055 MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2056 MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2057 MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2058 MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
/* Program one quantizer matrix per picture component.  Each VA table is
 * stored in zig-zag scan order; it is converted to raster order before
 * being handed to gen8_mfd_qm_state. */
2062 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2063 struct decode_state *decode_state,
2064 struct gen7_mfd_context *gen7_mfd_context)
2066 VAPictureParameterBufferJPEGBaseline *pic_param;
2067 VAIQMatrixBufferJPEGBaseline *iq_matrix;
/* Nothing to do if no IQ matrix buffer was submitted. */
2070 if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2073 iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2074 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2076 assert(pic_param->num_components <= 3);
2078 for (index = 0; index < pic_param->num_components; index++) {
/* Normalize the component id so the first component maps to 1 (luma). */
2079 int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2081 unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2082 unsigned char raster_qm[64];
/* Only ids 1..4 have entries in va_to_gen7_jpeg_qm. */
2085 if (id > 4 || id < 1)
2088 if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2091 qm_type = va_to_gen7_jpeg_qm[id];
/* De-zigzag: VA supplies the matrix in zig-zag order, HW wants raster. */
2093 for (j = 0; j < 64; j++)
2094 raster_qm[zigzag_direct[j]] = qm[j];
2096 gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
/* Emit MFD_JPEG_BSD_OBJECT for one scan: builds the scan-component mask
 * (bit per Y/Cb/Cr component present in the scan) and programs the
 * bitstream location, MCU position, MCU count and restart interval.
 * The switch-case labels (1/2/3 after id normalization) are not visible
 * in this chunk. */
2101 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2102 VAPictureParameterBufferJPEGBaseline *pic_param,
2103 VASliceParameterBufferJPEGBaseline *slice_param,
2104 VASliceParameterBufferJPEGBaseline *next_slice_param,
2105 dri_bo *slice_data_bo,
2106 struct gen7_mfd_context *gen7_mfd_context)
2108 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2109 int scan_component_mask = 0;
2112 assert(slice_param->num_components > 0);
2113 assert(slice_param->num_components < 4);
2114 assert(slice_param->num_components <= pic_param->num_components);
2116 for (i = 0; i < slice_param->num_components; i++) {
/* Normalize the selector so component ids start at 1. */
2117 switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2119 scan_component_mask |= (1 << 0);
2122 scan_component_mask |= (1 << 1);
2125 scan_component_mask |= (1 << 2);
2133 BEGIN_BCS_BATCH(batch, 6);
2134 OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2135 OUT_BCS_BATCH(batch,
2136 slice_param->slice_data_size);
2137 OUT_BCS_BATCH(batch,
2138 slice_param->slice_data_offset);
2139 OUT_BCS_BATCH(batch,
2140 slice_param->slice_horizontal_position << 16 |
2141 slice_param->slice_vertical_position << 0);
2142 OUT_BCS_BATCH(batch,
2143 ((slice_param->num_components != 1) << 30) | /* interleaved */
2144 (scan_component_mask << 27) | /* scan components */
2145 (0 << 26) | /* disable interrupt allowed */
2146 (slice_param->num_mcus << 0)); /* MCU count */
2147 OUT_BCS_BATCH(batch,
2148 (slice_param->restart_interval << 0)); /* RestartInterval */
2149 ADVANCE_BCS_BATCH(batch);
1389 /* Workaround for JPEG decoding on Ivybridge */
/* Tiny canned AVC clip decoded before each real JPEG picture to put the
 * MFX engine in a known state.  Other fields (width/height, data_size,
 * qp, ...) are declared in lines not visible in this chunk. */
2158 unsigned char data[32];
2160 int data_bit_offset;
2162 } gen7_jpeg_wa_clip = {
/* Pre-encoded AVC slice bytes for the workaround clip. */
2166 0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2167 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
/* Allocate (or refresh) the scratch surface and slice-data BO used by
 * the AVC workaround clip that precedes each JPEG decode. */
2175 gen8_jpeg_wa_init(VADriverContextP ctx,
2176 struct gen7_mfd_context *gen7_mfd_context)
2178 struct i965_driver_data *i965 = i965_driver_data(ctx);
2180 struct object_surface *obj_surface;
/* Drop any previously created workaround surface before recreating it. */
2182 if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2183 i965_DestroySurfaces(ctx,
2184 &gen7_mfd_context->jpeg_wa_surface_id,
2187 status = i965_CreateSurfaces(ctx,
2188 gen7_jpeg_wa_clip.width,
2189 gen7_jpeg_wa_clip.height,
2190 VA_RT_FORMAT_YUV420,
2192 &gen7_mfd_context->jpeg_wa_surface_id);
2193 assert(status == VA_STATUS_SUCCESS);
2195 obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2196 assert(obj_surface);
2197 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2198 gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
/* Upload the canned clip bitstream once; the BO is reused afterwards. */
2200 if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2201 gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2205 dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2207 gen7_jpeg_wa_clip.data_size,
2208 gen7_jpeg_wa_clip.data);
/* MFX_PIPE_MODE_SELECT for the workaround clip: AVC VLD decode with
 * pre-deblocking output only (the clip's result is discarded). */
2213 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2214 struct gen7_mfd_context *gen7_mfd_context)
2216 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2218 BEGIN_BCS_BATCH(batch, 5);
2219 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2220 OUT_BCS_BATCH(batch,
2221 (MFX_LONG_MODE << 17) | /* Currently only support long format */
2222 (MFD_MODE_VLD << 15) | /* VLD mode */
2223 (0 << 10) | /* disable Stream-Out */
2224 (0 << 9) | /* Post Deblocking Output */
2225 (1 << 8) | /* Pre Deblocking Output */
2226 (0 << 5) | /* not in stitch mode */
2227 (MFX_CODEC_DECODE << 4) | /* decoding mode */
2228 (MFX_FORMAT_AVC << 0));
2229 OUT_BCS_BATCH(batch,
2230 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
2231 (0 << 3) | /* terminate if AVC mbdata error occurs */
2232 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
2235 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2236 OUT_BCS_BATCH(batch, 0); /* reserved */
2237 ADVANCE_BCS_BATCH(batch);
/* MFX_SURFACE_STATE for the workaround clip's NV12 scratch surface. */
2241 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2242 struct gen7_mfd_context *gen7_mfd_context)
2244 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2245 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2247 BEGIN_BCS_BATCH(batch, 6);
2248 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2249 OUT_BCS_BATCH(batch, 0);
2250 OUT_BCS_BATCH(batch,
2251 ((obj_surface->orig_width - 1) << 18) |
2252 ((obj_surface->orig_height - 1) << 4));
2253 OUT_BCS_BATCH(batch,
2254 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2255 (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2256 (0 << 22) | /* surface object control state, ignored */
2257 ((obj_surface->width - 1) << 3) | /* pitch */
2258 (0 << 2) | /* must be 0 */
2259 (1 << 1) | /* must be tiled */
2260 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
2261 OUT_BCS_BATCH(batch,
2262 (0 << 16) | /* X offset for U(Cb), must be 0 */
2263 (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2264 OUT_BCS_BATCH(batch,
2265 (0 << 16) | /* X offset for V(Cr), must be 0 */
2266 (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2267 ADVANCE_BCS_BATCH(batch);
/* MFX_PIPE_BUF_ADDR_STATE for the workaround clip: only the
 * pre-deblocking destination and a temporary intra-row-store BO are
 * real; everything else (references, stream-out, mb status, ILDB) is
 * zeroed.  The temporary BO is unreferenced at the end — the kernel
 * keeps it alive until the batch completes. */
2271 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2272 struct gen7_mfd_context *gen7_mfd_context)
2274 struct i965_driver_data *i965 = i965_driver_data(ctx);
2275 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2276 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2280 intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2285 BEGIN_BCS_BATCH(batch, 61);
2286 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2287 OUT_BCS_RELOC(batch,
2289 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2291 OUT_BCS_BATCH(batch, 0);
2292 OUT_BCS_BATCH(batch, 0);
2295 OUT_BCS_BATCH(batch, 0); /* post deblocking */
2296 OUT_BCS_BATCH(batch, 0);
2297 OUT_BCS_BATCH(batch, 0);
2299 /* uncompressed-video & stream out 7-12 */
2300 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2301 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2302 OUT_BCS_BATCH(batch, 0);
2303 OUT_BCS_BATCH(batch, 0);
2304 OUT_BCS_BATCH(batch, 0);
2305 OUT_BCS_BATCH(batch, 0);
2307 /* the DW 13-15 is for intra row store scratch */
2308 OUT_BCS_RELOC(batch,
2310 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2312 OUT_BCS_BATCH(batch, 0);
2313 OUT_BCS_BATCH(batch, 0);
2315 /* the DW 16-18 is for deblocking filter */
2316 OUT_BCS_BATCH(batch, 0);
2317 OUT_BCS_BATCH(batch, 0);
2318 OUT_BCS_BATCH(batch, 0);
/* No reference frames are needed for the all-intra workaround clip. */
2321 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2322 OUT_BCS_BATCH(batch, 0);
2323 OUT_BCS_BATCH(batch, 0);
2325 OUT_BCS_BATCH(batch, 0);
2327 /* the DW52-54 is for mb status address */
2328 OUT_BCS_BATCH(batch, 0);
2329 OUT_BCS_BATCH(batch, 0);
2330 OUT_BCS_BATCH(batch, 0);
2331 /* the DW56-60 is for ILDB & second ILDB address */
2332 OUT_BCS_BATCH(batch, 0);
2333 OUT_BCS_BATCH(batch, 0);
2334 OUT_BCS_BATCH(batch, 0);
2335 OUT_BCS_BATCH(batch, 0);
2336 OUT_BCS_BATCH(batch, 0);
2337 OUT_BCS_BATCH(batch, 0);
2339 ADVANCE_BCS_BATCH(batch);
2341 dri_bo_unreference(intra_bo);
/* MFX_BSP_BUF_BASE_ADDR_STATE for the workaround clip: temporary
 * BSD/MPC and MPR row-store buffers, released right after being
 * referenced by the batch. */
2345 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2346 struct gen7_mfd_context *gen7_mfd_context)
2348 struct i965_driver_data *i965 = i965_driver_data(ctx);
2349 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2350 dri_bo *bsd_mpc_bo, *mpr_bo;
2352 bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2353 "bsd mpc row store",
2354 11520, /* 1.5 * 120 * 64 */
2357 mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2359 7680, /* 1.0 * 120 * 64 */
2362 BEGIN_BCS_BATCH(batch, 10);
2363 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2365 OUT_BCS_RELOC(batch,
2367 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2370 OUT_BCS_BATCH(batch, 0);
2371 OUT_BCS_BATCH(batch, 0);
2373 OUT_BCS_RELOC(batch,
2375 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2377 OUT_BCS_BATCH(batch, 0);
2378 OUT_BCS_BATCH(batch, 0);
2380 OUT_BCS_BATCH(batch, 0);
2381 OUT_BCS_BATCH(batch, 0);
2382 OUT_BCS_BATCH(batch, 0);
2384 ADVANCE_BCS_BATCH(batch);
/* The kernel keeps the BOs alive until the batch completes. */
2386 dri_bo_unreference(bsd_mpc_bo);
2387 dri_bo_unreference(mpr_bo);
/* AVC QM programming for the workaround clip.  The body is not visible
 * in this chunk — presumably empty or flat default matrices; verify
 * against the full file. */
2391 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2392 struct gen7_mfd_context *gen7_mfd_context)
/* MFX_AVC_IMG_STATE for the 1x1-macroblock workaround clip: a single
 * CABAC 4:2:0 frame, with all optional/trailing DWs zeroed. */
2398 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2399 struct gen7_mfd_context *gen7_mfd_context)
2401 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2403 int mbaff_frame_flag = 0;
2404 unsigned int width_in_mbs = 1, height_in_mbs = 1;
2406 BEGIN_BCS_BATCH(batch, 16);
2407 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2408 OUT_BCS_BATCH(batch,
2409 width_in_mbs * height_in_mbs);
2410 OUT_BCS_BATCH(batch,
2411 ((height_in_mbs - 1) << 16) |
2412 ((width_in_mbs - 1) << 0));
2413 OUT_BCS_BATCH(batch,
2418 (0 << 12) | /* differ from GEN6 */
2421 OUT_BCS_BATCH(batch,
2422 (1 << 10) | /* 4:2:0 */
2423 (1 << 7) | /* CABAC */
2429 (mbaff_frame_flag << 1) |
2431 OUT_BCS_BATCH(batch, 0);
2432 OUT_BCS_BATCH(batch, 0);
2433 OUT_BCS_BATCH(batch, 0);
2434 OUT_BCS_BATCH(batch, 0);
2435 OUT_BCS_BATCH(batch, 0);
2436 OUT_BCS_BATCH(batch, 0);
2437 OUT_BCS_BATCH(batch, 0);
2438 OUT_BCS_BATCH(batch, 0);
2439 OUT_BCS_BATCH(batch, 0);
2440 OUT_BCS_BATCH(batch, 0);
2441 OUT_BCS_BATCH(batch, 0);
2442 ADVANCE_BCS_BATCH(batch);
/* MFX_AVC_DIRECTMODE_STATE for the workaround clip: the intra-only clip
 * needs no direct-mode data, so every address and POC entry is zero. */
2446 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2447 struct gen7_mfd_context *gen7_mfd_context)
2449 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2452 BEGIN_BCS_BATCH(batch, 71);
2453 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2455 /* reference surfaces 0..15 */
2456 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2457 OUT_BCS_BATCH(batch, 0); /* top */
2458 OUT_BCS_BATCH(batch, 0); /* bottom */
2461 OUT_BCS_BATCH(batch, 0);
2463 /* the current decoding frame/field */
2464 OUT_BCS_BATCH(batch, 0); /* top */
2465 OUT_BCS_BATCH(batch, 0);
2466 OUT_BCS_BATCH(batch, 0);
/* POC list entries, all zero for the workaround clip. */
2469 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2470 OUT_BCS_BATCH(batch, 0);
2471 OUT_BCS_BATCH(batch, 0);
2474 OUT_BCS_BATCH(batch, 0);
2475 OUT_BCS_BATCH(batch, 0);
2477 ADVANCE_BCS_BATCH(batch);
/* MFX_IND_OBJ_BASE_ADDR_STATE for the workaround clip: points the MFD
 * bitstream fetcher at the canned clip's slice-data BO. */
2481 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2482 struct gen7_mfd_context *gen7_mfd_context)
2484 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2486 BEGIN_BCS_BATCH(batch, 11);
2487 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2488 OUT_BCS_RELOC(batch,
2489 gen7_mfd_context->jpeg_wa_slice_data_bo,
2490 I915_GEM_DOMAIN_INSTRUCTION, 0,
2492 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2493 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2494 OUT_BCS_BATCH(batch, 0);
2495 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2496 OUT_BCS_BATCH(batch, 0);
2497 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2498 OUT_BCS_BATCH(batch, 0);
2499 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2500 OUT_BCS_BATCH(batch, 0);
2501 ADVANCE_BCS_BATCH(batch);
/* MFD_AVC_BSD_OBJECT for the workaround clip: programs the clip's
 * bitstream size and its first-macroblock bit offset (split into byte
 * and bit parts), flagged as the last slice. */
2505 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2506 struct gen7_mfd_context *gen7_mfd_context)
2508 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2510 /* the input bitstream format on GEN7 differs from GEN6 */
2511 BEGIN_BCS_BATCH(batch, 6);
2512 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2513 OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2514 OUT_BCS_BATCH(batch, 0);
2515 OUT_BCS_BATCH(batch,
2521 OUT_BCS_BATCH(batch,
2522 ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2525 (1 << 3) | /* LastSlice Flag */
2526 (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2527 OUT_BCS_BATCH(batch, 0);
2528 ADVANCE_BCS_BATCH(batch);
/* MFX_AVC_SLICE_STATE for the workaround clip: a single I slice
 * covering the one-macroblock frame, deblocking disabled, marked as the
 * last slice. */
2532 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2533 struct gen7_mfd_context *gen7_mfd_context)
2535 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2536 int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2537 int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2538 int first_mb_in_slice = 0;
2539 int slice_type = SLICE_TYPE_I;
2541 BEGIN_BCS_BATCH(batch, 11);
2542 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2543 OUT_BCS_BATCH(batch, slice_type);
2544 OUT_BCS_BATCH(batch,
2545 (num_ref_idx_l1 << 24) |
2546 (num_ref_idx_l0 << 16) |
2549 OUT_BCS_BATCH(batch,
2551 (1 << 27) | /* disable Deblocking */
2553 (gen7_jpeg_wa_clip.qp << 16) |
2556 OUT_BCS_BATCH(batch,
2557 (slice_ver_pos << 24) |
2558 (slice_hor_pos << 16) |
2559 (first_mb_in_slice << 0));
2560 OUT_BCS_BATCH(batch,
2561 (next_slice_ver_pos << 16) |
2562 (next_slice_hor_pos << 0));
2563 OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2564 OUT_BCS_BATCH(batch, 0);
2565 OUT_BCS_BATCH(batch, 0);
2566 OUT_BCS_BATCH(batch, 0);
2567 OUT_BCS_BATCH(batch, 0);
2568 ADVANCE_BCS_BATCH(batch);
/* Run the full AVC workaround sequence (decode the tiny canned clip)
 * before each real JPEG picture.  Emission order mirrors a normal AVC
 * decode: mode select, surface, buffer addresses, QM, img state,
 * indirect object base, direct mode, slice state, BSD object. */
2572 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2573 struct gen7_mfd_context *gen7_mfd_context)
2575 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2576 gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2577 intel_batchbuffer_emit_mi_flush(batch);
2578 gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2579 gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2580 gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2581 gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2582 gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2583 gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2584 gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2586 gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2587 gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2588 gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
/* Top-level JPEG decode entry point.  After the workaround clip and
 * common state emission, the slices are walked twice: a first pass to
 * find the highest DC/AC Huffman table selector (so exactly
 * max_selector + 1 tables are programmed), then a second pass emitting
 * one MFD_JPEG_BSD_OBJECT per scan. */
2594 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2595 struct decode_state *decode_state,
2596 struct gen7_mfd_context *gen7_mfd_context)
2598 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2599 VAPictureParameterBufferJPEGBaseline *pic_param;
2600 VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2601 dri_bo *slice_data_bo;
2602 int i, j, max_selector = 0;
2604 assert(decode_state->pic_param && decode_state->pic_param->buffer);
2605 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2607 /* Currently only support Baseline DCT */
2608 gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2609 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2611 gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2613 intel_batchbuffer_emit_mi_flush(batch);
2614 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2615 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2616 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2617 gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2618 gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
/* Pass 1: scan all slices for the highest Huffman table selector. */
2620 for (j = 0; j < decode_state->num_slice_params; j++) {
2621 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2622 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2623 slice_data_bo = decode_state->slice_datas[j]->bo;
2624 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2626 if (j == decode_state->num_slice_params - 1)
2627 next_slice_group_param = NULL;
2629 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2631 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2634 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2636 if (i < decode_state->slice_params[j]->num_elements - 1)
2637 next_slice_param = slice_param + 1;
2639 next_slice_param = next_slice_group_param;
2641 for (component = 0; component < slice_param->num_components; component++) {
2642 if (max_selector < slice_param->components[component].dc_table_selector)
2643 max_selector = slice_param->components[component].dc_table_selector;
2645 if (max_selector < slice_param->components[component].ac_table_selector)
2646 max_selector = slice_param->components[component].ac_table_selector;
2653 assert(max_selector < 2);
2654 gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
/* Pass 2: emit the BSD objects that actually decode each scan. */
2656 for (j = 0; j < decode_state->num_slice_params; j++) {
2657 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2658 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2659 slice_data_bo = decode_state->slice_datas[j]->bo;
2660 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2662 if (j == decode_state->num_slice_params - 1)
2663 next_slice_group_param = NULL;
2665 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2667 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2668 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2670 if (i < decode_state->slice_params[j]->num_elements - 1)
2671 next_slice_param = slice_param + 1;
2673 next_slice_param = next_slice_group_param;
2675 gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2680 intel_batchbuffer_end_atomic(batch);
2681 intel_batchbuffer_flush(batch);
/* VP8 DC dequantization lookup table, indexed by the clamped
 * quantization index (0..127).  Values per the VP8 spec (RFC 6386
 * section 14.1). */
2684 static const int vp8_dc_qlookup[128] =
2686 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
2687 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
2688 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
2689 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
2690 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
2691 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
2692 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2693 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
/* VP8 AC dequantization lookup table, indexed by the clamped
 * quantization index (0..127).  Values per the VP8 spec (RFC 6386
 * section 14.1). */
2696 static const int vp8_ac_qlookup[128] =
2698 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
2699 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
2700 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
2701 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
2702 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
2703 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2704 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2705 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
/* Clamp a VP8 quantization index into the valid 0..127 lookup range;
 * body not visible in this chunk. */
2708 static inline unsigned int vp8_clip_quantization_index(int index)
/* Prepare the MFD pipeline for one VP8 frame: allocate the NV12 render
 * target, pick pre- vs post-deblocking output based on the frame's
 * loop-filter flag, and (re)allocate the AVC-style row-store scratch
 * buffers sized by the frame width in macroblocks. */
2719 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2720 struct decode_state *decode_state,
2721 struct gen7_mfd_context *gen7_mfd_context)
2723 struct object_surface *obj_surface;
2724 struct i965_driver_data *i965 = i965_driver_data(ctx);
2726 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2727 int width_in_mbs = (pic_param->frame_width + 15) / 16;
2728 int height_in_mbs = (pic_param->frame_height + 15) / 16;
2730 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2731 assert(height_in_mbs > 0 && height_in_mbs <= 256);
2733 intel_update_vp8_frame_store_index(ctx,
2736 gen7_mfd_context->reference_surface);
2738 /* Current decoded picture */
2739 obj_surface = decode_state->render_object;
2740 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Exactly one of the two outputs is valid, selected by the frame's
 * loop_filter_disable flag. */
2742 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2743 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2744 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2745 gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2747 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2748 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2749 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2750 gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2752 intel_ensure_vp8_segmentation_buffer(ctx,
2753 &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2755 /* The same as AVC */
2756 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2757 bo = dri_bo_alloc(i965->intel.bufmgr,
2762 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2763 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2765 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2766 bo = dri_bo_alloc(i965->intel.bufmgr,
2767 "deblocking filter row store",
2768 width_in_mbs * 64 * 4,
2771 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2772 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2774 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2775 bo = dri_bo_alloc(i965->intel.bufmgr,
2776 "bsd mpc row store",
2777 width_in_mbs * 64 * 2,
2780 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2781 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2783 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2784 bo = dri_bo_alloc(i965->intel.bufmgr,
2786 width_in_mbs * 64 * 2,
2789 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2790 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* VP8 has no bitplane data. */
2792 gen7_mfd_context->bitplane_read_buffer.valid = 0;
2796 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2797 struct decode_state *decode_state,
2798 struct gen7_mfd_context *gen7_mfd_context)
2800 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2801 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2802 VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2803 VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2804 dri_bo *probs_bo = decode_state->probability_data->bo;
2806 unsigned int quantization_value[4][6];
2808 /* There is no safe way to error out if the segmentation buffer
2809 could not be allocated. So, instead of aborting, simply decode
2810 something even if the result may look totally inacurate */
2811 const unsigned int enable_segmentation =
2812 pic_param->pic_fields.bits.segmentation_enabled &&
2813 gen7_mfd_context->segmentation_buffer.valid;
2815 log2num = (int)log2(slice_param->num_of_partitions - 1);
2817 BEGIN_BCS_BATCH(batch, 38);
2818 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2819 OUT_BCS_BATCH(batch,
2820 (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2821 (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2822 OUT_BCS_BATCH(batch,
2824 pic_param->pic_fields.bits.sharpness_level << 16 |
2825 pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2826 pic_param->pic_fields.bits.sign_bias_golden << 12 |
2827 pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2828 pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2829 pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2830 pic_param->pic_fields.bits.segmentation_enabled << 8 |
2831 (enable_segmentation &&
2832 !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2833 (enable_segmentation &&
2834 pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2835 (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2836 pic_param->pic_fields.bits.filter_type << 4 |
2837 (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2838 !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2840 OUT_BCS_BATCH(batch,
2841 pic_param->loop_filter_level[3] << 24 |
2842 pic_param->loop_filter_level[2] << 16 |
2843 pic_param->loop_filter_level[1] << 8 |
2844 pic_param->loop_filter_level[0] << 0);
2846 /* Quantizer Value for 4 segmetns, DW4-DW15 */
2847 for (i = 0; i < 4; i++) {
2848 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2849 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2850 quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
2851 /* 101581>>16 is equivalent to 155/100 */
2852 quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
2853 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2854 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2856 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2857 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2859 OUT_BCS_BATCH(batch,
2860 quantization_value[i][0] << 16 | /* Y1AC */
2861 quantization_value[i][1] << 0); /* Y1DC */
2862 OUT_BCS_BATCH(batch,
2863 quantization_value[i][5] << 16 | /* UVAC */
2864 quantization_value[i][4] << 0); /* UVDC */
2865 OUT_BCS_BATCH(batch,
2866 quantization_value[i][3] << 16 | /* Y2AC */
2867 quantization_value[i][2] << 0); /* Y2DC */
2870 /* CoeffProbability table for non-key frame, DW16-DW18 */
2872 OUT_BCS_RELOC(batch, probs_bo,
2873 0, I915_GEM_DOMAIN_INSTRUCTION,
2875 OUT_BCS_BATCH(batch, 0);
2876 OUT_BCS_BATCH(batch, 0);
2878 OUT_BCS_BATCH(batch, 0);
2879 OUT_BCS_BATCH(batch, 0);
2880 OUT_BCS_BATCH(batch, 0);
2883 OUT_BCS_BATCH(batch,
2884 pic_param->mb_segment_tree_probs[2] << 16 |
2885 pic_param->mb_segment_tree_probs[1] << 8 |
2886 pic_param->mb_segment_tree_probs[0] << 0);
2888 OUT_BCS_BATCH(batch,
2889 pic_param->prob_skip_false << 24 |
2890 pic_param->prob_intra << 16 |
2891 pic_param->prob_last << 8 |
2892 pic_param->prob_gf << 0);
2894 OUT_BCS_BATCH(batch,
2895 pic_param->y_mode_probs[3] << 24 |
2896 pic_param->y_mode_probs[2] << 16 |
2897 pic_param->y_mode_probs[1] << 8 |
2898 pic_param->y_mode_probs[0] << 0);
2900 OUT_BCS_BATCH(batch,
2901 pic_param->uv_mode_probs[2] << 16 |
2902 pic_param->uv_mode_probs[1] << 8 |
2903 pic_param->uv_mode_probs[0] << 0);
2905 /* MV update value, DW23-DW32 */
2906 for (i = 0; i < 2; i++) {
2907 for (j = 0; j < 20; j += 4) {
2908 OUT_BCS_BATCH(batch,
2909 (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2910 pic_param->mv_probs[i][j + 2] << 16 |
2911 pic_param->mv_probs[i][j + 1] << 8 |
2912 pic_param->mv_probs[i][j + 0] << 0);
2916 OUT_BCS_BATCH(batch,
2917 (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2918 (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2919 (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) << 8 |
2920 (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) << 0);
2922 OUT_BCS_BATCH(batch,
2923 (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2924 (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2925 (pic_param->loop_filter_deltas_mode[1] & 0x7f) << 8 |
2926 (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0);
2928 /* segmentation id stream base address, DW35-DW37 */
2929 if (enable_segmentation) {
2930 OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2931 0, I915_GEM_DOMAIN_INSTRUCTION,
2933 OUT_BCS_BATCH(batch, 0);
2934 OUT_BCS_BATCH(batch, 0);
2937 OUT_BCS_BATCH(batch, 0);
2938 OUT_BCS_BATCH(batch, 0);
2939 OUT_BCS_BATCH(batch, 0);
2941 ADVANCE_BCS_BATCH(batch);
2945 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2946 VAPictureParameterBufferVP8 *pic_param,
2947 VASliceParameterBufferVP8 *slice_param,
2948 dri_bo *slice_data_bo,
2949 struct gen7_mfd_context *gen7_mfd_context)
2951 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2953 unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
2954 unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
2955 unsigned int partition_size_0 = slice_param->partition_size[0];
2957 assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
2958 if (used_bits == 8) {
2961 partition_size_0 -= 1;
2964 assert(slice_param->num_of_partitions >= 2);
2965 assert(slice_param->num_of_partitions <= 9);
2967 log2num = (int)log2(slice_param->num_of_partitions - 1);
2969 BEGIN_BCS_BATCH(batch, 22);
2970 OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2971 OUT_BCS_BATCH(batch,
2972 used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2973 pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
2975 (slice_param->macroblock_offset & 0x7));
2976 OUT_BCS_BATCH(batch,
2977 pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
2980 OUT_BCS_BATCH(batch, partition_size_0);
2981 OUT_BCS_BATCH(batch, offset);
2982 //partion sizes in bytes are present after the above first partition when there are more than one token partition
2983 offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
2984 for (i = 1; i < 9; i++) {
2985 if (i < slice_param->num_of_partitions) {
2986 OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
2987 OUT_BCS_BATCH(batch, offset);
2989 OUT_BCS_BATCH(batch, 0);
2990 OUT_BCS_BATCH(batch, 0);
2993 offset += slice_param->partition_size[i];
2996 OUT_BCS_BATCH(batch,
2997 1 << 31 | /* concealment method */
3000 ADVANCE_BCS_BATCH(batch);
3004 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3005 struct decode_state *decode_state,
3006 struct gen7_mfd_context *gen7_mfd_context)
3008 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3009 VAPictureParameterBufferVP8 *pic_param;
3010 VASliceParameterBufferVP8 *slice_param;
3011 dri_bo *slice_data_bo;
3013 assert(decode_state->pic_param && decode_state->pic_param->buffer);
3014 pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3016 /* one slice per frame */
3017 if (decode_state->num_slice_params != 1 ||
3018 (!decode_state->slice_params ||
3019 !decode_state->slice_params[0] ||
3020 (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3021 (!decode_state->slice_datas ||
3022 !decode_state->slice_datas[0] ||
3023 !decode_state->slice_datas[0]->bo) ||
3024 !decode_state->probability_data) {
3025 WARN_ONCE("Wrong parameters for VP8 decoding\n");
3030 slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3031 slice_data_bo = decode_state->slice_datas[0]->bo;
3033 gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3034 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3035 intel_batchbuffer_emit_mi_flush(batch);
3036 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3037 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3038 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3039 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3040 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3041 gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3042 gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3043 intel_batchbuffer_end_atomic(batch);
3044 intel_batchbuffer_flush(batch);
3048 gen8_mfd_decode_picture(VADriverContextP ctx,
3050 union codec_state *codec_state,
3051 struct hw_context *hw_context)
3054 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3055 struct decode_state *decode_state = &codec_state->decode;
3058 assert(gen7_mfd_context);
3060 vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3062 if (vaStatus != VA_STATUS_SUCCESS)
3065 gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3068 case VAProfileMPEG2Simple:
3069 case VAProfileMPEG2Main:
3070 gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3073 case VAProfileH264ConstrainedBaseline:
3074 case VAProfileH264Main:
3075 case VAProfileH264High:
3076 gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3079 case VAProfileVC1Simple:
3080 case VAProfileVC1Main:
3081 case VAProfileVC1Advanced:
3082 gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3085 case VAProfileJPEGBaseline:
3086 gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3089 case VAProfileVP8Version0_3:
3090 gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3098 vaStatus = VA_STATUS_SUCCESS;
3105 gen8_mfd_context_destroy(void *hw_context)
3107 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3109 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3110 gen7_mfd_context->post_deblocking_output.bo = NULL;
3112 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3113 gen7_mfd_context->pre_deblocking_output.bo = NULL;
3115 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3116 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3118 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3119 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3121 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3122 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3124 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3125 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3127 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3128 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3130 dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3131 gen7_mfd_context->segmentation_buffer.bo = NULL;
3133 dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3135 intel_batchbuffer_free(gen7_mfd_context->base.batch);
3136 free(gen7_mfd_context);
3139 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3140 struct gen7_mfd_context *gen7_mfd_context)
3142 gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3143 gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3144 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3145 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3149 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3151 struct intel_driver_data *intel = intel_driver_data(ctx);
3152 struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3155 gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3156 gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3157 gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3159 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3160 gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3161 gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3164 gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3165 gen7_mfd_context->segmentation_buffer.valid = 0;
3167 switch (obj_config->profile) {
3168 case VAProfileMPEG2Simple:
3169 case VAProfileMPEG2Main:
3170 gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3173 case VAProfileH264ConstrainedBaseline:
3174 case VAProfileH264Main:
3175 case VAProfileH264High:
3176 gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3181 return (struct hw_context *)gen7_mfd_context;