2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
/* True when the GPU stepping is B0 or newer (device revision >= B0_STEP_REV). */
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/*
 * Zig-zag scan order for an 8x8 coefficient block: entry i is the raster
 * (row-major) index of the i-th coefficient in scan order. Used to reorder
 * quantization matrices before uploading them to the MFX hardware.
 */
51 static const uint32_t zigzag_direct[64] = {
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
54 12, 19, 26, 33, 40, 48, 41, 34,
55 27, 20, 13, 6, 7, 14, 21, 28,
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
/*
 * Attach (or reuse) the per-surface AVC private data (GenAvcSurface) on
 * obj_surface and make sure its direct-MV (DMV) scratch buffer exists,
 * sized for the whole frame (width_in_mbs * height_in_mbs * 128 bytes).
 * NOTE(review): this excerpt elides several original lines; the calloc()
 * result is dereferenced below without a NULL check — confirm whether the
 * full source handles allocation failure.
 */
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64 VAPictureParameterBufferH264 *pic_param,
65 struct object_surface *obj_surface)
67 struct i965_driver_data *i965 = i965_driver_data(ctx);
68 GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69 int width_in_mbs, height_in_mbs;
71 obj_surface->free_private_data = gen_free_avc_surface;
72 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
/* First use of this surface: allocate and register the private data. */
75 if (!gen7_avc_surface) {
76 gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77 gen7_avc_surface->frame_store_id = -1;
78 assert((obj_surface->size & 0x3f) == 0);
79 obj_surface->private_data = gen7_avc_surface;
82 /* DMV buffers relate to the whole frame, irrespective of field coding,
   so a single frame-sized allocation covers both cases. */
84 if (gen7_avc_surface->dmv_top == NULL) {
85 gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86 "direct mv w/r buffer",
87 width_in_mbs * height_in_mbs * 128,
89 assert(gen7_avc_surface->dmv_top);
/*
 * Emit MFX_PIPE_MODE_SELECT (5 dwords): put the MFX engine into VLD decode
 * mode for the chosen codec (standard_select) and route the output through
 * the pre- or post-deblocking path depending on which buffer is valid.
 */
94 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
95 struct decode_state *decode_state,
97 struct gen7_mfd_context *gen7_mfd_context)
99 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Only these codecs are supported by this decode path. */
101 assert(standard_select == MFX_FORMAT_MPEG2 ||
102 standard_select == MFX_FORMAT_AVC ||
103 standard_select == MFX_FORMAT_VC1 ||
104 standard_select == MFX_FORMAT_JPEG ||
105 standard_select == MFX_FORMAT_VP8);
107 BEGIN_BCS_BATCH(batch, 5);
108 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
110 (MFX_LONG_MODE << 17) | /* Currently only support long format */
111 (MFD_MODE_VLD << 15) | /* VLD mode */
112 (0 << 10) | /* disable Stream-Out */
113 (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
114 (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
115 (0 << 5) | /* not in stitch mode */
116 (MFX_CODEC_DECODE << 4) | /* decoding mode */
117 (standard_select << 0));
/* Error-handling dword: do not terminate decode on AVC stream errors. */
119 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
120 (0 << 3) | /* terminate if AVC mbdata error occurs */
121 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
124 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
125 OUT_BCS_BATCH(batch, 0); /* reserved */
126 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE (6 dwords) describing the render target surface:
 * dimensions, pitch, Y-major tiling, planar 4:2:0 (or monochrome for Y800)
 * layout, and the Y offsets of the Cb/Cr planes.
 */
130 gen8_mfd_surface_state(VADriverContextP ctx,
131 struct decode_state *decode_state,
133 struct gen7_mfd_context *gen7_mfd_context)
135 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
136 struct object_surface *obj_surface = decode_state->render_object;
137 unsigned int y_cb_offset;
138 unsigned int y_cr_offset;
139 unsigned int surface_format;
143 y_cb_offset = obj_surface->y_cb_offset;
144 y_cr_offset = obj_surface->y_cr_offset;
/* Y800 (grayscale) surfaces have no chroma planes. */
146 surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
147 MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
149 BEGIN_BCS_BATCH(batch, 6);
150 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
151 OUT_BCS_BATCH(batch, 0);
153 ((obj_surface->orig_height - 1) << 18) |
154 ((obj_surface->orig_width - 1) << 4));
156 (surface_format << 28) | /* 420 planar YUV surface */
157 ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
158 (0 << 22) | /* surface object control state, ignored */
159 ((obj_surface->width - 1) << 3) | /* pitch */
160 (0 << 2) | /* must be 0 */
161 (1 << 1) | /* must be tiled */
162 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
164 (0 << 16) | /* X offset for U(Cb), must be 0 */
165 (y_cb_offset << 0)); /* Y offset for U(Cb) */
167 (0 << 16) | /* X offset for V(Cr), must be 0 */
168 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
169 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): relocations for the pre/post
 * deblocking outputs, the intra and deblocking-filter row-store scratch
 * buffers, and the reference picture frame store. Invalid/absent buffers
 * are padded with zero dwords so the command stays a fixed length.
 */
173 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
174 struct decode_state *decode_state,
176 struct gen7_mfd_context *gen7_mfd_context)
178 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
181 BEGIN_BCS_BATCH(batch, 61);
182 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
183 /* Pre-deblock 1-3 */
184 if (gen7_mfd_context->pre_deblocking_output.valid)
185 OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
186 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
189 OUT_BCS_BATCH(batch, 0);
191 OUT_BCS_BATCH(batch, 0);
192 OUT_BCS_BATCH(batch, 0);
193 /* Post-deblocking 4-6 */
194 if (gen7_mfd_context->post_deblocking_output.valid)
195 OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
196 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
199 OUT_BCS_BATCH(batch, 0);
201 OUT_BCS_BATCH(batch, 0);
202 OUT_BCS_BATCH(batch, 0);
204 /* uncompressed-video & stream out 7-12 */
205 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
206 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
207 OUT_BCS_BATCH(batch, 0);
208 OUT_BCS_BATCH(batch, 0);
209 OUT_BCS_BATCH(batch, 0);
210 OUT_BCS_BATCH(batch, 0);
212 /* intra row-store scratch 13-15 */
213 if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
214 OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
215 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
218 OUT_BCS_BATCH(batch, 0);
220 OUT_BCS_BATCH(batch, 0);
221 OUT_BCS_BATCH(batch, 0);
222 /* deblocking-filter-row-store 16-18 */
223 if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
224 OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
225 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
228 OUT_BCS_BATCH(batch, 0);
229 OUT_BCS_BATCH(batch, 0);
230 OUT_BCS_BATCH(batch, 0);
/* Reference picture frame store: one reloc per valid reference surface,
   zero dword otherwise. The GPU only reads these (write domain 0). */
233 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
234 struct object_surface *obj_surface;
236 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
237 gen7_mfd_context->reference_surface[i].obj_surface &&
238 gen7_mfd_context->reference_surface[i].obj_surface->bo) {
239 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
241 OUT_BCS_RELOC(batch, obj_surface->bo,
242 I915_GEM_DOMAIN_INSTRUCTION, 0,
245 OUT_BCS_BATCH(batch, 0);
248 OUT_BCS_BATCH(batch, 0);
251 /* reference property 51 */
252 OUT_BCS_BATCH(batch, 0);
254 /* Macroblock status & ILDB 52-57 */
255 OUT_BCS_BATCH(batch, 0);
256 OUT_BCS_BATCH(batch, 0);
257 OUT_BCS_BATCH(batch, 0);
258 OUT_BCS_BATCH(batch, 0);
259 OUT_BCS_BATCH(batch, 0);
260 OUT_BCS_BATCH(batch, 0);
262 /* the second Macroblock status 58-60 */
263 OUT_BCS_BATCH(batch, 0);
264 OUT_BCS_BATCH(batch, 0);
265 OUT_BCS_BATCH(batch, 0);
267 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): point the indirect
 * bitstream object at slice_data_bo. All encoder-side indirect objects
 * (MV, IT_COFF, IT_DBLK, PAK_BSE) are zeroed since this is a decode path.
 */
271 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
272 dri_bo *slice_data_bo,
274 struct gen7_mfd_context *gen7_mfd_context)
276 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
278 BEGIN_BCS_BATCH(batch, 26);
279 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
281 OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
282 OUT_BCS_BATCH(batch, 0);
283 OUT_BCS_BATCH(batch, 0);
284 /* Upper bound 4-5 */
285 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
286 OUT_BCS_BATCH(batch, 0);
288 /* MFX indirect MV 6-10 */
289 OUT_BCS_BATCH(batch, 0);
290 OUT_BCS_BATCH(batch, 0);
291 OUT_BCS_BATCH(batch, 0);
292 OUT_BCS_BATCH(batch, 0);
293 OUT_BCS_BATCH(batch, 0);
295 /* MFX IT_COFF 11-15 */
296 OUT_BCS_BATCH(batch, 0);
297 OUT_BCS_BATCH(batch, 0);
298 OUT_BCS_BATCH(batch, 0);
299 OUT_BCS_BATCH(batch, 0);
300 OUT_BCS_BATCH(batch, 0);
302 /* MFX IT_DBLK 16-20 */
303 OUT_BCS_BATCH(batch, 0);
304 OUT_BCS_BATCH(batch, 0);
305 OUT_BCS_BATCH(batch, 0);
306 OUT_BCS_BATCH(batch, 0);
307 OUT_BCS_BATCH(batch, 0);
309 /* MFX PAK_BSE object for encoder 21-25 */
310 OUT_BCS_BATCH(batch, 0);
311 OUT_BCS_BATCH(batch, 0);
312 OUT_BCS_BATCH(batch, 0);
313 OUT_BCS_BATCH(batch, 0);
314 OUT_BCS_BATCH(batch, 0);
316 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): relocations for the
 * BSD/MPC row-store, MPR row-store, and bitplane-read buffers; zero dwords
 * for any buffer that is not valid for the current codec.
 */
320 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
321 struct decode_state *decode_state,
323 struct gen7_mfd_context *gen7_mfd_context)
325 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
327 BEGIN_BCS_BATCH(batch, 10);
328 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
330 if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
331 OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
332 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
335 OUT_BCS_BATCH(batch, 0);
337 OUT_BCS_BATCH(batch, 0);
338 OUT_BCS_BATCH(batch, 0);
339 /* MPR Row Store Scratch buffer 4-6 */
340 if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
341 OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
342 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
345 OUT_BCS_BATCH(batch, 0);
347 OUT_BCS_BATCH(batch, 0);
348 OUT_BCS_BATCH(batch, 0);
/* Bitplane buffer (VC-1 only); read-only for the GPU. */
351 if (gen7_mfd_context->bitplane_read_buffer.valid)
352 OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
353 I915_GEM_DOMAIN_INSTRUCTION, 0,
356 OUT_BCS_BATCH(batch, 0);
357 OUT_BCS_BATCH(batch, 0);
358 OUT_BCS_BATCH(batch, 0);
359 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_QM_STATE (18 dwords): upload one quantization matrix of up to
 * 64 bytes (16 dwords) for the given qm_type.
 * NOTE(review): when qm_length < 64 the tail of qm_buffer is never
 * initialized yet all 16 dwords are emitted — confirm whether callers
 * always pass a full matrix or whether the tail should be zeroed.
 */
363 gen8_mfd_qm_state(VADriverContextP ctx,
367 struct gen7_mfd_context *gen7_mfd_context)
369 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
370 unsigned int qm_buffer[16];
372 assert(qm_length <= 16 * 4);
373 memcpy(qm_buffer, qm, qm_length);
375 BEGIN_BCS_BATCH(batch, 18);
376 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
377 OUT_BCS_BATCH(batch, qm_type << 0);
378 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
379 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_IMG_STATE (17 dwords): per-picture AVC decode parameters
 * derived from the VA picture parameter buffer — frame size in MBs, QP
 * offsets, prediction flags, and frame/field/MBAFF structure bits.
 */
383 gen8_mfd_avc_img_state(VADriverContextP ctx,
384 struct decode_state *decode_state,
385 struct gen7_mfd_context *gen7_mfd_context)
387 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
389 int mbaff_frame_flag;
390 unsigned int width_in_mbs, height_in_mbs;
391 VAPictureParameterBufferH264 *pic_param;
393 assert(decode_state->pic_param && decode_state->pic_param->buffer);
394 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
395 assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
/* Derive img_struct from the current picture's field flags. */
397 if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
399 else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
/* Field pictures must be flagged as such in pic_fields, frames must not. */
404 if ((img_struct & 0x1) == 0x1) {
405 assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
407 assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
410 if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
411 assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
412 assert(pic_param->pic_fields.bits.field_pic_flag == 0);
414 assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
/* MBAFF: adaptive frame/field coding within a frame picture. */
417 mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
418 !pic_param->pic_fields.bits.field_pic_flag);
420 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
421 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
423 /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
424 assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
425 pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
426 assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
428 BEGIN_BCS_BATCH(batch, 17);
429 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
431 (width_in_mbs * height_in_mbs - 1));
433 ((height_in_mbs - 1) << 16) |
434 ((width_in_mbs - 1) << 0));
436 ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
437 ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
438 (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
439 (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
440 (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
441 (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
444 (pic_param->seq_fields.bits.chroma_format_idc << 10) |
445 (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
446 ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
447 (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
448 (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
449 (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
450 (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
451 (mbaff_frame_flag << 1) |
452 (pic_param->pic_fields.bits.field_pic_flag << 0));
/* Remaining dwords are unused/reserved for this decode configuration. */
453 OUT_BCS_BATCH(batch, 0);
454 OUT_BCS_BATCH(batch, 0);
455 OUT_BCS_BATCH(batch, 0);
456 OUT_BCS_BATCH(batch, 0);
457 OUT_BCS_BATCH(batch, 0);
458 OUT_BCS_BATCH(batch, 0);
459 OUT_BCS_BATCH(batch, 0);
460 OUT_BCS_BATCH(batch, 0);
461 OUT_BCS_BATCH(batch, 0);
462 OUT_BCS_BATCH(batch, 0);
463 OUT_BCS_BATCH(batch, 0);
464 OUT_BCS_BATCH(batch, 0);
465 ADVANCE_BCS_BATCH(batch);
/*
 * Upload the AVC scaling lists: 4x4 intra and inter matrices always, plus
 * the 8x8 intra/inter matrices when transform_8x8_mode is enabled. Falls
 * back to the context's default (flat) IQ matrix if the app supplied none.
 */
469 gen8_mfd_avc_qm_state(VADriverContextP ctx,
470 struct decode_state *decode_state,
471 struct gen7_mfd_context *gen7_mfd_context)
473 VAIQMatrixBufferH264 *iq_matrix;
474 VAPictureParameterBufferH264 *pic_param;
476 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
477 iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
479 iq_matrix = &gen7_mfd_context->iq_matrix.h264;
481 assert(decode_state->pic_param && decode_state->pic_param->buffer);
482 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
/* Lists 0-2 are intra (Y/Cb/Cr), lists 3-5 are inter — 3 * 16 bytes each. */
484 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
485 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
487 if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
488 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
489 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
/* Emit the AVC picture-ID state from the shared Gen7.5+ helper, mapping the
 * context's reference surfaces to hardware picture IDs. */
494 gen8_mfd_avc_picid_state(VADriverContextP ctx,
495 struct decode_state *decode_state,
496 struct gen7_mfd_context *gen7_mfd_context)
498 gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
499 gen7_mfd_context->reference_surface);
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): DMV buffer relocations for
 * each reference surface and the current picture, followed by the POC
 * (Top/BottomFieldOrderCnt) list for references and the current picture.
 */
503 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
504 struct decode_state *decode_state,
505 VAPictureParameterBufferH264 *pic_param,
506 VASliceParameterBufferH264 *slice_param,
507 struct gen7_mfd_context *gen7_mfd_context)
509 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
510 struct object_surface *obj_surface;
511 GenAvcSurface *gen7_avc_surface;
512 VAPictureH264 *va_pic;
515 BEGIN_BCS_BATCH(batch, 71);
516 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
518 /* reference surfaces 0..15 */
519 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
520 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
521 gen7_mfd_context->reference_surface[i].obj_surface &&
522 gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
524 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
525 gen7_avc_surface = obj_surface->private_data;
/* Reference DMV buffers are read-only for the GPU (write domain 0). */
527 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
528 I915_GEM_DOMAIN_INSTRUCTION, 0,
530 OUT_BCS_BATCH(batch, 0);
532 OUT_BCS_BATCH(batch, 0);
533 OUT_BCS_BATCH(batch, 0);
537 OUT_BCS_BATCH(batch, 0);
539 /* the current decoding frame/field */
540 va_pic = &pic_param->CurrPic;
541 obj_surface = decode_state->render_object;
542 assert(obj_surface->bo && obj_surface->private_data);
543 gen7_avc_surface = obj_surface->private_data;
/* The current picture's DMV buffer is written by the GPU. */
545 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
546 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
549 OUT_BCS_BATCH(batch, 0);
550 OUT_BCS_BATCH(batch, 0);
/* POC list: top/bottom field order counts for each reference slot. */
553 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
554 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
557 const VAPictureH264 * const va_pic = avc_find_picture(
558 obj_surface->base.id, pic_param->ReferenceFrames,
559 ARRAY_ELEMS(pic_param->ReferenceFrames));
561 assert(va_pic != NULL);
562 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
563 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
565 OUT_BCS_BATCH(batch, 0);
566 OUT_BCS_BATCH(batch, 0);
/* Finally, the current picture's own POCs. */
570 va_pic = &pic_param->CurrPic;
571 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
572 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
574 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice: normalized slice
 * type, active reference counts, QP/deblocking parameters, and the MB
 * coordinates of this slice and the next (or the picture end for the
 * last slice).
 */
578 gen8_mfd_avc_slice_state(VADriverContextP ctx,
579 VAPictureParameterBufferH264 *pic_param,
580 VASliceParameterBufferH264 *slice_param,
581 VASliceParameterBufferH264 *next_slice_param,
582 struct gen7_mfd_context *gen7_mfd_context)
584 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
585 int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
586 int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
587 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
588 int num_ref_idx_l0, num_ref_idx_l1;
589 int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
590 pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
591 int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
/* Fold SI/SP onto I/P: the hardware only knows I, P and B slices. */
594 if (slice_param->slice_type == SLICE_TYPE_I ||
595 slice_param->slice_type == SLICE_TYPE_SI) {
596 slice_type = SLICE_TYPE_I;
597 } else if (slice_param->slice_type == SLICE_TYPE_P ||
598 slice_param->slice_type == SLICE_TYPE_SP) {
599 slice_type = SLICE_TYPE_P;
601 assert(slice_param->slice_type == SLICE_TYPE_B);
602 slice_type = SLICE_TYPE_B;
/* Reference list sizes: I uses none, P uses L0 only, B uses both. */
605 if (slice_type == SLICE_TYPE_I) {
606 assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
607 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
610 } else if (slice_type == SLICE_TYPE_P) {
611 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
612 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
615 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
616 num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
/* In MBAFF pictures MB addresses count MB pairs, hence the shift. */
619 first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
620 slice_hor_pos = first_mb_in_slice % width_in_mbs;
621 slice_ver_pos = first_mb_in_slice / width_in_mbs;
623 if (next_slice_param) {
624 first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
625 next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
626 next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
/* Last slice: the "next" position is the bottom of the picture
   (half height for a field picture). */
628 next_slice_hor_pos = 0;
629 next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
632 BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
633 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
634 OUT_BCS_BATCH(batch, slice_type);
636 (num_ref_idx_l1 << 24) |
637 (num_ref_idx_l0 << 16) |
638 (slice_param->chroma_log2_weight_denom << 8) |
639 (slice_param->luma_log2_weight_denom << 0));
641 (slice_param->direct_spatial_mv_pred_flag << 29) |
642 (slice_param->disable_deblocking_filter_idc << 27) |
643 (slice_param->cabac_init_idc << 24) |
644 ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
645 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
646 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
648 (slice_ver_pos << 24) |
649 (slice_hor_pos << 16) |
650 (first_mb_in_slice << 0));
652 (next_slice_ver_pos << 16) |
653 (next_slice_hor_pos << 0));
655 (next_slice_param == NULL) << 19); /* last slice flag */
656 OUT_BCS_BATCH(batch, 0);
657 OUT_BCS_BATCH(batch, 0);
658 OUT_BCS_BATCH(batch, 0);
659 OUT_BCS_BATCH(batch, 0);
660 ADVANCE_BCS_BATCH(batch);
/* Emit the AVC reference-index remap state via the shared Gen6+ helper,
 * using the context's reference surface table. */
664 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
665 VAPictureParameterBufferH264 *pic_param,
666 VASliceParameterBufferH264 *slice_param,
667 struct gen7_mfd_context *gen7_mfd_context)
669 gen6_send_avc_ref_idx_state(
670 gen7_mfd_context->base.batch,
672 gen7_mfd_context->reference_surface
/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE when explicit weighted prediction is in
 * use: one table (L0) for weighted P/SP slices, two tables (L0 then L1)
 * for B slices with weighted_bipred_idc == 1. Each table packs, per
 * reference, {luma weight, luma offset, Cb weight, Cb offset, Cr weight,
 * Cr offset} as 16-bit values for 32 entries.
 */
677 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
678 VAPictureParameterBufferH264 *pic_param,
679 VASliceParameterBufferH264 *slice_param,
680 struct gen7_mfd_context *gen7_mfd_context)
682 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
683 int i, j, num_weight_offset_table = 0;
684 short weightoffsets[32 * 6];
686 if ((slice_param->slice_type == SLICE_TYPE_P ||
687 slice_param->slice_type == SLICE_TYPE_SP) &&
688 (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
689 num_weight_offset_table = 1;
692 if ((slice_param->slice_type == SLICE_TYPE_B) &&
693 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
694 num_weight_offset_table = 2;
/* i == 0 emits the L0 table, i == 1 (B slices only) the L1 table. */
697 for (i = 0; i < num_weight_offset_table; i++) {
698 BEGIN_BCS_BATCH(batch, 98);
699 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
700 OUT_BCS_BATCH(batch, i);
703 for (j = 0; j < 32; j++) {
704 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
705 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
706 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
707 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
708 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
709 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
712 for (j = 0; j < 32; j++) {
713 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
714 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
715 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
716 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
717 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
718 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
722 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
723 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFD_AVC_BSD_OBJECT (6 dwords): kick the bitstream decoder for one
 * slice — data size/offset within slice_data_bo plus the byte and bit
 * offset of the first macroblock, and the last-slice flag.
 */
728 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
729 VAPictureParameterBufferH264 *pic_param,
730 VASliceParameterBufferH264 *slice_param,
731 dri_bo *slice_data_bo,
732 VASliceParameterBufferH264 *next_slice_param,
733 struct gen7_mfd_context *gen7_mfd_context)
735 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Locate the first MB's bit position past the slice header (depends on
   CABAC vs CAVLC entropy coding). */
736 int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
738 pic_param->pic_fields.bits.entropy_coding_mode_flag);
740 /* the input bitstream format on GEN7 differs from GEN6 */
741 BEGIN_BCS_BATCH(batch, 6);
742 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
744 (slice_param->slice_data_size));
745 OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
753 ((slice_data_bit_offset >> 3) << 16) |
757 ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
758 (slice_data_bit_offset & 0x7));
759 OUT_BCS_BATCH(batch, 0);
760 ADVANCE_BCS_BATCH(batch);
/* One-time AVC context setup: seed the context's fallback IQ matrix with
 * the default (flat) scaling lists, used when the app supplies none. */
764 gen8_mfd_avc_context_init(
765 VADriverContextP ctx,
766 struct gen7_mfd_context *gen7_mfd_context
769 /* Initialize flat scaling lists */
770 avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/*
 * Per-picture AVC decode setup: determine whether in-loop deblocking (ILDB)
 * is needed from the slice parameters, refresh the frame-store index and
 * current render surface, and (re)allocate all row-store scratch buffers.
 * Exactly one of post_/pre_deblocking_output is marked valid, which routes
 * the decoded picture through or around the deblocker.
 */
774 gen8_mfd_avc_decode_init(VADriverContextP ctx,
775 struct decode_state *decode_state,
776 struct gen7_mfd_context *gen7_mfd_context)
778 VAPictureParameterBufferH264 *pic_param;
779 VASliceParameterBufferH264 *slice_param;
780 struct i965_driver_data *i965 = i965_driver_data(ctx);
781 struct object_surface *obj_surface;
783 int i, j, enable_avc_ildb = 0;
784 unsigned int width_in_mbs, height_in_mbs;
/* Scan slices until one requests deblocking (idc != 1 means not disabled). */
786 for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
787 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
788 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
790 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
791 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
792 assert((slice_param->slice_type == SLICE_TYPE_I) ||
793 (slice_param->slice_type == SLICE_TYPE_SI) ||
794 (slice_param->slice_type == SLICE_TYPE_P) ||
795 (slice_param->slice_type == SLICE_TYPE_SP) ||
796 (slice_param->slice_type == SLICE_TYPE_B));
798 if (slice_param->disable_deblocking_filter_idc != 1) {
807 assert(decode_state->pic_param && decode_state->pic_param->buffer);
808 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
809 gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
810 gen7_mfd_context->reference_surface);
811 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
812 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
813 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
814 assert(height_in_mbs > 0 && height_in_mbs <= 256);
816 /* Current decoded picture */
817 obj_surface = decode_state->render_object;
818 if (pic_param->pic_fields.bits.reference_pic_flag)
819 obj_surface->flags |= SURFACE_REFERENCED;
821 obj_surface->flags &= ~SURFACE_REFERENCED;
823 avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
824 gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
/* Route the output: post-deblocking when ILDB runs, pre-deblocking
   otherwise; both reference the same render surface bo. */
826 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
827 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
828 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
829 gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
831 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
832 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
833 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
834 gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
/* Row-store scratch buffers, all sized by the picture width in MBs. */
836 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
837 bo = dri_bo_alloc(i965->intel.bufmgr,
842 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
843 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
845 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
846 bo = dri_bo_alloc(i965->intel.bufmgr,
847 "deblocking filter row store",
848 width_in_mbs * 64 * 4,
851 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
852 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
854 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
855 bo = dri_bo_alloc(i965->intel.bufmgr,
857 width_in_mbs * 64 * 2,
860 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
861 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
863 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
864 bo = dri_bo_alloc(i965->intel.bufmgr,
866 width_in_mbs * 64 * 2,
869 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
870 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* Bitplane buffer is VC-1 only; never used for AVC. */
872 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Top-level AVC picture decode: run per-picture init, then emit the full
 * MFX command sequence (pipe mode, surface, buffer addresses, QM, picid,
 * img state) followed by per-slice state and BSD objects for every slice
 * in every slice-parameter buffer, and flush the batch.
 */
876 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
877 struct decode_state *decode_state,
878 struct gen7_mfd_context *gen7_mfd_context)
880 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
881 VAPictureParameterBufferH264 *pic_param;
882 VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
883 dri_bo *slice_data_bo;
886 assert(decode_state->pic_param && decode_state->pic_param->buffer);
887 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
888 gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
890 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
891 intel_batchbuffer_emit_mi_flush(batch);
892 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
893 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
894 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
895 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
896 gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
897 gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
898 gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
/* Outer loop: one slice-parameter buffer (slice group) at a time. */
900 for (j = 0; j < decode_state->num_slice_params; j++) {
901 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
902 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
903 slice_data_bo = decode_state->slice_datas[j]->bo;
904 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
/* Peek at the next group's first slice so each slice knows its successor
   (NULL for the very last slice, which sets the last-slice flag). */
906 if (j == decode_state->num_slice_params - 1)
907 next_slice_group_param = NULL;
909 next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
911 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
912 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
913 assert((slice_param->slice_type == SLICE_TYPE_I) ||
914 (slice_param->slice_type == SLICE_TYPE_SI) ||
915 (slice_param->slice_type == SLICE_TYPE_P) ||
916 (slice_param->slice_type == SLICE_TYPE_SP) ||
917 (slice_param->slice_type == SLICE_TYPE_B));
919 if (i < decode_state->slice_params[j]->num_elements - 1)
920 next_slice_param = slice_param + 1;
922 next_slice_param = next_slice_group_param;
924 gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
925 gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
926 gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
927 gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
928 gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
933 intel_batchbuffer_end_atomic(batch);
934 intel_batchbuffer_flush(batch);
/*
 * Per-picture MPEG-2 decode setup: refresh the reference surface table,
 * ensure the render surface has an NV12 bo, allocate the BSD/MPC row-store
 * scratch buffer, and mark every AVC/VC-1-specific buffer invalid. MPEG-2
 * always outputs via the pre-deblocking path (no in-loop deblocker).
 */
938 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
939 struct decode_state *decode_state,
940 struct gen7_mfd_context *gen7_mfd_context)
942 VAPictureParameterBufferMPEG2 *pic_param;
943 struct i965_driver_data *i965 = i965_driver_data(ctx);
944 struct object_surface *obj_surface;
946 unsigned int width_in_mbs;
948 assert(decode_state->pic_param && decode_state->pic_param->buffer);
949 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
950 width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
952 mpeg2_set_reference_surfaces(
954 gen7_mfd_context->reference_surface,
959 /* Current decoded picture */
960 obj_surface = decode_state->render_object;
961 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
963 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
964 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
965 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
966 gen7_mfd_context->pre_deblocking_output.valid = 1;
968 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
969 bo = dri_bo_alloc(i965->intel.bufmgr,
974 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
975 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
/* Buffers not used by the MPEG-2 pipeline. */
977 gen7_mfd_context->post_deblocking_output.valid = 0;
978 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
979 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
980 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
981 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Emit the MFX_MPEG2_PIC_STATE command (13 dwords) describing the current
 * picture: f_codes, picture coding extension flags, coding type, and the
 * frame size in macroblocks. */
985 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
986 struct decode_state *decode_state,
987 struct gen7_mfd_context *gen7_mfd_context)
989 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
990 VAPictureParameterBufferMPEG2 *pic_param;
991 unsigned int slice_concealment_disable_bit = 0;
993 assert(decode_state->pic_param && decode_state->pic_param->buffer);
994 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
996 slice_concealment_disable_bit = 1;
998 BEGIN_BCS_BATCH(batch, 13);
999 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
/* DW1: the 16-bit VA f_code packs [0][0]..[1][1] top-down; unpack each
 * nibble into the hardware's bit positions. */
1000 OUT_BCS_BATCH(batch,
1001 (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1002 ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1003 ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1004 ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1005 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1006 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1007 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1008 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1009 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1010 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1011 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1012 pic_param->picture_coding_extension.bits.alternate_scan << 6);
1013 OUT_BCS_BATCH(batch,
1014 pic_param->picture_coding_type << 9);
/* DW3: frame dimensions in macroblocks, minus one per hardware spec. */
1015 OUT_BCS_BATCH(batch,
1016 (slice_concealment_disable_bit << 31) |
1017 ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1018 ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
/* DW4-DW12: reserved / unused, zero-filled. */
1019 OUT_BCS_BATCH(batch, 0);
1020 OUT_BCS_BATCH(batch, 0);
1021 OUT_BCS_BATCH(batch, 0);
1022 OUT_BCS_BATCH(batch, 0);
1023 OUT_BCS_BATCH(batch, 0);
1024 OUT_BCS_BATCH(batch, 0);
1025 OUT_BCS_BATCH(batch, 0);
1026 OUT_BCS_BATCH(batch, 0);
1027 OUT_BCS_BATCH(batch, 0);
1028 ADVANCE_BCS_BATCH(batch);
/* Track the MPEG-2 inverse-quantisation matrices across pictures and emit
 * them to the hardware. The VA buffer carries matrices in zig-zag scan
 * order; the driver-side cache stores them in raster order via
 * zigzag_direct[]. A cached load flag of -1 means "never loaded yet". */
1032 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1033 struct decode_state *decode_state,
1034 struct gen7_mfd_context *gen7_mfd_context)
1036 VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1039 /* Update internal QM state */
1040 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1041 VAIQMatrixBufferMPEG2 * const iq_matrix =
1042 (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1044 if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1045 iq_matrix->load_intra_quantiser_matrix) {
1046 gen_iq_matrix->load_intra_quantiser_matrix =
1047 iq_matrix->load_intra_quantiser_matrix;
1048 if (iq_matrix->load_intra_quantiser_matrix) {
/* De-zigzag into raster order for the hardware. */
1049 for (j = 0; j < 64; j++)
1050 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1051 iq_matrix->intra_quantiser_matrix[j];
1055 if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1056 iq_matrix->load_non_intra_quantiser_matrix) {
1057 gen_iq_matrix->load_non_intra_quantiser_matrix =
1058 iq_matrix->load_non_intra_quantiser_matrix;
1059 if (iq_matrix->load_non_intra_quantiser_matrix) {
1060 for (j = 0; j < 64; j++)
1061 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1062 iq_matrix->non_intra_quantiser_matrix[j];
1067 /* Commit QM state to HW */
/* Iteration 0 = intra matrix, iteration 1 = non-intra matrix. */
1068 for (i = 0; i < 2; i++) {
1069 unsigned char *qm = NULL;
1073 if (gen_iq_matrix->load_intra_quantiser_matrix) {
1074 qm = gen_iq_matrix->intra_quantiser_matrix;
1075 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1078 if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1079 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1080 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1087 gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
/* Emit one MFD_MPEG2_BSD_OBJECT command (5 dwords) for a single slice:
 * bitstream size/offset (with the macroblock_offset bit offset folded into
 * whole bytes), start/end macroblock positions, and quantiser scale.
 * is_field_pic_wa applies the slice-vertical-position workaround where the
 * app reports field-picture slice rows in frame units. */
1092 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1093 VAPictureParameterBufferMPEG2 *pic_param,
1094 VASliceParameterBufferMPEG2 *slice_param,
1095 VASliceParameterBufferMPEG2 *next_slice_param,
1096 struct gen7_mfd_context *gen7_mfd_context)
1098 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1099 unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1100 int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1102 if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1103 pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1105 is_field_pic_wa = is_field_pic &&
1106 gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
/* (vpos0,hpos0): first macroblock of this slice. */
1108 vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1109 hpos0 = slice_param->slice_horizontal_position;
/* (vpos1,hpos1): first macroblock of the next slice, or end of picture. */
1111 if (next_slice_param == NULL) {
1112 vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1115 vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1116 hpos1 = next_slice_param->slice_horizontal_position;
1119 mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1121 BEGIN_BCS_BATCH(batch, 5);
1122 OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
/* >> 3 converts the bit offset to whole bytes; the residual 0-7 bits are
 * passed separately below. */
1123 OUT_BCS_BATCH(batch,
1124 slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1125 OUT_BCS_BATCH(batch,
1126 slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1127 OUT_BCS_BATCH(batch,
1131 (next_slice_param == NULL) << 5 |
1132 (next_slice_param == NULL) << 3 |
1133 (slice_param->macroblock_offset & 0x7));
1134 OUT_BCS_BATCH(batch,
1135 (slice_param->quantiser_scale_code << 24) |
1136 (vpos1 << 8 | hpos1));
1137 ADVANCE_BCS_BATCH(batch);
/* Top-level MPEG-2 picture decode: set up per-picture state, emit the
 * common MFX pipeline commands, then walk every slice parameter buffer
 * (and every element within it) emitting one BSD object per slice. The
 * whole command sequence is built atomically and flushed at the end. */
1141 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1142 struct decode_state *decode_state,
1143 struct gen7_mfd_context *gen7_mfd_context)
1145 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1146 VAPictureParameterBufferMPEG2 *pic_param;
1147 VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1148 dri_bo *slice_data_bo;
1151 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1152 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1154 gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1155 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1156 intel_batchbuffer_emit_mi_flush(batch);
1157 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1158 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1159 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1160 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1161 gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1162 gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
/* Lazily probe the slice-vertical-position workaround once (<0 = unknown). */
1164 if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1165 gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1166 mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1168 for (j = 0; j < decode_state->num_slice_params; j++) {
1169 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1170 slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1171 slice_data_bo = decode_state->slice_datas[j]->bo;
1172 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
/* Peek at the next slice group so the last slice of this group can know
 * where the following slice starts (or NULL for end of picture). */
1174 if (j == decode_state->num_slice_params - 1)
1175 next_slice_group_param = NULL;
1177 next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1179 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1180 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1182 if (i < decode_state->slice_params[j]->num_elements - 1)
1183 next_slice_param = slice_param + 1;
1185 next_slice_param = next_slice_group_param;
1187 gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1192 intel_batchbuffer_end_atomic(batch);
1193 intel_batchbuffer_flush(batch);
/* VA picture_type -> GEN7 MFX VC-1 picture type. */
1196 static const int va_to_gen7_vc1_pic_type[5] = {
1200 GEN7_VC1_BI_PICTURE,
/* VA mv_mode -> GEN7 unified motion-vector mode field. */
1204 static const int va_to_gen7_vc1_mv[4] = {
1206 2, /* 1-MV half-pel */
1207 3, /* 1-MV half-pel bilinear */
/* Fixed-point (x/256) B-fraction scale factors indexed by
 * VAPictureParameterBufferVC1.b_picture_fraction. */
1211 static const int b_picture_scale_factor[21] = {
1212 128, 85, 170, 64, 192,
1213 51, 102, 153, 204, 43,
1214 215, 37, 74, 111, 148,
1215 185, 222, 32, 96, 160,
/* VA conditional_overlap_flag -> GEN7 condover field. */
1219 static const int va_to_gen7_vc1_condover[3] = {
/* VA sequence profile (0..3) -> GEN7 MFX profile encoding. */
1225 static const int va_to_gen7_vc1_profile[4] = {
1226 GEN7_VC1_SIMPLE_PROFILE,
1227 GEN7_VC1_MAIN_PROFILE,
1228 GEN7_VC1_RESERVED_PROFILE,
1229 GEN7_VC1_ADVANCED_PROFILE
/* Destructor for the per-surface VC-1 private data (installed as
 * obj_surface->free_private_data): releases the direct-MV buffer and the
 * container itself. Safe to call with *data == NULL. */
1233 gen8_mfd_free_vc1_surface(void **data)
1235 struct gen7_vc1_surface *gen7_vc1_surface = *data;
1237 if (!gen7_vc1_surface)
1240 dri_bo_unreference(gen7_vc1_surface->dmv);
1241 free(gen7_vc1_surface);
/* Attach (or refresh) the VC-1 private data on the decoded surface:
 * records the picture type and lazily allocates the direct-MV read/write
 * buffer (64 bytes per macroblock) used for B-picture direct mode. */
1246 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1247 VAPictureParameterBufferVC1 *pic_param,
1248 struct object_surface *obj_surface)
1250 struct i965_driver_data *i965 = i965_driver_data(ctx);
1251 struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1252 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1253 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1255 obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1257 if (!gen7_vc1_surface) {
/* NOTE(review): calloc result is not NULL-checked before the
 * assignments below — verify against the driver's OOM policy. */
1258 gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1259 assert((obj_surface->size & 0x3f) == 0);
1260 obj_surface->private_data = gen7_vc1_surface;
1263 gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
/* Allocate the direct-MV buffer once per surface and reuse it. */
1265 if (gen7_vc1_surface->dmv == NULL) {
1266 gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1267 "direct mv w/r buffer",
1268 width_in_mbs * height_in_mbs * 64,
/* Per-picture initialisation for VC-1 decoding: updates the reference
 * frame store, binds the render surface as either the post-deblocking
 * (loop filter on) or pre-deblocking (loop filter off) output, allocates
 * the row-store scratch buffers, and repacks the VA bitplane buffer into
 * the hardware's nibble layout when bitplanes are present. */
1274 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1275 struct decode_state *decode_state,
1276 struct gen7_mfd_context *gen7_mfd_context)
1278 VAPictureParameterBufferVC1 *pic_param;
1279 struct i965_driver_data *i965 = i965_driver_data(ctx);
1280 struct object_surface *obj_surface;
1285 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1286 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1287 width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1288 picture_type = pic_param->picture_fields.bits.picture_type;
1290 intel_update_vc1_frame_store_index(ctx,
1293 gen7_mfd_context->reference_surface);
1295 /* Current decoded picture */
1296 obj_surface = decode_state->render_object;
1297 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1298 gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
/* Exactly one of post/pre deblocking output is valid, selected by the
 * entrypoint loop-filter flag; both reference the same BO. */
1300 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1301 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1302 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1303 gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1305 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1306 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1307 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1308 gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1310 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1311 bo = dri_bo_alloc(i965->intel.bufmgr,
1316 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1317 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1319 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1320 bo = dri_bo_alloc(i965->intel.bufmgr,
1321 "deblocking filter row store",
1322 width_in_mbs * 7 * 64,
1325 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1326 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1328 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1329 bo = dri_bo_alloc(i965->intel.bufmgr,
1330 "bsd mpc row store",
1334 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1335 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1337 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1339 gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1340 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1342 if (gen7_mfd_context->bitplane_read_buffer.valid) {
1343 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1344 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
/* Two macroblocks' bitplane nibbles are packed per output byte. */
1345 int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1347 uint8_t *src = NULL, *dst = NULL;
1349 assert(decode_state->bit_plane->buffer);
1350 src = decode_state->bit_plane->buffer;
1352 bo = dri_bo_alloc(i965->intel.bufmgr,
1354 bitplane_width * height_in_mbs,
1357 gen7_mfd_context->bitplane_read_buffer.bo = bo;
1359 dri_bo_map(bo, True);
1360 assert(bo->virtual);
/* Repack: read a nibble per macroblock from the VA buffer (even MBs in
 * the high nibble) and interleave pairs into the destination bytes. */
1363 for (src_h = 0; src_h < height_in_mbs; src_h++) {
1364 for(src_w = 0; src_w < width_in_mbs; src_w++) {
1365 int src_index, dst_index;
1369 src_index = (src_h * width_in_mbs + src_w) / 2;
1370 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1371 src_value = ((src[src_index] >> src_shift) & 0xf);
1373 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1377 dst_index = src_w / 2;
1378 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
/* Odd trailing macroblock: shift its nibble into place. */
1382 dst[src_w / 2] >>= 4;
1384 dst += bitplane_width;
1389 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
/* Emit the MFD_VC1_LONG_PIC_STATE command (6 dwords). Derives all the
 * hardware picture-state fields from the VA picture parameters:
 * alternative-pquant configuration (VC-1 spec VOPDQUANT), the unified MV
 * mode, B-fraction scale factor, picture type (with the advanced-profile
 * I -> BI remap), AC coding-set selection, transform-type normalisation,
 * direct-MV surface validity, and the MV interpolation mode. */
1393 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1394 struct decode_state *decode_state,
1395 struct gen7_mfd_context *gen7_mfd_context)
1397 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1398 VAPictureParameterBufferVC1 *pic_param;
1399 struct object_surface *obj_surface;
1400 int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1401 int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1402 int unified_mv_mode;
1403 int ref_field_pic_polarity = 0;
1404 int scale_factor = 0;
1406 int dmv_surface_valid = 0;
1412 int interpolation_mode = 0;
1414 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1415 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1417 profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1418 dquant = pic_param->pic_quantizer_fields.bits.dquant;
1419 dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1420 dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1421 dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1422 dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1423 dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1424 alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
/* Map the VOPDQUANT syntax (dquant/dquantfrm/dqprofile/...) onto the
 * hardware's alt-pquant config and edge mask. */
1427 alt_pquant_config = 0;
1428 alt_pquant_edge_mask = 0;
1429 } else if (dquant == 2) {
1430 alt_pquant_config = 1;
1431 alt_pquant_edge_mask = 0xf;
1433 assert(dquant == 1);
1434 if (dquantfrm == 0) {
1435 alt_pquant_config = 0;
1436 alt_pquant_edge_mask = 0;
1439 assert(dquantfrm == 1);
1440 alt_pquant_config = 1;
1442 switch (dqprofile) {
1444 if (dqbilevel == 0) {
1445 alt_pquant_config = 2;
1446 alt_pquant_edge_mask = 0;
1448 assert(dqbilevel == 1);
1449 alt_pquant_config = 3;
1450 alt_pquant_edge_mask = 0;
1455 alt_pquant_edge_mask = 0xf;
1460 alt_pquant_edge_mask = 0x9;
1462 alt_pquant_edge_mask = (0x3 << dqdbedge);
1467 alt_pquant_edge_mask = (0x1 << dqsbedge);
/* Under intensity compensation the effective MV mode is carried in
 * mv_mode2. */
1476 if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1477 assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1478 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1480 assert(pic_param->mv_fields.bits.mv_mode < 4);
1481 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1484 if (pic_param->sequence_fields.bits.interlace == 1 &&
1485 pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1486 /* FIXME: calculate reference field picture polarity */
1488 ref_field_pic_polarity = 0;
1491 if (pic_param->b_picture_fraction < 21)
1492 scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1494 picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
/* Advanced-profile I pictures are handled by the hardware as BI. */
1496 if (profile == GEN7_VC1_ADVANCED_PROFILE &&
1497 picture_type == GEN7_VC1_I_PICTURE)
1498 picture_type = GEN7_VC1_BI_PICTURE;
1500 if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1501 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1503 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1506 * 8.3.6.2.1 Transform Type Selection
1507 * If variable-sized transform coding is not enabled,
1508 * then the 8x8 transform shall be used for all blocks.
1509 * it is also MFX_VC1_PIC_STATE requirement.
1511 if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1512 pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
1513 pic_param->transform_fields.bits.frame_level_transform_type = 0;
/* The direct-MV surface is only usable for B pictures whose backward
 * reference is a P picture (I/BI references carry no motion vectors). */
1517 if (picture_type == GEN7_VC1_B_PICTURE) {
1518 struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1520 obj_surface = decode_state->reference_objects[1];
1523 gen7_vc1_surface = obj_surface->private_data;
1525 if (!gen7_vc1_surface ||
1526 (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1527 va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1528 dmv_surface_valid = 0;
1530 dmv_surface_valid = 1;
1533 assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1535 if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1536 fcm = pic_param->picture_fields.bits.frame_coding_mode;
1538 if (pic_param->picture_fields.bits.top_field_first)
/* B-picture reference distance scaled by the B fraction (x/256). */
1544 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1545 brfd = pic_param->reference_fields.bits.reference_distance;
1546 brfd = (scale_factor * brfd) >> 8;
1547 brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
/* Overlap-smoothing enablement rules differ between simple/main and
 * advanced profile (condover syntax element). */
1554 if (profile != GEN7_VC1_ADVANCED_PROFILE){
1555 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1556 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1560 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1561 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1564 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1565 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
1566 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1568 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1569 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1575 assert(pic_param->conditional_overlap_flag < 3);
1576 assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1578 if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1579 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1580 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1581 interpolation_mode = 9; /* Half-pel bilinear */
1582 else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1583 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1584 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1585 interpolation_mode = 1; /* Half-pel bicubic */
1587 interpolation_mode = 0; /* Quarter-pel bicubic */
1589 BEGIN_BCS_BATCH(batch, 6);
1590 OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
/* DW1: coded size in macroblocks, minus one. */
1591 OUT_BCS_BATCH(batch,
1592 (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1593 ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1594 OUT_BCS_BATCH(batch,
1595 ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1596 dmv_surface_valid << 15 |
1597 (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1598 pic_param->rounding_control << 13 |
1599 pic_param->sequence_fields.bits.syncmarker << 12 |
1600 interpolation_mode << 8 |
1601 0 << 7 | /* FIXME: scale up or down ??? */
1602 pic_param->range_reduction_frame << 6 |
1603 pic_param->entrypoint_fields.bits.loopfilter << 5 |
1605 !pic_param->picture_fields.bits.is_first_field << 3 |
1606 (pic_param->sequence_fields.bits.profile == 3) << 0);
1607 OUT_BCS_BATCH(batch,
1608 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1609 picture_type << 26 |
1612 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1614 OUT_BCS_BATCH(batch,
1615 unified_mv_mode << 28 |
1616 pic_param->mv_fields.bits.four_mv_switch << 27 |
1617 pic_param->fast_uvmc_flag << 26 |
1618 ref_field_pic_polarity << 25 |
1619 pic_param->reference_fields.bits.num_reference_pictures << 24 |
1620 pic_param->reference_fields.bits.reference_distance << 20 |
1621 pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1622 pic_param->mv_fields.bits.extended_dmv_range << 10 |
1623 pic_param->mv_fields.bits.extended_mv_range << 8 |
1624 alt_pquant_edge_mask << 4 |
1625 alt_pquant_config << 2 |
1626 pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1627 pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
/* DW5: bitplane presence flags are inverted — the hardware bit means
 * "raw mode" (bitplane NOT coded). */
1628 OUT_BCS_BATCH(batch,
1629 !!pic_param->bitplane_present.value << 31 |
1630 !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1631 !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1632 !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1633 !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1634 !pic_param->bitplane_present.flags.bp_overflags << 26 |
1635 !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1636 !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1637 pic_param->mv_fields.bits.mv_table << 20 |
1638 pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1639 pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1640 pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1641 pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1642 pic_param->mb_mode_table << 8 |
1644 pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1645 pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1646 pic_param->cbp_table << 0);
1647 ADVANCE_BCS_BATCH(batch);
/* Emit the MFX_VC1_PRED_PIPE_STATE command (6 dwords): intensity
 * compensation enables plus the luma scale/shift parameters from the VA
 * picture parameter buffer. */
1651 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1652 struct decode_state *decode_state,
1653 struct gen7_mfd_context *gen7_mfd_context)
1655 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1656 VAPictureParameterBufferVC1 *pic_param;
1657 int intensitycomp_single;
1659 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1660 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
/* Intensity compensation is signalled via the MV mode. */
1664 intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1666 BEGIN_BCS_BATCH(batch, 6);
1667 OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1668 OUT_BCS_BATCH(batch,
1669 0 << 14 | /* FIXME: double ??? */
1671 intensitycomp_single << 10 |
1672 intensitycomp_single << 8 |
1673 0 << 4 | /* FIXME: interlace mode */
1675 OUT_BCS_BATCH(batch,
1676 pic_param->luma_shift << 16 |
1677 pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1678 OUT_BCS_BATCH(batch, 0);
1679 OUT_BCS_BATCH(batch, 0);
1680 OUT_BCS_BATCH(batch, 0);
1681 ADVANCE_BCS_BATCH(batch);
/* Emit the MFX_VC1_DIRECTMODE_STATE command (7 dwords): the direct-MV
 * write buffer comes from the surface being decoded, the read buffer from
 * the backward reference surface; either address slot is zeroed when the
 * corresponding buffer is absent. */
1685 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1686 struct decode_state *decode_state,
1687 struct gen7_mfd_context *gen7_mfd_context)
1689 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1690 struct object_surface *obj_surface;
1691 dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
/* Current picture's per-surface private data carries the write buffer. */
1693 obj_surface = decode_state->render_object;
1695 if (obj_surface && obj_surface->private_data) {
1696 dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
/* Backward reference (reference_objects[1]) supplies the read buffer. */
1699 obj_surface = decode_state->reference_objects[1];
1701 if (obj_surface && obj_surface->private_data) {
1702 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1705 BEGIN_BCS_BATCH(batch, 7);
1706 OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1708 if (dmv_write_buffer)
1709 OUT_BCS_RELOC(batch, dmv_write_buffer,
1710 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1713 OUT_BCS_BATCH(batch, 0);
1715 OUT_BCS_BATCH(batch, 0);
1716 OUT_BCS_BATCH(batch, 0);
1718 if (dmv_read_buffer)
1719 OUT_BCS_RELOC(batch, dmv_read_buffer,
1720 I915_GEM_DOMAIN_INSTRUCTION, 0,
1723 OUT_BCS_BATCH(batch, 0);
1725 OUT_BCS_BATCH(batch, 0);
1726 OUT_BCS_BATCH(batch, 0);
1728 ADVANCE_BCS_BATCH(batch);
/* Translate a macroblock bit offset within the slice header from the
 * emulation-prevention-stripped stream position to the raw bitstream
 * position: scan the header bytes for 0x00 0x00 0x03 escape sequences
 * (followed by a byte < 4) and account for the extra emulation bytes.
 * Returns the adjusted bit offset into the raw slice data. */
1732 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1734 int out_slice_data_bit_offset;
1735 int slice_header_size = in_slice_data_bit_offset / 8;
1739 out_slice_data_bit_offset = in_slice_data_bit_offset;
/* j tracks the raw-stream byte position including emulation bytes. */
1741 for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1742 if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
/* Recombine the raw byte position with the sub-byte bit offset. */
1747 out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1750 return out_slice_data_bit_offset;
/* Emit one MFD_VC1_BSD_OBJECT command (5 dwords) for a single VC-1 slice.
 * The slice data BO is mapped CPU-side to recompute the macroblock bit
 * offset across emulation-prevention bytes before programming the
 * bitstream size/offset and slice vertical extent. */
1754 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1755 VAPictureParameterBufferVC1 *pic_param,
1756 VASliceParameterBufferVC1 *slice_param,
1757 VASliceParameterBufferVC1 *next_slice_param,
1758 dri_bo *slice_data_bo,
1759 struct gen7_mfd_context *gen7_mfd_context)
1761 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1762 int next_slice_start_vert_pos;
1763 int macroblock_offset;
1764 uint8_t *slice_data = NULL;
/* Read-only map just to scan the slice header bytes. */
1766 dri_bo_map(slice_data_bo, 0);
1767 slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1768 macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1769 slice_param->macroblock_offset,
1770 pic_param->sequence_fields.bits.profile);
1771 dri_bo_unmap(slice_data_bo);
/* The last slice extends to the bottom of the coded picture. */
1773 if (next_slice_param)
1774 next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1776 next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1778 BEGIN_BCS_BATCH(batch, 5);
1779 OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
/* >> 3 folds whole bytes of the bit offset into the size/offset; the
 * residual 0-7 bits are passed in the final dword. */
1780 OUT_BCS_BATCH(batch,
1781 slice_param->slice_data_size - (macroblock_offset >> 3));
1782 OUT_BCS_BATCH(batch,
1783 slice_param->slice_data_offset + (macroblock_offset >> 3));
1784 OUT_BCS_BATCH(batch,
1785 slice_param->slice_vertical_position << 16 |
1786 next_slice_start_vert_pos << 0);
1787 OUT_BCS_BATCH(batch,
1788 (macroblock_offset & 0x7));
1789 ADVANCE_BCS_BATCH(batch);
/* Top-level VC-1 picture decode: per-picture init, common MFX pipeline
 * state, VC-1 specific pic/pred-pipe/directmode state, then one BSD
 * object per slice across all slice parameter buffers. Built atomically
 * and flushed at the end. */
1793 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1794 struct decode_state *decode_state,
1795 struct gen7_mfd_context *gen7_mfd_context)
1797 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1798 VAPictureParameterBufferVC1 *pic_param;
1799 VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1800 dri_bo *slice_data_bo;
1803 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1804 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1806 gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1807 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1808 intel_batchbuffer_emit_mi_flush(batch);
1809 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1810 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1811 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1812 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1813 gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1814 gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1815 gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1817 for (j = 0; j < decode_state->num_slice_params; j++) {
1818 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1819 slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1820 slice_data_bo = decode_state->slice_datas[j]->bo;
1821 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
/* Peek at the next slice group so the last slice in this group knows
 * where the next one begins (NULL = end of picture). */
1823 if (j == decode_state->num_slice_params - 1)
1824 next_slice_group_param = NULL;
1826 next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1828 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1829 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1831 if (i < decode_state->slice_params[j]->num_elements - 1)
1832 next_slice_param = slice_param + 1;
1834 next_slice_param = next_slice_group_param;
1836 gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1841 intel_batchbuffer_end_atomic(batch);
1842 intel_batchbuffer_flush(batch);
/* Per-picture initialisation for baseline JPEG decoding: derive the
 * chroma subsampling and surface fourcc from the per-component sampling
 * factors, allocate/bind the render surface as the pre-deblocking output,
 * and clear every scratch buffer JPEG does not use.
 *
 * Fix: the 422V branch compared h2 twice ("h2 == 2 && h2 == 2") and never
 * tested h1; for 4:2:2 vertical the luma/chroma HORIZONTAL factors are all
 * equal and only the vertical factor is halved, so the first comparand
 * must be h1. */
1846 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1847 struct decode_state *decode_state,
1848 struct gen7_mfd_context *gen7_mfd_context)
1850 struct object_surface *obj_surface;
1851 VAPictureParameterBufferJPEGBaseline *pic_param;
1852 int subsampling = SUBSAMPLE_YUV420;
1853 int fourcc = VA_FOURCC_IMC3;
1855 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1857 if (pic_param->num_components == 1)
1858 subsampling = SUBSAMPLE_YUV400;
1859 else if (pic_param->num_components == 3) {
/* JPEG sampling factors: component 0 = Y, 1 = Cb, 2 = Cr. */
1860 int h1 = pic_param->components[0].h_sampling_factor;
1861 int h2 = pic_param->components[1].h_sampling_factor;
1862 int h3 = pic_param->components[2].h_sampling_factor;
1863 int v1 = pic_param->components[0].v_sampling_factor;
1864 int v2 = pic_param->components[1].v_sampling_factor;
1865 int v3 = pic_param->components[2].v_sampling_factor;
1867 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1868 v1 == 2 && v2 == 1 && v3 == 1) {
1869 subsampling = SUBSAMPLE_YUV420;
1870 fourcc = VA_FOURCC_IMC3;
1871 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1872 v1 == 1 && v2 == 1 && v3 == 1) {
1873 subsampling = SUBSAMPLE_YUV422H;
1874 fourcc = VA_FOURCC_422H;
1875 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1876 v1 == 1 && v2 == 1 && v3 == 1) {
1877 subsampling = SUBSAMPLE_YUV444;
1878 fourcc = VA_FOURCC_444P;
1879 } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1880 v1 == 1 && v2 == 1 && v3 == 1) {
1881 subsampling = SUBSAMPLE_YUV411;
1882 fourcc = VA_FOURCC_411P;
1883 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1884 v1 == 2 && v2 == 1 && v3 == 1) {
1885 subsampling = SUBSAMPLE_YUV422V;
1886 fourcc = VA_FOURCC_422V;
1887 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1888 v1 == 2 && v2 == 2 && v3 == 2) {
1889 subsampling = SUBSAMPLE_YUV422H;
1890 fourcc = VA_FOURCC_422H;
/* was "h2 == 2 && h2 == 2": duplicated h2, h1 untested. */
1891 } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1892 v1 == 2 && v2 == 1 && v3 == 1) {
1893 subsampling = SUBSAMPLE_YUV422V;
1894 fourcc = VA_FOURCC_422V;
1902 /* Current decoded picture */
1903 obj_surface = decode_state->render_object;
1904 i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
/* JPEG output is written pre-deblocking; hold a ref on the surface BO. */
1906 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1907 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1908 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1909 gen7_mfd_context->pre_deblocking_output.valid = 1;
/* No deblocking, row-store, MPR or bitplane buffers are used for JPEG. */
1911 gen7_mfd_context->post_deblocking_output.bo = NULL;
1912 gen7_mfd_context->post_deblocking_output.valid = 0;
1914 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1915 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1917 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1918 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1920 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1921 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1923 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1924 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1926 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1927 gen7_mfd_context->bitplane_read_buffer.valid = 0;
1930 static const int va_to_gen7_jpeg_rotation[4] = {
1931 GEN7_JPEG_ROTATION_0,
1932 GEN7_JPEG_ROTATION_90,
1933 GEN7_JPEG_ROTATION_180,
1934 GEN7_JPEG_ROTATION_270
1938 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1939 struct decode_state *decode_state,
1940 struct gen7_mfd_context *gen7_mfd_context)
1942 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1943 VAPictureParameterBufferJPEGBaseline *pic_param;
1944 int chroma_type = GEN7_YUV420;
1945 int frame_width_in_blks;
1946 int frame_height_in_blks;
1948 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1949 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1951 if (pic_param->num_components == 1)
1952 chroma_type = GEN7_YUV400;
1953 else if (pic_param->num_components == 3) {
1954 int h1 = pic_param->components[0].h_sampling_factor;
1955 int h2 = pic_param->components[1].h_sampling_factor;
1956 int h3 = pic_param->components[2].h_sampling_factor;
1957 int v1 = pic_param->components[0].v_sampling_factor;
1958 int v2 = pic_param->components[1].v_sampling_factor;
1959 int v3 = pic_param->components[2].v_sampling_factor;
1961 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1962 v1 == 2 && v2 == 1 && v3 == 1)
1963 chroma_type = GEN7_YUV420;
1964 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1965 v1 == 1 && v2 == 1 && v3 == 1)
1966 chroma_type = GEN7_YUV422H_2Y;
1967 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1968 v1 == 1 && v2 == 1 && v3 == 1)
1969 chroma_type = GEN7_YUV444;
1970 else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1971 v1 == 1 && v2 == 1 && v3 == 1)
1972 chroma_type = GEN7_YUV411;
1973 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1974 v1 == 2 && v2 == 1 && v3 == 1)
1975 chroma_type = GEN7_YUV422V_2Y;
1976 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1977 v1 == 2 && v2 == 2 && v3 == 2)
1978 chroma_type = GEN7_YUV422H_4Y;
1979 else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1980 v1 == 2 && v2 == 1 && v3 == 1)
1981 chroma_type = GEN7_YUV422V_4Y;
1986 if (chroma_type == GEN7_YUV400 ||
1987 chroma_type == GEN7_YUV444 ||
1988 chroma_type == GEN7_YUV422V_2Y) {
1989 frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
1990 frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
1991 } else if (chroma_type == GEN7_YUV411) {
1992 frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
1993 frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
1995 frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
1996 frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
1999 BEGIN_BCS_BATCH(batch, 3);
2000 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2001 OUT_BCS_BATCH(batch,
2002 (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
2003 (chroma_type << 0));
2004 OUT_BCS_BATCH(batch,
2005 ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
2006 ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
2007 ADVANCE_BCS_BATCH(batch);
/* MFX huffman-table ids for the two VA huffman table slots; indexed by
 * the 0/1 table selector used in gen8_mfd_jpeg_huff_table_state().
 * NOTE(review): initializer values are not visible in this view. */
static const int va_to_gen7_jpeg_hufftable[2] = {
/* Upload the baseline-JPEG huffman tables from the VA huffman table
 * buffer to the hardware via MFX_JPEG_HUFF_TABLE_STATE commands,
 * one command per table that the app asked to load. */
gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct gen7_mfd_context *gen7_mfd_context,
    VAHuffmanTableBufferJPEGBaseline *huffman_table;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* Nothing to do when the app supplied no huffman table buffer. */
    if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)

    huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;

    for (index = 0; index < num_tables; index++) {
        int id = va_to_gen7_jpeg_hufftable[index];
        /* Skip slots the app did not flag for (re)loading. */
        if (!huffman_table->load_huffman_table[index])
        BEGIN_BCS_BATCH(batch, 53);
        OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
        OUT_BCS_BATCH(batch, id);
        /* DC/AC code-length counts and symbol values, padded to the
         * sizes the command expects (12/12/16/164 bytes). */
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
        ADVANCE_BCS_BATCH(batch);
/* MFX quantizer-matrix ids, indexed by the 1-based JPEG component id
 * computed in gen8_mfd_jpeg_qm_state() (slot 0 is unused there). */
static const int va_to_gen7_jpeg_qm[5] = {
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
/* Program the JPEG quantization matrices: for each picture component,
 * de-zigzag the 8x8 quantiser table from the VA IQ matrix buffer into
 * raster order and hand it to gen8_mfd_qm_state(). */
gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
    VAPictureParameterBufferJPEGBaseline *pic_param;
    VAIQMatrixBufferJPEGBaseline *iq_matrix;

    if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)

    iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    assert(pic_param->num_components <= 3);

    for (index = 0; index < pic_param->num_components; index++) {
        /* Component ids are normalized so the first component maps to 1. */
        int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
        unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
        unsigned char raster_qm[64];

        /* id indexes va_to_gen7_jpeg_qm[5]; reject out-of-range ids. */
        if (id > 4 || id < 1)

        if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])

        qm_type = va_to_gen7_jpeg_qm[id];

        /* VA supplies the table in zigzag scan order; convert to raster. */
        for (j = 0; j < 64; j++)
            raster_qm[zigzag_direct[j]] = qm[j];

        gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
/* Emit one MFD_JPEG_BSD_OBJECT describing a scan: its slice data
 * size/offset, MCU position, interleave flag, per-component scan mask
 * and restart interval. */
gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
                         VAPictureParameterBufferJPEGBaseline *pic_param,
                         VASliceParameterBufferJPEGBaseline *slice_param,
                         VASliceParameterBufferJPEGBaseline *next_slice_param,
                         dri_bo *slice_data_bo,
                         struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int scan_component_mask = 0;

    assert(slice_param->num_components > 0);
    assert(slice_param->num_components < 4);
    assert(slice_param->num_components <= pic_param->num_components);

    /* Build a 3-bit mask of which picture components this scan covers;
     * the selector is normalized against the first component id. */
    for (i = 0; i < slice_param->num_components; i++) {
        switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
            scan_component_mask |= (1 << 0);
            scan_component_mask |= (1 << 1);
            scan_component_mask |= (1 << 2);

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size);
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  slice_param->slice_horizontal_position << 16 |
                  slice_param->slice_vertical_position << 0);
    OUT_BCS_BATCH(batch,
                  ((slice_param->num_components != 1) << 30) | /* interleaved */
                  (scan_component_mask << 27) | /* scan components */
                  (0 << 26) | /* disable interrupt allowed */
                  (slice_param->num_mcus << 0)); /* MCU count */
    OUT_BCS_BATCH(batch,
                  (slice_param->restart_interval << 0)); /* RestartInterval */
    ADVANCE_BCS_BATCH(batch);
/* Workaround for JPEG decoding on Ivybridge:
 * a tiny canned AVC intra clip decoded before every JPEG picture to put
 * the MFX engine into a known state.  The struct bundles the clip's
 * dimensions, raw bitstream bytes and decode parameters. */
    unsigned char data[32];    /* raw AVC slice bitstream of the WA clip */
    int data_bit_offset;       /* bit offset of the slice data within 'data' */
} gen7_jpeg_wa_clip = {
    0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
    0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
/* Allocate (or re-create) the scratch surface and slice-data BO used by
 * the Ivybridge JPEG workaround clip, and upload the canned bitstream. */
gen8_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;

    /* Drop any previously created WA surface before making a new one. */
    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,

    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);

    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    /* The WA clip decodes as NV12 4:2:0; back the surface with a BO. */
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen7_mfd_context->jpeg_wa_surface_object = obj_surface;

    /* The canned bitstream is immutable, so upload it only once. */
    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
/* MFX_PIPE_MODE_SELECT for the WA clip: AVC VLD decode, long format,
 * pre-deblocking output only, stream-out disabled. */
gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9) | /* Post Deblocking Output */
                  (1 << 8) | /* Pre Deblocking Output */
                  (0 << 5) | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4) | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4) | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3) | /* terminate if AVC mbdata error occurs */
                  (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
/* MFX_SURFACE_STATE for the WA scratch surface: NV12 planar 4:2:0,
 * Y-major tiled, dimensions/pitch taken from the surface object. */
gen8_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2) | /* must be 0 */
                  (1 << 1) | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
/* MFX_PIPE_BUF_ADDR_STATE for the WA clip: points the pre-deblocking
 * output at the scratch surface, supplies a throwaway intra row-store
 * BO, and zeroes every other address slot. */
gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* Temporary intra row-store scratch BO, released at the end. */
    intra_bo = dri_bo_alloc(i965->intel.bufmgr,

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    OUT_BCS_RELOC(batch,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0); /* post deblocking */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 13-15 is for intra row store scratch */
    OUT_BCS_RELOC(batch,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 16-18 is for deblocking filter */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* No reference frames are needed for the intra-only WA clip. */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW52-54 is for mb status address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* the DW56-60 is for ILDB & second ILDB address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* The relocation holds its own reference; drop ours. */
    dri_bo_unreference(intra_bo);
/* MFX_BSP_BUF_BASE_ADDR_STATE for the WA clip using throwaway
 * BSD/MPC and MPR row-store BOs (freed right after emission). */
gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          7680, /* 1.0 * 120 * 64 */

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    OUT_BCS_RELOC(batch,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_RELOC(batch,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* Relocations keep the BOs alive; release our references. */
    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
/* QM state for the WA clip — presumably a no-op (the canned intra clip
 * uses default matrices); body not visible in this view. */
gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
/* MFX_AVC_IMG_STATE for the WA clip: a fixed 1x1-macroblock CABAC
 * 4:2:0 frame with no MBAFF. */
gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int mbaff_frame_flag = 0;
    unsigned int width_in_mbs = 1, height_in_mbs = 1;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  width_in_mbs * height_in_mbs);
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 12) | /* differ from GEN6 */
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (mbaff_frame_flag << 1) |
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/* MFX_AVC_DIRECTMODE_STATE for the WA clip: all direct-MV buffer and
 * POC slots zeroed — the intra-only clip never uses direct prediction. */
gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
                                  struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0); /* top */
        OUT_BCS_BATCH(batch, 0); /* bottom */

    OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field */
    OUT_BCS_BATCH(batch, 0); /* top */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC list — also unused for the WA clip. */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/* MFX_IND_OBJ_BASE_ADDR_STATE pointing the bitstream fetch at the
 * canned WA slice-data BO. */
gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch,
                  gen7_mfd_context->jpeg_wa_slice_data_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/* MFD_AVC_BSD_OBJECT decoding the single (last) slice of the WA clip,
 * using the size and bit offset recorded in gen7_jpeg_wa_clip. */
gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) | /* byte part of the offset */
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7)); /* remaining bit part */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/* MFX_AVC_SLICE_STATE for the WA clip: a single I slice covering the
 * whole 1x1-MB frame, deblocking disabled, marked as last slice. */
gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
    OUT_BCS_BATCH(batch,
                  (1 << 27) | /* disable Deblocking */
                  (gen7_jpeg_wa_clip.qp << 16) |
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/* Run the full Ivybridge JPEG workaround: decode the canned 1-MB AVC
 * intra clip to reset the MFX pipeline before the real JPEG decode. */
gen8_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen8_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
/* Top-level JPEG picture decode: run the WA clip, program pipeline and
 * picture state, scan all slices once to find the highest huffman table
 * selector (so only the needed tables are loaded), then emit one BSD
 * object per scan and flush the batch. */
gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferJPEGBaseline *pic_param;
    VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j, max_selector = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    /* Currently only support Baseline DCT */
    gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);

    gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);

    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);

    /* First pass: find the highest DC/AC table selector in use. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
                next_slice_param = next_slice_group_param;

            for (component = 0; component < slice_param->num_components; component++) {
                if (max_selector < slice_param->components[component].dc_table_selector)
                    max_selector = slice_param->components[component].dc_table_selector;

                if (max_selector < slice_param->components[component].ac_table_selector)
                    max_selector = slice_param->components[component].ac_table_selector;

    assert(max_selector < 2);
    gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);

    /* Second pass: emit a BSD object for every scan. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
                next_slice_param = next_slice_group_param;

            gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
/* VP8 DC dequantization lookup table, indexed by the clipped
 * quantization index (0..127). */
static const int vp8_dc_qlookup[128] =
    4,   5,   6,   7,   8,   9,   10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
    18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
    29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
    44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
    59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
    75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
    91,  93,  95,  96,  98,  100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
/* VP8 AC dequantization lookup table, indexed by the clipped
 * quantization index (0..127). */
static const int vp8_ac_qlookup[128] =
    4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
    20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
    36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
    52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
    78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98,  100, 102, 104, 106, 108,
    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
/* Clamp a VP8 quantization index to the valid table range — presumably
 * 0..127 to match vp8_dc_qlookup/vp8_ac_qlookup; body not visible here. */
static inline unsigned int vp8_clip_quantization_index(int index)
/* Per-picture VP8 decode setup: bind the render target as the decoded
 * picture, route it to pre- or post-deblocking output depending on
 * whether the loop filter is enabled, and (re)allocate all row-store
 * scratch buffers sized by the frame width in macroblocks. */
gen8_mfd_vp8_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
    struct object_surface *obj_surface;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    int width_in_mbs = (pic_param->frame_width + 15) / 16;
    int height_in_mbs = (pic_param->frame_height + 15) / 16;

    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    intel_update_vp8_frame_store_index(ctx,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* Exactly one of post/pre deblocking output is valid, chosen by
     * the loop_filter_disable flag. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;

    intel_ensure_vp8_segmentation_buffer(ctx,
                                         &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);

    /* The same as AVC */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      width_in_mbs * 64 * 2,
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* VP8 has no bitplane buffer. */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Emit MFX_VP8_PIC_STATE: frame geometry and flags, per-segment
 * dequantization values derived from the VP8 q-index lookup tables,
 * coefficient-probability buffer, mode/MV probabilities, loop-filter
 * deltas, and the segmentation-map stream address. */
gen8_mfd_vp8_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
    VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
    dri_bo *probs_bo = decode_state->probability_data->bo;
    unsigned int quantization_value[4][6];

    /* There is no safe way to error out if the segmentation buffer
       could not be allocated. So, instead of aborting, simply decode
       something even if the result may look totally inaccurate */
    const unsigned int enable_segmentation =
        pic_param->pic_fields.bits.segmentation_enabled &&
        gen7_mfd_context->segmentation_buffer.valid;

    log2num = (int)log2(slice_param->num_of_partitions - 1);

    BEGIN_BCS_BATCH(batch, 38);
    OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
    OUT_BCS_BATCH(batch,
                  (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
                  (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
    OUT_BCS_BATCH(batch,
                  pic_param->pic_fields.bits.sharpness_level << 16 |
                  pic_param->pic_fields.bits.sign_bias_alternate << 13 |
                  pic_param->pic_fields.bits.sign_bias_golden << 12 |
                  pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
                  pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
                  pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
                  pic_param->pic_fields.bits.segmentation_enabled << 8 |
                  (enable_segmentation &&
                   !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
                  (enable_segmentation &&
                   pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
                  (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
                  pic_param->pic_fields.bits.filter_type << 4 |
                  (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
                  !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */

    OUT_BCS_BATCH(batch,
                  pic_param->loop_filter_level[3] << 24 |
                  pic_param->loop_filter_level[2] << 16 |
                  pic_param->loop_filter_level[1] << 8 |
                  pic_param->loop_filter_level[0] << 0);

    /* Quantizer Value for 4 segments, DW4-DW15 */
    for (i = 0; i < 4; i++) {
        quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
        quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
        quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
        /* 101581>>16 is equivalent to 155/100 */
        quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
        quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
        quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/

        /* Spec-mandated clamps: y2ac floor of 8, uvdc ceiling of 132. */
        quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
        quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);

        OUT_BCS_BATCH(batch,
                      quantization_value[i][0] << 16 | /* Y1AC */
                      quantization_value[i][1] << 0);  /* Y1DC */
        OUT_BCS_BATCH(batch,
                      quantization_value[i][5] << 16 | /* UVAC */
                      quantization_value[i][4] << 0);  /* UVDC */
        OUT_BCS_BATCH(batch,
                      quantization_value[i][3] << 16 | /* Y2AC */
                      quantization_value[i][2] << 0);  /* Y2DC */

    /* CoeffProbability table for non-key frame, DW16-DW18 */
    OUT_BCS_RELOC(batch, probs_bo,
                  0, I915_GEM_DOMAIN_INSTRUCTION,
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch,
                  pic_param->mb_segment_tree_probs[2] << 16 |
                  pic_param->mb_segment_tree_probs[1] << 8 |
                  pic_param->mb_segment_tree_probs[0] << 0);

    OUT_BCS_BATCH(batch,
                  pic_param->prob_skip_false << 24 |
                  pic_param->prob_intra << 16 |
                  pic_param->prob_last << 8 |
                  pic_param->prob_gf << 0);

    OUT_BCS_BATCH(batch,
                  pic_param->y_mode_probs[3] << 24 |
                  pic_param->y_mode_probs[2] << 16 |
                  pic_param->y_mode_probs[1] << 8 |
                  pic_param->y_mode_probs[0] << 0);

    OUT_BCS_BATCH(batch,
                  pic_param->uv_mode_probs[2] << 16 |
                  pic_param->uv_mode_probs[1] << 8 |
                  pic_param->uv_mode_probs[0] << 0);

    /* MV update value, DW23-DW32 */
    for (i = 0; i < 2; i++) {
        for (j = 0; j < 20; j += 4) {
            OUT_BCS_BATCH(batch,
                          (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
                          pic_param->mv_probs[i][j + 2] << 16 |
                          pic_param->mv_probs[i][j + 1] << 8 |
                          pic_param->mv_probs[i][j + 0] << 0);

    /* Loop-filter deltas are signed 7-bit values. */
    OUT_BCS_BATCH(batch,
                  (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
                  (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
                  (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) << 8 |
                  (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) << 0);

    OUT_BCS_BATCH(batch,
                  (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
                  (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
                  (pic_param->loop_filter_deltas_mode[1] & 0x7f) << 8 |
                  (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0);

    /* segmentation id stream base address, DW35-DW37 */
    if (enable_segmentation) {
        OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
                      0, I915_GEM_DOMAIN_INSTRUCTION,
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/* Emit the MFD_VP8_BSD_OBJECT command (22 DWs): bool-decoder state for
 * partition 0 plus size/offset pairs for up to 8 token partitions.
 * NOTE(review): this view elides a few lines — the opening brace and the
 * declarations of i/log2num, the body of the used_bits == 8 branch before
 * "partition_size_0 -= 1", and the trailing DWs after the concealment bit. */
2937 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2938 VAPictureParameterBufferVP8 *pic_param,
2939 VASliceParameterBufferVP8 *slice_param,
2940 dri_bo *slice_data_bo,
2941 struct gen7_mfd_context *gen7_mfd_context)
2943 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Byte offset to the first token partition: slice data start plus the
 * whole bytes consumed by the partition-0 header (macroblock_offset is in
 * bits, rounded up to bytes). */
2945 unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
/* Bits of the current byte already consumed by the bool decoder. */
2946 unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
2947 unsigned int partition_size_0 = slice_param->partition_size[0];
2949 assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
/* count == 0 means the decoder sits exactly on a byte boundary; adjust the
 * partition-0 size accordingly (the rest of this branch is elided here). */
2950 if (used_bits == 8) {
2953 partition_size_0 -= 1;
/* VP8 has 1 first partition + 1..8 token partitions. */
2956 assert(slice_param->num_of_partitions >= 2);
2957 assert(slice_param->num_of_partitions <= 9);
/* log2 of the token-partition count, as encoded in the frame header. */
2959 log2num = (int)log2(slice_param->num_of_partitions - 1);
2961 BEGIN_BCS_BATCH(batch, 22);
2962 OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
/* DW1: partition-0 entropy state — consumed-bit count, range, and the
 * sub-byte bit offset of the first macroblock. */
2963 OUT_BCS_BATCH(batch,
2964 used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2965 pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
2967 (slice_param->macroblock_offset & 0x7));
/* DW2: bool-decoder value in the top byte (remaining fields elided here). */
2968 OUT_BCS_BATCH(batch,
2969 pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
2972 OUT_BCS_BATCH(batch, partition_size_0);
2973 OUT_BCS_BATCH(batch, offset);
2974 //partion sizes in bytes are present after the above first partition when there are more than one token partition
/* Skip over partition 0 plus the 3-byte size field written for each extra
 * token partition to reach the first token partition's data. */
2975 offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
/* Size/offset pairs for partitions 1..8; unused slots are zeroed.
 * NOTE(review): the else keyword and the if guarding the offset increment
 * (lines 2980, 2983-2984) are elided in this view. */
2976 for (i = 1; i < 9; i++) {
2977 if (i < slice_param->num_of_partitions) {
2978 OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
2979 OUT_BCS_BATCH(batch, offset);
2981 OUT_BCS_BATCH(batch, 0);
2982 OUT_BCS_BATCH(batch, 0);
2985 offset += slice_param->partition_size[i];
/* DW21: error-concealment method select (bit 31). */
2988 OUT_BCS_BATCH(batch,
2989 1 << 31 | /* concealment method */
2992 ADVANCE_BCS_BATCH(batch);
/* Decode one VP8 frame: validate the decode_state buffers (VP8 carries
 * exactly one slice per frame), then emit the full MFX pipeline — mode
 * select, surface/buffer state, picture state and the BSD object — as one
 * atomic BCS batch.
 * NOTE(review): the early return after WARN_ONCE (line 3018) and the
 * opening/closing braces are elided in this view. */
2996 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
2997 struct decode_state *decode_state,
2998 struct gen7_mfd_context *gen7_mfd_context)
3000 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3001 VAPictureParameterBufferVP8 *pic_param;
3002 VASliceParameterBufferVP8 *slice_param;
3003 dri_bo *slice_data_bo;
3005 assert(decode_state->pic_param && decode_state->pic_param->buffer);
3006 pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
/* Reject anything other than exactly one slice parameter with one element,
 * present slice data with a backing BO, and probability data. */
3008 /* one slice per frame */
3009 if (decode_state->num_slice_params != 1 ||
3010 (!decode_state->slice_params ||
3011 !decode_state->slice_params[0] ||
3012 (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3013 (!decode_state->slice_datas ||
3014 !decode_state->slice_datas[0] ||
3015 !decode_state->slice_datas[0]->bo) ||
3016 !decode_state->probability_data) {
3017 WARN_ONCE("Wrong parameters for VP8 decoding\n");
3022 slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3023 slice_data_bo = decode_state->slice_datas[0]->bo;
/* Allocate/refresh per-frame decode resources, then emit the command
 * sequence; the batch is built atomically and flushed at the end. */
3025 gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3026 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3027 intel_batchbuffer_emit_mi_flush(batch);
3028 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3029 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3030 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3031 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3032 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3033 gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3034 gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3035 intel_batchbuffer_end_atomic(batch);
3036 intel_batchbuffer_flush(batch);
/* Top-level decode entry point (hw_context->run): sanity-check the input,
 * then dispatch to the per-codec decode routine by profile.
 * NOTE(review): the profile parameter (line 3041), vaStatus declaration,
 * the early "goto/return" after the sanity check, the per-case break
 * statements, the default case, and the final return are all elided in
 * this view. */
3040 gen8_mfd_decode_picture(VADriverContextP ctx,
3042 union codec_state *codec_state,
3043 struct hw_context *hw_context)
3046 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3047 struct decode_state *decode_state = &codec_state->decode;
3050 assert(gen7_mfd_context);
3052 vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3054 if (vaStatus != VA_STATUS_SUCCESS)
/* Reset the MPEG-2 slice-position workaround before each frame. */
3057 gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3060 case VAProfileMPEG2Simple:
3061 case VAProfileMPEG2Main:
3062 gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3065 case VAProfileH264ConstrainedBaseline:
3066 case VAProfileH264Main:
3067 case VAProfileH264High:
3068 case VAProfileH264StereoHigh:
3069 case VAProfileH264MultiviewHigh:
3070 gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3073 case VAProfileVC1Simple:
3074 case VAProfileVC1Main:
3075 case VAProfileVC1Advanced:
3076 gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3079 case VAProfileJPEGBaseline:
3080 gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3083 case VAProfileVP8Version0_3:
3084 gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3092 vaStatus = VA_STATUS_SUCCESS;
/* hw_context->destroy callback: drop every scratch/output buffer reference
 * (each pointer is reset to NULL after unreferencing, except the JPEG
 * workaround BO, which dies with the context anyway), free the batchbuffer,
 * then the context itself. dri_bo_unreference is presumably NULL-safe for
 * buffers that were never allocated — TODO confirm against libdrm. */
3099 gen8_mfd_context_destroy(void *hw_context)
3101 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3103 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3104 gen7_mfd_context->post_deblocking_output.bo = NULL;
3106 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3107 gen7_mfd_context->pre_deblocking_output.bo = NULL;
3109 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3110 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3112 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3113 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3115 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3116 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3118 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3119 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3121 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3122 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3124 dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3125 gen7_mfd_context->segmentation_buffer.bo = NULL;
3127 dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3129 intel_batchbuffer_free(gen7_mfd_context->base.batch);
3130 free(gen7_mfd_context);
/* MPEG-2 specific context setup: mark all four IQ-matrix load flags as
 * "not yet loaded" (-1) so the first frame forces a matrix upload. */
3133 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3134 struct gen7_mfd_context *gen7_mfd_context)
3136 gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3137 gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3138 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3139 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
/* Allocate and initialize the gen8 decoder hw_context: wire up the
 * destroy/run callbacks, create the batchbuffer, invalidate the reference
 * surface table, then run codec-specific init based on the config profile.
 * Returns the context as a generic hw_context pointer.
 * NOTE(review): no NULL check after calloc is visible here (lines
 * 3147-3148 are elided) — confirm allocation failure is handled; the
 * declaration of i, the break statements and the default case of the
 * switch, and the closing brace are likewise outside this view. */
3143 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3145 struct intel_driver_data *intel = intel_driver_data(ctx);
3146 struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3149 gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3150 gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3151 gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
/* Invalidate every reference-surface slot before first use. */
3153 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3154 gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3155 gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3158 gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3159 gen7_mfd_context->segmentation_buffer.valid = 0;
/* Codec-specific initialization (MPEG-2 IQ flags, AVC context). */
3161 switch (obj_config->profile) {
3162 case VAProfileMPEG2Simple:
3163 case VAProfileMPEG2Main:
3164 gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3167 case VAProfileH264ConstrainedBaseline:
3168 case VAProfileH264Main:
3169 case VAProfileH264High:
3170 case VAProfileH264StereoHigh:
3171 case VAProfileH264MultiviewHigh:
3172 gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3177 return (struct hw_context *)gen7_mfd_context;