2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/* Zig-zag scan order for an 8x8 coefficient block: entry i is the
 * raster-order index of the i-th coefficient in zig-zag order.
 * Used to reorder quantization matrices before uploading them to the
 * hardware (MFX_QM_STATE expects raster order). */
51 static const uint32_t zigzag_direct[64] = {
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
54 12, 19, 26, 33, 40, 48, 41, 34,
55 27, 20, 13, 6, 7, 14, 21, 28,
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
/*
 * Lazily create and attach the per-surface AVC private data
 * (GenAvcSurface) for obj_surface, then ensure its direct-mode
 * motion-vector (DMV) buffer exists. Sizes come from the H.264 picture
 * parameters; the DMV buffer covers the whole frame (128 bytes per MB).
 */
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64 VAPictureParameterBufferH264 *pic_param,
65 struct object_surface *obj_surface)
67 struct i965_driver_data *i965 = i965_driver_data(ctx);
68 GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69 int width_in_mbs, height_in_mbs;
71 obj_surface->free_private_data = gen_free_avc_surface;
72 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
/* First AVC use of this surface: allocate and attach private data. */
75 if (!gen7_avc_surface) {
/* NOTE(review): calloc result is dereferenced without a NULL check --
 * confirm the driver's OOM policy before relying on this path. */
76 gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77 gen7_avc_surface->base.frame_store_id = -1;
78 assert((obj_surface->size & 0x3f) == 0);
79 obj_surface->private_data = gen7_avc_surface;
82 /* DMV buffers now relate to the whole frame, irrespective of
84 if (gen7_avc_surface->dmv_top == NULL) {
85 gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86 "direct mv w/r buffer",
87 width_in_mbs * height_in_mbs * 128,
89 assert(gen7_avc_surface->dmv_top);
/*
 * Emit MFX_PIPE_MODE_SELECT (5 dwords): selects the codec standard
 * (standard_select), VLD decode mode with long-format slice headers,
 * and routes output through the pre- or post-deblocking path depending
 * on which output buffer the context marked valid.
 */
94 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
95 struct decode_state *decode_state,
97 struct gen7_mfd_context *gen7_mfd_context)
99 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Only these five codecs are supported by this decode path. */
101 assert(standard_select == MFX_FORMAT_MPEG2 ||
102 standard_select == MFX_FORMAT_AVC ||
103 standard_select == MFX_FORMAT_VC1 ||
104 standard_select == MFX_FORMAT_JPEG ||
105 standard_select == MFX_FORMAT_VP8);
107 BEGIN_BCS_BATCH(batch, 5);
108 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
110 (MFX_LONG_MODE << 17) | /* Currently only support long format */
111 (MFD_MODE_VLD << 15) | /* VLD mode */
112 (0 << 10) | /* disable Stream-Out */
113 (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
114 (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
115 (0 << 5) | /* not in stitch mode */
116 (MFX_CODEC_DECODE << 4) | /* decoding mode */
117 (standard_select << 0));
/* Error-handling dword: all termination-on-error bits left disabled. */
119 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
120 (0 << 3) | /* terminate if AVC mbdata error occurs */
121 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
124 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
125 OUT_BCS_BATCH(batch, 0); /* reserved */
126 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE (6 dwords) for the render target: dimensions,
 * pitch, mandatory Y-major tiling, surface format (monochrome for Y800,
 * otherwise 8-bit planar 4:2:0), and the Y offsets of the Cb/Cr planes.
 * Chroma interleave is disabled only for JPEG.
 */
130 gen8_mfd_surface_state(VADriverContextP ctx,
131 struct decode_state *decode_state,
133 struct gen7_mfd_context *gen7_mfd_context)
135 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
136 struct object_surface *obj_surface = decode_state->render_object;
137 unsigned int y_cb_offset;
138 unsigned int y_cr_offset;
139 unsigned int surface_format;
143 y_cb_offset = obj_surface->y_cb_offset;
144 y_cr_offset = obj_surface->y_cr_offset;
146 surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
147 MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
149 BEGIN_BCS_BATCH(batch, 6);
150 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
151 OUT_BCS_BATCH(batch, 0);
153 ((obj_surface->orig_height - 1) << 18) |
154 ((obj_surface->orig_width - 1) << 4));
156 (surface_format << 28) | /* 420 planar YUV surface */
157 ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
158 (0 << 22) | /* surface object control state, ignored */
159 ((obj_surface->width - 1) << 3) | /* pitch */
160 (0 << 2) | /* must be 0 */
161 (1 << 1) | /* must be tiled */
162 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
164 (0 << 16) | /* X offset for U(Cb), must be 0 */
165 (y_cb_offset << 0)); /* Y offset for U(Cb) */
167 (0 << 16) | /* X offset for V(Cr), must be 0 */
168 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
169 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): pre/post-deblocking output
 * addresses, intra and deblocking-filter row-store scratch buffers, and
 * the 16 reference-picture base addresses. Each relocation is emitted
 * only when the corresponding buffer is valid; unused slots and the
 * macroblock-status/ILDB entries are written as zero.
 */
173 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
174 struct decode_state *decode_state,
176 struct gen7_mfd_context *gen7_mfd_context)
178 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
181 BEGIN_BCS_BATCH(batch, 61);
182 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
183 /* Pre-deblock 1-3 */
184 if (gen7_mfd_context->pre_deblocking_output.valid)
185 OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
186 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
189 OUT_BCS_BATCH(batch, 0);
191 OUT_BCS_BATCH(batch, 0);
192 OUT_BCS_BATCH(batch, 0);
193 /* Post-deblocking 4-6 */
194 if (gen7_mfd_context->post_deblocking_output.valid)
195 OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
196 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
199 OUT_BCS_BATCH(batch, 0);
201 OUT_BCS_BATCH(batch, 0);
202 OUT_BCS_BATCH(batch, 0);
204 /* uncompressed-video & stream out 7-12 */
205 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
206 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
207 OUT_BCS_BATCH(batch, 0);
208 OUT_BCS_BATCH(batch, 0);
209 OUT_BCS_BATCH(batch, 0);
210 OUT_BCS_BATCH(batch, 0);
212 /* intra row-store scratch 13-15 */
213 if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
214 OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
215 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
218 OUT_BCS_BATCH(batch, 0);
220 OUT_BCS_BATCH(batch, 0);
221 OUT_BCS_BATCH(batch, 0);
222 /* deblocking-filter-row-store 16-18 */
223 if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
224 OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
225 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
228 OUT_BCS_BATCH(batch, 0);
229 OUT_BCS_BATCH(batch, 0);
230 OUT_BCS_BATCH(batch, 0);
/* Reference pictures: one address slot per frame-store entry; read-only
 * from the GPU's point of view (write domain 0). */
233 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
234 struct object_surface *obj_surface;
236 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
237 gen7_mfd_context->reference_surface[i].obj_surface &&
238 gen7_mfd_context->reference_surface[i].obj_surface->bo) {
239 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
241 OUT_BCS_RELOC(batch, obj_surface->bo,
242 I915_GEM_DOMAIN_INSTRUCTION, 0,
245 OUT_BCS_BATCH(batch, 0);
248 OUT_BCS_BATCH(batch, 0);
251 /* reference property 51 */
252 OUT_BCS_BATCH(batch, 0);
254 /* Macroblock status & ILDB 52-57 */
255 OUT_BCS_BATCH(batch, 0);
256 OUT_BCS_BATCH(batch, 0);
257 OUT_BCS_BATCH(batch, 0);
258 OUT_BCS_BATCH(batch, 0);
259 OUT_BCS_BATCH(batch, 0);
260 OUT_BCS_BATCH(batch, 0);
262 /* the second Macroblock status 58-60 */
263 OUT_BCS_BATCH(batch, 0);
264 OUT_BCS_BATCH(batch, 0);
265 OUT_BCS_BATCH(batch, 0);
267 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): point the indirect
 * bitstream object at slice_data_bo. The MV, IT-COFF, IT-DBLK and
 * PAK-BSE (encoder-only) entries are zeroed -- they are unused for VLD
 * decoding.
 */
271 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
272 dri_bo *slice_data_bo,
274 struct gen7_mfd_context *gen7_mfd_context)
276 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
278 BEGIN_BCS_BATCH(batch, 26);
279 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
281 OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
282 OUT_BCS_BATCH(batch, 0);
283 OUT_BCS_BATCH(batch, 0);
284 /* Upper bound 4-5 */
285 OUT_BCS_BATCH(batch, 0);
286 OUT_BCS_BATCH(batch, 0);
288 /* MFX indirect MV 6-10 */
289 OUT_BCS_BATCH(batch, 0);
290 OUT_BCS_BATCH(batch, 0);
291 OUT_BCS_BATCH(batch, 0);
292 OUT_BCS_BATCH(batch, 0);
293 OUT_BCS_BATCH(batch, 0);
295 /* MFX IT_COFF 11-15 */
296 OUT_BCS_BATCH(batch, 0);
297 OUT_BCS_BATCH(batch, 0);
298 OUT_BCS_BATCH(batch, 0);
299 OUT_BCS_BATCH(batch, 0);
300 OUT_BCS_BATCH(batch, 0);
302 /* MFX IT_DBLK 16-20 */
303 OUT_BCS_BATCH(batch, 0);
304 OUT_BCS_BATCH(batch, 0);
305 OUT_BCS_BATCH(batch, 0);
306 OUT_BCS_BATCH(batch, 0);
307 OUT_BCS_BATCH(batch, 0);
309 /* MFX PAK_BSE object for encoder 21-25 */
310 OUT_BCS_BATCH(batch, 0);
311 OUT_BCS_BATCH(batch, 0);
312 OUT_BCS_BATCH(batch, 0);
313 OUT_BCS_BATCH(batch, 0);
314 OUT_BCS_BATCH(batch, 0);
316 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): BSD/MPC row-store, MPR
 * row-store and bitplane-read buffers. Each address is programmed only
 * when the context marked the buffer valid; the bitplane buffer is
 * read-only (write domain 0).
 */
320 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
321 struct decode_state *decode_state,
323 struct gen7_mfd_context *gen7_mfd_context)
325 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
327 BEGIN_BCS_BATCH(batch, 10);
328 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
330 if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
331 OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
332 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
335 OUT_BCS_BATCH(batch, 0);
337 OUT_BCS_BATCH(batch, 0);
338 OUT_BCS_BATCH(batch, 0);
339 /* MPR Row Store Scratch buffer 4-6 */
340 if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
341 OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
342 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
345 OUT_BCS_BATCH(batch, 0);
347 OUT_BCS_BATCH(batch, 0);
348 OUT_BCS_BATCH(batch, 0);
351 if (gen7_mfd_context->bitplane_read_buffer.valid)
352 OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
353 I915_GEM_DOMAIN_INSTRUCTION, 0,
356 OUT_BCS_BATCH(batch, 0);
357 OUT_BCS_BATCH(batch, 0);
358 OUT_BCS_BATCH(batch, 0);
359 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one MFX_QM_STATE command (18 dwords) uploading a quantization
 * matrix of type qm_type. The caller's matrix (qm_length bytes, at most
 * 64) is copied into a 64-byte local buffer that is always sent in full.
 */
363 gen8_mfd_qm_state(VADriverContextP ctx,
367 struct gen7_mfd_context *gen7_mfd_context)
369 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
370 unsigned int qm_buffer[16];
372 assert(qm_length <= 16 * 4);
/* NOTE(review): when qm_length < 64 the tail of qm_buffer stays
 * uninitialized yet is still written to the batch below -- confirm the
 * hardware ignores the unused bytes for short matrices. */
373 memcpy(qm_buffer, qm, qm_length);
375 BEGIN_BCS_BATCH(batch, 18);
376 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
377 OUT_BCS_BATCH(batch, qm_type << 0);
378 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
379 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_IMG_STATE (17 dwords) from the H.264 picture parameters:
 * total MB count, frame size in MBs, chroma QP offsets, prediction and
 * entropy-coding flags, and the MBAFF flag. The MFX unit supports only
 * monochrome and 4:2:0 chroma formats, asserted below.
 */
383 gen8_mfd_avc_img_state(VADriverContextP ctx,
384 struct decode_state *decode_state,
385 struct gen7_mfd_context *gen7_mfd_context)
387 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
389 int mbaff_frame_flag;
390 unsigned int width_in_mbs, height_in_mbs;
391 VAPictureParameterBufferH264 *pic_param;
393 assert(decode_state->pic_param && decode_state->pic_param->buffer);
394 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
395 assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
/* Derive the image structure (frame/top field/bottom field) from the
 * current picture's flags. */
397 if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
399 else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
/* Field pictures must have field_pic_flag set, frames must not. */
404 if ((img_struct & 0x1) == 0x1) {
405 assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
407 assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
410 if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
411 assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
412 assert(pic_param->pic_fields.bits.field_pic_flag == 0);
414 assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
/* MBAFF applies only to frame pictures of an MBAFF sequence. */
417 mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
418 !pic_param->pic_fields.bits.field_pic_flag);
420 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
421 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
423 /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
424 assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
425 pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
426 assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
428 BEGIN_BCS_BATCH(batch, 17);
429 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
431 (width_in_mbs * height_in_mbs - 1));
433 ((height_in_mbs - 1) << 16) |
434 ((width_in_mbs - 1) << 0));
436 ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
437 ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
438 (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
439 (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
440 (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
441 (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
444 (pic_param->seq_fields.bits.chroma_format_idc << 10) |
445 (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
446 ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
447 (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
448 (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
449 (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
450 (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
451 (mbaff_frame_flag << 1) |
452 (pic_param->pic_fields.bits.field_pic_flag << 0));
453 OUT_BCS_BATCH(batch, 0);
454 OUT_BCS_BATCH(batch, 0);
455 OUT_BCS_BATCH(batch, 0);
456 OUT_BCS_BATCH(batch, 0);
457 OUT_BCS_BATCH(batch, 0);
458 OUT_BCS_BATCH(batch, 0);
459 OUT_BCS_BATCH(batch, 0);
460 OUT_BCS_BATCH(batch, 0);
461 OUT_BCS_BATCH(batch, 0);
462 OUT_BCS_BATCH(batch, 0);
463 OUT_BCS_BATCH(batch, 0);
464 OUT_BCS_BATCH(batch, 0);
465 ADVANCE_BCS_BATCH(batch);
/*
 * Upload the H.264 scaling lists: 4x4 intra and inter matrices always
 * (3 lists of 16 bytes each), 8x8 intra/inter only when
 * transform_8x8_mode_flag is set. Falls back to the context's default
 * (flat) matrices when the application supplied no IQ matrix buffer.
 */
469 gen8_mfd_avc_qm_state(VADriverContextP ctx,
470 struct decode_state *decode_state,
471 struct gen7_mfd_context *gen7_mfd_context)
473 VAIQMatrixBufferH264 *iq_matrix;
474 VAPictureParameterBufferH264 *pic_param;
476 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
477 iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
479 iq_matrix = &gen7_mfd_context->iq_matrix.h264;
481 assert(decode_state->pic_param && decode_state->pic_param->buffer);
482 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
484 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
485 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
487 if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
488 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
489 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
/* Emit MFX_AVC_PICID_STATE by delegating to the shared Gen7.5+ helper,
 * passing this context's reference surface list. */
494 gen8_mfd_avc_picid_state(VADriverContextP ctx,
495 struct decode_state *decode_state,
496 struct gen7_mfd_context *gen7_mfd_context)
498 gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
499 gen7_mfd_context->reference_surface);
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): the DMV (direct-mode
 * motion vector) buffer addresses of the 16 reference surfaces and the
 * current frame, followed by the POC table -- top/bottom field order
 * counts for each reference and for CurrPic.
 */
503 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
504 struct decode_state *decode_state,
505 VAPictureParameterBufferH264 *pic_param,
506 VASliceParameterBufferH264 *slice_param,
507 struct gen7_mfd_context *gen7_mfd_context)
509 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
510 struct object_surface *obj_surface;
511 GenAvcSurface *gen7_avc_surface;
512 VAPictureH264 *va_pic;
515 BEGIN_BCS_BATCH(batch, 71);
516 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
518 /* reference surfaces 0..15 */
519 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
520 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
521 gen7_mfd_context->reference_surface[i].obj_surface &&
522 gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
524 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
525 gen7_avc_surface = obj_surface->private_data;
/* Reference DMV buffers are read-only (write domain 0). */
527 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
528 I915_GEM_DOMAIN_INSTRUCTION, 0,
530 OUT_BCS_BATCH(batch, 0);
532 OUT_BCS_BATCH(batch, 0);
533 OUT_BCS_BATCH(batch, 0);
537 OUT_BCS_BATCH(batch, 0);
539 /* the current decoding frame/field */
540 va_pic = &pic_param->CurrPic;
541 obj_surface = decode_state->render_object;
542 assert(obj_surface->bo && obj_surface->private_data);
543 gen7_avc_surface = obj_surface->private_data;
/* The current picture's DMV buffer is written by the hardware. */
545 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
546 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
549 OUT_BCS_BATCH(batch, 0);
550 OUT_BCS_BATCH(batch, 0);
/* POC list: look up each reference in ReferenceFrames by surface id and
 * emit its top/bottom field order counts. */
553 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
554 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
557 const VAPictureH264 * const va_pic = avc_find_picture(
558 obj_surface->base.id, pic_param->ReferenceFrames,
559 ARRAY_ELEMS(pic_param->ReferenceFrames));
561 assert(va_pic != NULL);
562 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
563 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
565 OUT_BCS_BATCH(batch, 0);
566 OUT_BCS_BATCH(batch, 0);
/* Finally the current picture's POCs. */
570 va_pic = &pic_param->CurrPic;
571 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
572 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
574 ADVANCE_BCS_BATCH(batch);
/* Emit a phantom (error-concealment) slice covering macroblocks before
 * the first real slice of the picture; forwards to the shared Gen6
 * helper using this context's batch. */
578 gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
579 VAPictureParameterBufferH264 *pic_param,
580 VASliceParameterBufferH264 *next_slice_param,
581 struct gen7_mfd_context *gen7_mfd_context)
583 gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
/*
 * Emit MFX_AVC_SLICE_STATE for one slice: normalized slice type
 * (SI->I, SP->P), active reference counts per list, weight denominators,
 * QP/deblocking parameters, and the MB coordinates of this slice's
 * start and of the next slice (or the picture end when this is the
 * last slice, signalled via the last-slice flag).
 */
587 gen8_mfd_avc_slice_state(VADriverContextP ctx,
588 VAPictureParameterBufferH264 *pic_param,
589 VASliceParameterBufferH264 *slice_param,
590 VASliceParameterBufferH264 *next_slice_param,
591 struct gen7_mfd_context *gen7_mfd_context)
593 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
594 int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
595 int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
596 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
597 int num_ref_idx_l0, num_ref_idx_l1;
598 int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
599 pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
600 int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
/* Map the five VA slice types onto the hardware's three (I/P/B). */
603 if (slice_param->slice_type == SLICE_TYPE_I ||
604 slice_param->slice_type == SLICE_TYPE_SI) {
605 slice_type = SLICE_TYPE_I;
606 } else if (slice_param->slice_type == SLICE_TYPE_P ||
607 slice_param->slice_type == SLICE_TYPE_SP) {
608 slice_type = SLICE_TYPE_P;
610 assert(slice_param->slice_type == SLICE_TYPE_B);
611 slice_type = SLICE_TYPE_B;
/* Reference counts: I uses none, P uses L0 only, B uses both lists. */
614 if (slice_type == SLICE_TYPE_I) {
615 assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
616 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
619 } else if (slice_type == SLICE_TYPE_P) {
620 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
621 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
624 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
625 num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
/* Convert first_mb_in_slice to (x, y) MB coordinates. */
628 first_mb_in_slice = slice_param->first_mb_in_slice;
629 slice_hor_pos = first_mb_in_slice % width_in_mbs;
630 slice_ver_pos = first_mb_in_slice / width_in_mbs;
/* NOTE(review): the guard for this doubling is not visible in this
 * view -- presumably it applies only when mbaff_picture is set, where
 * each MB address covers an MB pair. Confirm against the full source. */
633 slice_ver_pos = slice_ver_pos << 1;
634 if (next_slice_param) {
635 first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
636 next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
637 next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
640 next_slice_ver_pos = next_slice_ver_pos << 1;
642 next_slice_hor_pos = 0;
/* Last slice: "next" position is the bottom of the picture (halved for
 * field pictures). */
643 next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
646 BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
647 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
648 OUT_BCS_BATCH(batch, slice_type);
650 (num_ref_idx_l1 << 24) |
651 (num_ref_idx_l0 << 16) |
652 (slice_param->chroma_log2_weight_denom << 8) |
653 (slice_param->luma_log2_weight_denom << 0));
655 (slice_param->direct_spatial_mv_pred_flag << 29) |
656 (slice_param->disable_deblocking_filter_idc << 27) |
657 (slice_param->cabac_init_idc << 24) |
658 ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
659 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
660 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
662 (slice_ver_pos << 24) |
663 (slice_hor_pos << 16) |
664 (first_mb_in_slice << 0));
666 (next_slice_ver_pos << 16) |
667 (next_slice_hor_pos << 0));
669 (next_slice_param == NULL) << 19); /* last slice flag */
670 OUT_BCS_BATCH(batch, 0);
671 OUT_BCS_BATCH(batch, 0);
672 OUT_BCS_BATCH(batch, 0);
673 OUT_BCS_BATCH(batch, 0);
674 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_REF_IDX_STATE via the shared Gen6 helper, mapping the
 * slice's reference lists onto this context's frame-store entries. */
678 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
679 VAPictureParameterBufferH264 *pic_param,
680 VASliceParameterBufferH264 *slice_param,
681 struct gen7_mfd_context *gen7_mfd_context)
683 gen6_send_avc_ref_idx_state(
684 gen7_mfd_context->base.batch,
686 gen7_mfd_context->reference_surface
/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE: one table (L0) for P/SP slices with
 * explicit weighted prediction, two tables (L0 then L1) for B slices
 * with weighted_bipred_idc == 1; otherwise nothing. Each table packs,
 * per reference index, luma and chroma (Cb, Cr) weight/offset pairs as
 * 16-bit values -- 6 shorts per entry, 32 entries.
 */
691 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
692 VAPictureParameterBufferH264 *pic_param,
693 VASliceParameterBufferH264 *slice_param,
694 struct gen7_mfd_context *gen7_mfd_context)
696 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
697 int i, j, num_weight_offset_table = 0;
698 short weightoffsets[32 * 6];
700 if ((slice_param->slice_type == SLICE_TYPE_P ||
701 slice_param->slice_type == SLICE_TYPE_SP) &&
702 (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
703 num_weight_offset_table = 1;
706 if ((slice_param->slice_type == SLICE_TYPE_B) &&
707 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
708 num_weight_offset_table = 2;
/* Iteration 0 sends the L0 table, iteration 1 (B slices only) L1. */
711 for (i = 0; i < num_weight_offset_table; i++) {
712 BEGIN_BCS_BATCH(batch, 98);
713 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
714 OUT_BCS_BATCH(batch, i);
717 for (j = 0; j < 32; j++) {
718 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
719 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
720 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
721 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
722 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
723 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
726 for (j = 0; j < 32; j++) {
727 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
728 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
729 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
730 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
731 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
732 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
736 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
737 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFD_AVC_BSD_OBJECT (6 dwords) kicking off the decode of one
 * slice: slice data byte size and offset within slice_data_bo, the
 * byte and bit offsets of the first macroblock (skipping the slice
 * header), and the last-slice flag.
 */
742 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
743 VAPictureParameterBufferH264 *pic_param,
744 VASliceParameterBufferH264 *slice_param,
745 dri_bo *slice_data_bo,
746 VASliceParameterBufferH264 *next_slice_param,
747 struct gen7_mfd_context *gen7_mfd_context)
749 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
750 int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
752 pic_param->pic_fields.bits.entropy_coding_mode_flag);
754 /* the input bitstream format on GEN7 differs from GEN6 */
755 BEGIN_BCS_BATCH(batch, 6);
756 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
758 (slice_param->slice_data_size));
759 OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
767 ((slice_data_bit_offset >> 3) << 16) |
771 ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
772 (slice_data_bit_offset & 0x7));
773 OUT_BCS_BATCH(batch, 0);
774 ADVANCE_BCS_BATCH(batch);
/* One-time AVC context initialization: populate the default (flat)
 * IQ scaling matrices used when the application provides none. */
778 gen8_mfd_avc_context_init(
779 VADriverContextP ctx,
780 struct gen7_mfd_context *gen7_mfd_context
783 /* Initialize flat scaling lists */
784 avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/*
 * Per-picture AVC decode setup: scan all slices to decide whether
 * in-loop deblocking (ILDB) is enabled, refresh the frame-store index
 * from the picture parameters, mark the render target as referenced
 * when it will be used as a reference, ensure its BO and per-surface
 * DMV data exist, and (re)allocate the row-store scratch buffers sized
 * from the frame width in macroblocks.
 */
788 gen8_mfd_avc_decode_init(VADriverContextP ctx,
789 struct decode_state *decode_state,
790 struct gen7_mfd_context *gen7_mfd_context)
792 VAPictureParameterBufferH264 *pic_param;
793 VASliceParameterBufferH264 *slice_param;
794 struct i965_driver_data *i965 = i965_driver_data(ctx);
795 struct object_surface *obj_surface;
797 int i, j, enable_avc_ildb = 0;
798 unsigned int width_in_mbs, height_in_mbs;
/* ILDB is enabled as soon as any slice does not fully disable the
 * deblocking filter (disable_deblocking_filter_idc != 1). */
800 for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
801 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
802 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
804 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
805 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
806 assert((slice_param->slice_type == SLICE_TYPE_I) ||
807 (slice_param->slice_type == SLICE_TYPE_SI) ||
808 (slice_param->slice_type == SLICE_TYPE_P) ||
809 (slice_param->slice_type == SLICE_TYPE_SP) ||
810 (slice_param->slice_type == SLICE_TYPE_B));
812 if (slice_param->disable_deblocking_filter_idc != 1) {
821 assert(decode_state->pic_param && decode_state->pic_param->buffer);
822 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
823 gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
824 gen7_mfd_context->reference_surface);
825 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
826 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
827 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
828 assert(height_in_mbs > 0 && height_in_mbs <= 256);
830 /* Current decoded picture */
831 obj_surface = decode_state->render_object;
832 if (pic_param->pic_fields.bits.reference_pic_flag)
833 obj_surface->flags |= SURFACE_REFERENCED;
835 obj_surface->flags &= ~SURFACE_REFERENCED;
837 avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
838 gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
/* Exactly one output path is valid: post-deblocking when ILDB is on,
 * pre-deblocking otherwise. Both aliases reference the render BO. */
840 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
841 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
842 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
843 gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
845 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
846 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
847 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
848 gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
/* Row-store scratch buffers, sized per MB row of the frame. */
850 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
851 bo = dri_bo_alloc(i965->intel.bufmgr,
856 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
857 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
859 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
860 bo = dri_bo_alloc(i965->intel.bufmgr,
861 "deblocking filter row store",
862 width_in_mbs * 64 * 4,
865 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
866 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
868 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
869 bo = dri_bo_alloc(i965->intel.bufmgr,
871 width_in_mbs * 64 * 2,
874 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
875 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
877 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
878 bo = dri_bo_alloc(i965->intel.bufmgr,
880 width_in_mbs * 64 * 2,
883 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
884 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* Bitplane reads are VC-1 only; never used for AVC. */
886 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Top-level AVC picture decode: run per-picture setup, then emit the
 * full MFX command sequence atomically on the BCS ring -- pipe-mode /
 * surface / buffer-address / QM / PICID / IMG state once per picture,
 * then for each slice the direct-mode, ref-idx, weight-offset,
 * slice-state and BSD-object commands -- and flush the batch.
 */
890 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
891 struct decode_state *decode_state,
892 struct gen7_mfd_context *gen7_mfd_context)
894 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
895 VAPictureParameterBufferH264 *pic_param;
896 VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
897 dri_bo *slice_data_bo;
900 assert(decode_state->pic_param && decode_state->pic_param->buffer);
901 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
902 gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
904 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
905 intel_batchbuffer_emit_mi_flush(batch);
906 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
907 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
908 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
909 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
910 gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
911 gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
912 gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
/* Outer loop: one iteration per slice-parameter buffer (slice group). */
914 for (j = 0; j < decode_state->num_slice_params; j++) {
915 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
916 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
917 slice_data_bo = decode_state->slice_datas[j]->bo;
918 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
920 if (j == decode_state->num_slice_params - 1)
921 next_slice_group_param = NULL;
923 next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
/* If the first real slice does not start at MB 0, conceal the gap
 * with a phantom slice. */
925 if (j == 0 && slice_param->first_mb_in_slice)
926 gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
928 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
929 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
930 assert((slice_param->slice_type == SLICE_TYPE_I) ||
931 (slice_param->slice_type == SLICE_TYPE_SI) ||
932 (slice_param->slice_type == SLICE_TYPE_P) ||
933 (slice_param->slice_type == SLICE_TYPE_SP) ||
934 (slice_param->slice_type == SLICE_TYPE_B));
/* The next slice is either the following element in this buffer or
 * the first slice of the next slice group. */
936 if (i < decode_state->slice_params[j]->num_elements - 1)
937 next_slice_param = slice_param + 1;
939 next_slice_param = next_slice_group_param;
941 gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
942 gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
943 gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
944 gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
945 gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
950 intel_batchbuffer_end_atomic(batch);
951 intel_batchbuffer_flush(batch);
/*
 * Per-picture MPEG-2 decode setup: bind reference surfaces, ensure the
 * NV12 render-target BO exists, route output through the pre-deblocking
 * path (MPEG-2 has no in-loop deblocking) and allocate the BSD/MPC
 * row-store buffer sized from the frame width in MBs. All AVC-only
 * scratch buffers are marked invalid.
 */
955 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
956 struct decode_state *decode_state,
957 struct gen7_mfd_context *gen7_mfd_context)
959 VAPictureParameterBufferMPEG2 *pic_param;
960 struct i965_driver_data *i965 = i965_driver_data(ctx);
961 struct object_surface *obj_surface;
963 unsigned int width_in_mbs;
965 assert(decode_state->pic_param && decode_state->pic_param->buffer);
966 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
967 width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
969 mpeg2_set_reference_surfaces(
971 gen7_mfd_context->reference_surface,
976 /* Current decoded picture */
977 obj_surface = decode_state->render_object;
978 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
980 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
981 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
982 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
983 gen7_mfd_context->pre_deblocking_output.valid = 1;
985 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
986 bo = dri_bo_alloc(i965->intel.bufmgr,
991 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
992 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
/* Buffers not used by the MPEG-2 decode path. */
994 gen7_mfd_context->post_deblocking_output.valid = 0;
995 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
996 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
997 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
998 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Emit the 13-dword MFX_MPEG2_PIC_STATE command describing the current
 * MPEG-2 picture (f_codes, coding-extension flags, dimensions).
 */
gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int slice_concealment_disable_bit = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    /* NOTE(review): the condition guarding this assignment is not visible
     * in this view — confirm when it disables slice concealment. */
    slice_concealment_disable_bit = 1;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    /* DW1: the four 4-bit f_code values unpacked from the 16-bit VA field,
     * followed by the picture_coding_extension flags verbatim. */
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    /* DW2: picture coding type (I/P/B). */
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    /* DW3: concealment control plus frame size in macroblocks, minus one. */
    OUT_BCS_BATCH(batch,
                  (slice_concealment_disable_bit << 31) |
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
    /* DW4..DW12: unused for this decode path. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * Update the context's cached MPEG-2 inverse-quantiser matrices from the
 * submitted VAIQMatrixBufferMPEG2 (converting zig-zag scan order to the
 * raster order the hardware expects), then emit them via MFX_QM_STATE.
 */
gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        struct gen7_mfd_context *gen7_mfd_context)
    VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;

    /* Update internal QM state */
    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
        VAIQMatrixBufferMPEG2 * const iq_matrix =
            (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;

        /* load_* == -1 is the "never loaded" sentinel in the cached copy,
         * forcing the first submitted matrix to be taken. */
        if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
            iq_matrix->load_intra_quantiser_matrix) {
            gen_iq_matrix->load_intra_quantiser_matrix =
                iq_matrix->load_intra_quantiser_matrix;
            if (iq_matrix->load_intra_quantiser_matrix) {
                /* De-zigzag: VA supplies coefficients in zig-zag scan order. */
                for (j = 0; j < 64; j++)
                    gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
                        iq_matrix->intra_quantiser_matrix[j];

        /* Same caching/de-zigzag logic for the non-intra matrix. */
        if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
            iq_matrix->load_non_intra_quantiser_matrix) {
            gen_iq_matrix->load_non_intra_quantiser_matrix =
                iq_matrix->load_non_intra_quantiser_matrix;
            if (iq_matrix->load_non_intra_quantiser_matrix) {
                for (j = 0; j < 64; j++)
                    gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
                        iq_matrix->non_intra_quantiser_matrix[j];

    /* Commit QM state to HW */
    /* Two passes: intra and non-intra matrices. NOTE(review): the per-i
     * selection branch is not visible here; presumably i==0 picks intra
     * and i==1 non-intra — confirm. */
    for (i = 0; i < 2; i++) {
        unsigned char *qm = NULL;

        if (gen_iq_matrix->load_intra_quantiser_matrix) {
            qm = gen_iq_matrix->intra_quantiser_matrix;
            qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;

        if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
            qm = gen_iq_matrix->non_intra_quantiser_matrix;
            qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;

        /* Emit the 64-byte matrix through the shared MFX_QM_STATE helper. */
        gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
/*
 * Emit one MFD_MPEG2_BSD_OBJECT command for a slice: byte range of the
 * slice data, start bit offset within the first byte, and the position
 * of the next slice so the hardware knows where this one ends.
 */
gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    /* Field pictures cover half the frame height; detected from the
     * picture structure. */
    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
    /* Workaround: some streams report slice_vertical_position in frame
     * units even for field pictures; the wa flag selects which. */
    is_field_pic_wa = is_field_pic &&
                      gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    /* (vpos0,hpos0): MB coordinates where this slice starts. */
    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    /* (vpos1,hpos1): where the next slice starts, or the bottom of the
     * picture for the last slice. */
    if (next_slice_param == NULL) {
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;

    /* Number of macroblocks covered by this slice, in raster order. */
    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    /* DW1: slice data length in bytes, excluding the already-consumed
     * slice header (macroblock_offset is in bits, hence >> 3). */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    /* DW2: byte offset of the first macroblock data within the BO. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    /* DW3: last-slice flags plus the residual bit offset into the first byte. */
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    /* DW4: quantiser scale code and the next slice's start position. */
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (vpos1 << 8 | hpos1));
    ADVANCE_BCS_BATCH(batch);
/*
 * Top-level MPEG-2 decode for one picture: initialize per-picture
 * buffers, emit the common MFX pipeline state, then one BSD object per
 * slice, and flush the batch.
 */
gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
    /* All state + slice commands are emitted atomically into the BCS ring. */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Lazily detect (once, < 0 means undetermined) whether this stream
     * needs the slice-vertical-position workaround. */
    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    /* Outer loop: slice parameter buffers; inner loop: slices per buffer. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);

        /* First slice of the next buffer, needed to delimit the last
         * slice of this buffer. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
                next_slice_param = next_slice_group_param;

            gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
/* VA-API VC-1 picture_type → GEN7_VC1_*_PICTURE code used in
 * MFD_VC1_LONG_PIC_STATE (indexed by picture_fields.bits.picture_type). */
static const int va_to_gen7_vc1_pic_type[5] = {
    GEN7_VC1_BI_PICTURE,

/* VA mv_mode → hardware "unified MV mode" code. */
static const int va_to_gen7_vc1_mv[4] = {
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */

/* BFRACTION scale factors, indexed by b_picture_fraction (0..20); used
 * to derive the backward reference frame distance for B pictures. */
static const int b_picture_scale_factor[21] = {
    128, 85, 170, 64, 192,
    51, 102, 153, 204, 43,
    215, 37, 74, 111, 148,
    185, 222, 32, 96, 160,

/* conditional_overlap_flag → hardware condover code. */
static const int va_to_gen7_vc1_condover[3] = {

/* sequence_fields.bits.profile (0..3) → GEN7 VC-1 profile code. */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
/*
 * Destructor for the per-surface VC-1 private data installed by
 * gen8_mfd_init_vc1_surface: releases the direct-MV buffer and the
 * state struct itself. Safe to call when no private data was attached.
 */
gen8_mfd_free_vc1_surface(void **data)
    struct gen7_vc1_surface *gen7_vc1_surface = *data;

    /* Nothing to free if this surface never carried VC-1 state. */
    if (!gen7_vc1_surface)

    /* Drop our reference on the direct-MV buffer object, then the struct. */
    dri_bo_unreference(gen7_vc1_surface->dmv);
    free(gen7_vc1_surface);
/*
 * Lazily attach per-surface VC-1 decode state to a VA surface: records
 * the picture type and allocates the direct-MV (dmv) buffer that B
 * pictures read back for direct-mode prediction.
 */
gen8_mfd_init_vc1_surface(VADriverContextP ctx,
                          VAPictureParameterBufferVC1 *pic_param,
                          struct object_surface *obj_surface)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
    int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;

    /* Install the matching destructor so the surface layer can free us. */
    obj_surface->free_private_data = gen8_mfd_free_vc1_surface;

    /* Allocate the private state on first use for this surface. */
    if (!gen7_vc1_surface) {
        gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
        assert((obj_surface->size & 0x3f) == 0);
        obj_surface->private_data = gen7_vc1_surface;

    /* Remember what kind of picture was decoded into this surface; later
     * B pictures consult this to validate the dmv read buffer. */
    gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;

    /* One dmv allocation per surface, 64 bytes per macroblock. */
    if (gen7_vc1_surface->dmv == NULL) {
        gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
                                             "direct mv w/r buffer",
                                             width_in_mbs * height_in_mbs * 64,
/*
 * Per-picture setup for VC-1 decode: binds the render target as either
 * the post- or pre-deblocking output (depending on the in-loop filter
 * flag), allocates the row-store scratch buffers, and repacks the
 * VA bitplane buffer into the hardware's nibble layout.
 */
gen8_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    picture_type = pic_param->picture_fields.bits.picture_type;

    /* Resolve the forward/backward reference pictures into the
     * context's frame-store table. */
    intel_update_vc1_frame_store_index(ctx,
        gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* Both outputs point at the render target; exactly one is valid,
     * selected by whether the in-loop deblocking filter is enabled. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    /* Row-store scratch buffers, sized from the picture width in MBs. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 7 * 64,
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    /* Bitplane data (per-MB flags) is only present when the picture
     * parameters say so. */
    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);

    if (gen7_mfd_context->bitplane_read_buffer.valid) {
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        /* Destination packs two macroblocks (4 bits each) per byte. */
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          bitplane_width * height_in_mbs,
        gen7_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);

        /* Repack: the VA buffer stores one MB's flags per nibble in
         * raster order; swizzle into the hardware's row layout. */
        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for(src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;

                /* Even MB index occupies the high nibble of its byte. */
                src_index = (src_h * width_in_mbs + src_w) / 2;
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                /* Skipped pictures carry no real bitplane data; the
                 * branch body is not visible here. */
                if (picture_type == GEN7_VC1_SKIPPED_PICTURE){

                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));

            /* Odd trailing MB: shift its nibble into place. */
            dst[src_w / 2] >>= 4;

            dst += bitplane_width;

    /* No bitplane this picture: leave the read buffer unset. */
    gen7_mfd_context->bitplane_read_buffer.bo = NULL;
/*
 * Derive all per-picture VC-1 decode controls from the VA picture
 * parameters (alternative-pquant configuration, unified MV mode,
 * B-fraction scaling, overlap smoothing, interpolation mode, ...) and
 * emit the 6-dword MFD_VC1_LONG_PIC_STATE command.
 */
gen8_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int dmv_surface_valid = 0;
    int interpolation_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /* Unpack the quantizer-control fields used below. */
    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /* Map DQUANT/DQUANTFRM/DQPROFILE (VC-1 VOPDQUANT syntax) onto the
     * hardware's alt-pquant config + edge mask. */
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        /* dquant == 2: all four picture edges use the alternate quantizer. */
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            /* Edge mask depends on which DQ profile the stream selected. */
            switch (dqprofile) {
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                alt_pquant_edge_mask = 0xf;
                    alt_pquant_edge_mask = 0x9;
                    alt_pquant_edge_mask = (0x3 << dqdbedge);
                    alt_pquant_edge_mask = (0x1 << dqsbedge);

    /* Unified MV mode: when intensity compensation is active the real
     * MV mode lives in mv_mode2. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        ref_field_pic_polarity = 0;

    /* BFRACTION → scale factor for backward reference distance. */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];

    /* NOTE(review): advanced-profile I pictures are reported to the HW as
     * BI here; intermediate elided condition(s) may qualify this — confirm. */
    if (profile == GEN7_VC1_ADVANCED_PROFILE &&
        picture_type == GEN7_VC1_I_PICTURE)
        picture_type = GEN7_VC1_BI_PICTURE;

    /* Intra pictures use AC coding set index 2 for luma, others index 1. */
    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;

    /*
     * 8.3.6.2.1 Transform Type Selection
     * If variable-sized transform coding is not enabled,
     * then the 8x8 transform shall be used for all blocks.
     * it is also MFX_VC1_PIC_STATE requirement.
     */
    if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
        pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
        pic_param->transform_fields.bits.frame_level_transform_type = 0;

    /* B pictures read the backward reference's direct-MV buffer; it is
     * only valid if that reference was a P picture (I/BI never wrote one). */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        struct gen7_vc1_surface *gen7_vc1_surface = NULL;

        obj_surface = decode_state->reference_objects[1];

            gen7_vc1_surface = obj_surface->private_data;

        if (!gen7_vc1_surface ||
            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
            dmv_surface_valid = 1;

    /* Frame coding mode (progressive / frame-interlace / field-interlace). */
    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
        if (pic_param->picture_fields.bits.top_field_first)

    /* Backward reference frame distance for B pictures, scaled by the
     * BFRACTION-derived factor. */
    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

    /* Overlap smoothing: sequence flag refined per profile / picture
     * type / quantizer scale, per the VC-1 overlap filter rules. */
    overlap = pic_param->sequence_fields.bits.overlap;

    if (profile != GEN7_VC1_ADVANCED_PROFILE){
        if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
            pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
            pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
            pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
            if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
            } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                       va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    /* Sub-pel interpolation filter selection from the (possibly
     * intensity-compensated) MV mode. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 9; /* Half-pel bilinear */
    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
        interpolation_mode = 1; /* Half-pel bicubic */
        interpolation_mode = 0; /* Quarter-pel bicubic */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    /* DW1: picture size in macroblocks, minus one. */
    OUT_BCS_BATCH(batch,
                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    /* DW2: stream-level controls (quantizer mode, rounding, sync markers,
     * interpolation filter, range reduction, loop filter, profile). */
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
                  !pic_param->picture_fields.bits.is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    /* DW3: condover, picture type, quantizer scale. */
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  picture_type << 26 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
    /* DW4: motion-vector and quantizer controls. */
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  pic_param->reference_fields.bits.reference_distance << 20 |
                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    /* DW5: bitplane presence (bits are inverted: 1 = raw mode, i.e. the
     * flag is coded per-MB in the bitstream instead of the bitplane
     * buffer) plus the VLC table selectors. */
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 31 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit the 6-dword MFX_VC1_PRED_PIPE_STATE command: intensity
 * compensation enables plus the luma scale/shift values used to remap
 * the reference picture.
 */
gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    int intensitycomp_single;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /* Intensity compensation is signalled through the MV mode. */
    intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  0 << 14 | /* FIXME: double ??? */
                  intensitycomp_single << 10 |
                  intensitycomp_single << 8 |
                  0 << 4 | /* FIXME: interlace mode */
    /* DW2: luma remapping parameters from the picture parameters. */
    OUT_BCS_BATCH(batch,
                  pic_param->luma_shift << 16 |
                  pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit the 7-dword MFX_VC1_DIRECTMODE_STATE command: the current
 * picture's direct-MV buffer as the write target and the backward
 * reference's as the read source. Slots are zeroed when a buffer is
 * not available (e.g. no reference, or no private surface data).
 */
gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;

    /* The current picture writes its motion vectors... */
    obj_surface = decode_state->render_object;

    if (obj_surface && obj_surface->private_data) {
        dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;

    /* ...and B pictures read them back from the backward reference. */
    obj_surface = decode_state->reference_objects[1];

    if (obj_surface && obj_surface->private_data) {
        dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;

    BEGIN_BCS_BATCH(batch, 7);
    OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));

    /* Write-buffer address (relocated) or a null entry. */
    if (dmv_write_buffer)
        OUT_BCS_RELOC(batch, dmv_write_buffer,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* Read-buffer address (read-only domain) or a null entry. */
    if (dmv_read_buffer)
        OUT_BCS_RELOC(batch, dmv_read_buffer,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * Translate a macroblock-data bit offset from the decoded (raw) slice
 * header into a bit offset within the escaped bitstream actually stored
 * in memory.
 *
 * Advanced profile (profile == 3) streams insert a 0x03
 * emulation-prevention byte after every 0x00 0x00 pair that would
 * otherwise mimic a start code; each such byte found inside the header
 * pushes the macroblock data one byte further into the buffer. Other
 * profiles carry no escaping, so the offset passes through unchanged.
 *
 * buf                      escaped slice data (read-only)
 * in_slice_data_bit_offset macroblock_offset from the VA slice params
 * profile                  raw sequence_fields.bits.profile value
 * returns                  bit offset into buf of the macroblock data
 */
static int
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_bytes = in_slice_data_bit_offset / 8;
    int raw;   /* bytes of un-escaped header consumed so far */
    int esc;   /* corresponding position in the escaped buffer  */

    if (profile != 3) /* only advanced profile uses emulation prevention */
        return in_slice_data_bit_offset;

    for (raw = 0, esc = 0; raw < header_bytes; raw++, esc++) {
        /* 00 00 03 followed by a value < 4 marks an emulation-prevention
         * byte: the 0x03 itself contributes no header payload, so skip
         * past it in the escaped stream. */
        if (buf[esc] == 0 && buf[esc + 1] == 0 && buf[esc + 2] == 3 && buf[esc + 3] < 4) {
            raw++;
            esc += 2;
        }
    }

    /* Whole header bytes in the escaped stream plus the residual bits. */
    return 8 * esc + in_slice_data_bit_offset % 8;
}
/*
 * Emit one MFD_VC1_BSD_OBJECT command for a slice. The slice data BO is
 * mapped briefly to adjust macroblock_offset for advanced-profile
 * emulation-prevention bytes before programming the hardware.
 */
gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVC1 *pic_param,
                        VASliceParameterBufferVC1 *slice_param,
                        VASliceParameterBufferVC1 *next_slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int next_slice_start_vert_pos;
    int macroblock_offset;
    uint8_t *slice_data = NULL;

    /* CPU-read the slice header to recompute the bit offset against the
     * escaped bitstream (see gen8_mfd_vc1_get_macroblock_bit_offset). */
    dri_bo_map(slice_data_bo, 0);
    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
    macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
                                                               slice_param->macroblock_offset,
                                                               pic_param->sequence_fields.bits.profile);
    dri_bo_unmap(slice_data_bo);

    /* The next slice's start row delimits this slice; the last slice
     * runs to the bottom of the picture. */
    if (next_slice_param)
        next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
        next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
    /* DW1: remaining slice data length in bytes past the header. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (macroblock_offset >> 3));
    /* DW2: byte offset of the macroblock data within the BO. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (macroblock_offset >> 3));
    /* DW3: first MB row of this slice and of the next one. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_vertical_position << 16 |
                  next_slice_start_vert_pos << 0);
    /* DW4: residual bit offset into the first macroblock-data byte. */
    OUT_BCS_BATCH(batch,
                  (macroblock_offset & 0x7));
    ADVANCE_BCS_BATCH(batch);
/*
 * Top-level VC-1 decode for one picture: per-picture init, common MFX
 * pipeline state, VC-1-specific picture/prediction/direct-mode state,
 * then one BSD object per slice, and a batch flush.
 */
gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
    /* Emit everything atomically into the BCS ring. */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);

    /* Outer loop: slice parameter buffers; inner loop: slices per buffer. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);

        /* First slice of the next buffer delimits this buffer's last slice. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
                next_slice_param = next_slice_group_param;

            gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
/*
 * Per-picture setup for JPEG decode: derive the surface fourcc and
 * chroma subsampling from the component sampling factors, bind the
 * render target as the pre-deblocking output, and invalidate every
 * scratch buffer the JPEG pipeline does not use.
 */
gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          struct gen7_mfd_context *gen7_mfd_context)
    struct object_surface *obj_surface;
    VAPictureParameterBufferJPEGBaseline *pic_param;
    int subsampling = SUBSAMPLE_YUV420;
    int fourcc = VA_FOURCC_IMC3;

    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    /* Single component: grayscale. Three components: classify by the
     * (h, v) sampling factors of Y vs. the two chroma components. */
    if (pic_param->num_components == 1)
        subsampling = SUBSAMPLE_YUV400;
    else if (pic_param->num_components == 3) {
        int h1 = pic_param->components[0].h_sampling_factor;
        int h2 = pic_param->components[1].h_sampling_factor;
        int h3 = pic_param->components[2].h_sampling_factor;
        int v1 = pic_param->components[0].v_sampling_factor;
        int v2 = pic_param->components[1].v_sampling_factor;
        int v3 = pic_param->components[2].v_sampling_factor;

        if (h1 == 2 && h2 == 1 && h3 == 1 &&
            v1 == 2 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV420;
            fourcc = VA_FOURCC_IMC3;
        } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
                   v1 == 1 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV422H;
            fourcc = VA_FOURCC_422H;
        } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
                   v1 == 1 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV444;
            fourcc = VA_FOURCC_444P;
        } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
                   v1 == 1 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV411;
            fourcc = VA_FOURCC_411P;
        } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
                   v1 == 2 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV422V;
            fourcc = VA_FOURCC_422V;
        /* NOTE(review): the two mappings below look asymmetric
         * ((2,1,1)/(2,2,2) → 422H and (2,2,2)/(2,1,1) → 422V); inherited
         * as-is — confirm against the JPEG sampling-factor semantics. */
        } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
                   v1 == 2 && v2 == 2 && v3 == 2) {
            subsampling = SUBSAMPLE_YUV422H;
            fourcc = VA_FOURCC_422H;
        } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
                   v1 == 2 && v2 == 1 && v3 == 1) {
            subsampling = SUBSAMPLE_YUV422V;
            fourcc = VA_FOURCC_422V;

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    /* (Re)allocate the render target with the format derived above. */
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = 1;

    /* JPEG needs no deblocking output and no row-store/bitplane
     * scratch buffers: clear and invalidate them all. */
    gen7_mfd_context->post_deblocking_output.bo = NULL;
    gen7_mfd_context->post_deblocking_output.valid = 0;

    gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.bo = NULL;
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Map VA rotation indices (0/90/180/270) to the GEN7 MFX JPEG rotation
 * encoding; only entry 0 (no rotation) is used by the pic-state emitter. */
1948 static const int va_to_gen7_jpeg_rotation[4] = {
1949 GEN7_JPEG_ROTATION_0,
1950 GEN7_JPEG_ROTATION_90,
1951 GEN7_JPEG_ROTATION_180,
1952 GEN7_JPEG_ROTATION_270
1956 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1957 struct decode_state *decode_state,
1958 struct gen7_mfd_context *gen7_mfd_context)
1960 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1961 VAPictureParameterBufferJPEGBaseline *pic_param;
1962 int chroma_type = GEN7_YUV420;
1963 int frame_width_in_blks;
1964 int frame_height_in_blks;
1966 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1967 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1969 if (pic_param->num_components == 1)
1970 chroma_type = GEN7_YUV400;
1971 else if (pic_param->num_components == 3) {
1972 int h1 = pic_param->components[0].h_sampling_factor;
1973 int h2 = pic_param->components[1].h_sampling_factor;
1974 int h3 = pic_param->components[2].h_sampling_factor;
1975 int v1 = pic_param->components[0].v_sampling_factor;
1976 int v2 = pic_param->components[1].v_sampling_factor;
1977 int v3 = pic_param->components[2].v_sampling_factor;
1979 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1980 v1 == 2 && v2 == 1 && v3 == 1)
1981 chroma_type = GEN7_YUV420;
1982 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1983 v1 == 1 && v2 == 1 && v3 == 1)
1984 chroma_type = GEN7_YUV422H_2Y;
1985 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1986 v1 == 1 && v2 == 1 && v3 == 1)
1987 chroma_type = GEN7_YUV444;
1988 else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1989 v1 == 1 && v2 == 1 && v3 == 1)
1990 chroma_type = GEN7_YUV411;
1991 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1992 v1 == 2 && v2 == 1 && v3 == 1)
1993 chroma_type = GEN7_YUV422V_2Y;
1994 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1995 v1 == 2 && v2 == 2 && v3 == 2)
1996 chroma_type = GEN7_YUV422H_4Y;
1997 else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1998 v1 == 2 && v2 == 1 && v3 == 1)
1999 chroma_type = GEN7_YUV422V_4Y;
2004 if (chroma_type == GEN7_YUV400 ||
2005 chroma_type == GEN7_YUV444 ||
2006 chroma_type == GEN7_YUV422V_2Y) {
2007 frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2008 frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2009 } else if (chroma_type == GEN7_YUV411) {
2010 frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2011 frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2013 frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2014 frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2017 BEGIN_BCS_BATCH(batch, 3);
2018 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2019 OUT_BCS_BATCH(batch,
2020 (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
2021 (chroma_type << 0));
2022 OUT_BCS_BATCH(batch,
2023 ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
2024 ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
2025 ADVANCE_BCS_BATCH(batch);
2028 static const int va_to_gen7_jpeg_hufftable[2] = {
/*
 * gen8_mfd_jpeg_huff_table_state:
 * Emit one MFX_JPEG_HUFF_TABLE_STATE command per Huffman table that the
 * application asked to load. Does nothing if no Huffman table buffer was
 * supplied.
 */
2034 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2035                                struct decode_state *decode_state,
2036                                struct gen7_mfd_context *gen7_mfd_context,
2039 VAHuffmanTableBufferJPEGBaseline *huffman_table;
2040 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2043 if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2046 huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2048 for (index = 0; index < num_tables; index++) {
2049 int id = va_to_gen7_jpeg_hufftable[index];
/* Skip tables the app did not mark for loading. */
2050 if (!huffman_table->load_huffman_table[index])
/* Payload: 12 + 12 + 16 + 164 bytes = 51 DWords, plus the 2-DWord
 * header = 53 DWords. ac_values is 162 bytes in VA-API; 164 here pads
 * the write to a DWord boundary. */
2052 BEGIN_BCS_BATCH(batch, 53);
2053 OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2054 OUT_BCS_BATCH(batch, id);
2055 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2056 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2057 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2058 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2059 ADVANCE_BCS_BATCH(batch);
/* Maps a 1-based JPEG component id (Y=1, Cb=2, Cr=3, alpha=4) to the
 * hardware quantization-matrix type for MFX_QM_STATE. Entry 0 is not
 * visible in this chunk (component ids are normalized to start at 1). */
2063 static const int va_to_gen7_jpeg_qm[5] = {
2065 MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2066 MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2067 MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2068 MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
/*
 * gen8_mfd_jpeg_qm_state:
 * Program one quantization matrix per picture component. The VA-API
 * quantiser tables are stored in zigzag scan order; the hardware wants
 * raster order, so each table is de-zigzagged before being emitted via
 * gen8_mfd_qm_state(). Does nothing if no IQ matrix buffer was supplied.
 */
2072 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2073                        struct decode_state *decode_state,
2074                        struct gen7_mfd_context *gen7_mfd_context)
2076 VAPictureParameterBufferJPEGBaseline *pic_param;
2077 VAIQMatrixBufferJPEGBaseline *iq_matrix;
2080 if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2083 iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2084 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2086 assert(pic_param->num_components <= 3);
2088 for (index = 0; index < pic_param->num_components; index++) {
/* Normalize component ids so the first component maps to 1 (Y). */
2089 int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2091 unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2092 unsigned char raster_qm[64];
/* Ignore components whose normalized id is outside the table range. */
2095 if (id > 4 || id < 1)
2098 if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2101 qm_type = va_to_gen7_jpeg_qm[id];
/* Convert zigzag order (VA-API) to raster order (hardware). */
2103 for (j = 0; j < 64; j++)
2104 raster_qm[zigzag_direct[j]] = qm[j];
2106 gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
/*
 * gen8_mfd_jpeg_bsd_object:
 * Emit one MFD_JPEG_BSD_OBJECT command for a single scan (slice):
 * bitstream size/offset, MCU start position, interleave flag, the mask
 * of components present in the scan, MCU count and restart interval.
 */
2111 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2112                          VAPictureParameterBufferJPEGBaseline *pic_param,
2113                          VASliceParameterBufferJPEGBaseline *slice_param,
2114                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2115                          dri_bo *slice_data_bo,
2116                          struct gen7_mfd_context *gen7_mfd_context)
2118 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2119 int scan_component_mask = 0;
2122 assert(slice_param->num_components > 0);
2123 assert(slice_param->num_components < 4);
2124 assert(slice_param->num_components <= pic_param->num_components);
/* Build a 3-bit mask of which components (Y/Cb/Cr) this scan covers;
 * component selectors are normalized against the first component id. */
2126 for (i = 0; i < slice_param->num_components; i++) {
2127 switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2129 scan_component_mask |= (1 << 0);
2132 scan_component_mask |= (1 << 1);
2135 scan_component_mask |= (1 << 2);
2143 BEGIN_BCS_BATCH(batch, 6);
2144 OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2145 OUT_BCS_BATCH(batch,
2146 slice_param->slice_data_size);
2147 OUT_BCS_BATCH(batch,
2148 slice_param->slice_data_offset);
2149 OUT_BCS_BATCH(batch,
2150 slice_param->slice_horizontal_position << 16 |
2151 slice_param->slice_vertical_position << 0);
2152 OUT_BCS_BATCH(batch,
2153 ((slice_param->num_components != 1) << 30) | /* interleaved */
2154 (scan_component_mask << 27) | /* scan components */
2155 (0 << 26) | /* disable interrupt allowed */
2156 (slice_param->num_mcus << 0)); /* MCU count */
2157 OUT_BCS_BATCH(batch,
2158 (slice_param->restart_interval << 0)); /* RestartInterval */
2159 ADVANCE_BCS_BATCH(batch);
2162 /* Workaround for JPEG decoding on Ivybridge */
/* A tiny pre-canned AVC bitstream (plus its metadata) that is decoded
 * before every real JPEG picture to put the MFX engine in a known state.
 * NOTE(review): width/height/qp/data_size fields declared around here are
 * not visible in this chunk. */
2168 unsigned char data[32];
2170 int data_bit_offset;
2172 } gen7_jpeg_wa_clip = {
2176 0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2177 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
/*
 * gen8_jpeg_wa_init:
 * Allocate (or re-create) the scratch NV12 surface and the slice-data BO
 * holding the canned AVC clip used by the JPEG decode workaround.
 */
2185 gen8_jpeg_wa_init(VADriverContextP ctx,
2186                   struct gen7_mfd_context *gen7_mfd_context)
2188 struct i965_driver_data *i965 = i965_driver_data(ctx);
2190 struct object_surface *obj_surface;
/* Drop any previously-created workaround surface before making a new one. */
2192 if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2193 i965_DestroySurfaces(ctx,
2194 &gen7_mfd_context->jpeg_wa_surface_id,
2197 status = i965_CreateSurfaces(ctx,
2198 gen7_jpeg_wa_clip.width,
2199 gen7_jpeg_wa_clip.height,
2200 VA_RT_FORMAT_YUV420,
2202 &gen7_mfd_context->jpeg_wa_surface_id);
2203 assert(status == VA_STATUS_SUCCESS);
2205 obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2206 assert(obj_surface);
2207 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2208 gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
/* Upload the canned clip once; the BO is reused for later pictures. */
2210 if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2211 gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2215 dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2217 gen7_jpeg_wa_clip.data_size,
2218 gen7_jpeg_wa_clip.data);
/*
 * gen8_jpeg_wa_pipe_mode_select:
 * Emit MFX_PIPE_MODE_SELECT configured for AVC VLD decode — the
 * workaround clip is an AVC stream even though the real picture is JPEG.
 */
2223 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2224                               struct gen7_mfd_context *gen7_mfd_context)
2226 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2228 BEGIN_BCS_BATCH(batch, 5);
2229 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2230 OUT_BCS_BATCH(batch,
2231 (MFX_LONG_MODE << 17) | /* Currently only support long format */
2232 (MFD_MODE_VLD << 15) | /* VLD mode */
2233 (0 << 10) | /* disable Stream-Out */
2234 (0 << 9) | /* Post Deblocking Output */
2235 (1 << 8) | /* Pre Deblocking Output */
2236 (0 << 5) | /* not in stitch mode */
2237 (MFX_CODEC_DECODE << 4) | /* decoding mode */
2238 (MFX_FORMAT_AVC << 0));
2239 OUT_BCS_BATCH(batch,
2240 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
2241 (0 << 3) | /* terminate if AVC mbdata error occurs */
2242 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
2245 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2246 OUT_BCS_BATCH(batch, 0); /* reserved */
2247 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_jpeg_wa_surface_state:
 * Emit MFX_SURFACE_STATE describing the scratch NV12 surface that the
 * workaround clip decodes into.
 */
2251 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2252                            struct gen7_mfd_context *gen7_mfd_context)
2254 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2255 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2257 BEGIN_BCS_BATCH(batch, 6);
2258 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2259 OUT_BCS_BATCH(batch, 0);
2260 OUT_BCS_BATCH(batch,
2261 ((obj_surface->orig_width - 1) << 18) |
2262 ((obj_surface->orig_height - 1) << 4));
2263 OUT_BCS_BATCH(batch,
2264 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2265 (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2266 (0 << 22) | /* surface object control state, ignored */
2267 ((obj_surface->width - 1) << 3) | /* pitch */
2268 (0 << 2) | /* must be 0 */
2269 (1 << 1) | /* must be tiled */
2270 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
2271 OUT_BCS_BATCH(batch,
2272 (0 << 16) | /* X offset for U(Cb), must be 0 */
2273 (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2274 OUT_BCS_BATCH(batch,
2275 (0 << 16) | /* X offset for V(Cr), must be 0 */
2276 (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
2277 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_jpeg_wa_pipe_buf_addr_state:
 * Emit MFX_PIPE_BUF_ADDR_STATE for the workaround decode. Only the
 * pre-deblocking output (the scratch surface) and a throwaway intra
 * row-store buffer are programmed; everything else is zeroed. The
 * temporary intra row-store BO is released at the end — the kernel
 * keeps it alive until the batch completes.
 */
2281 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2282                                  struct gen7_mfd_context *gen7_mfd_context)
2284 struct i965_driver_data *i965 = i965_driver_data(ctx);
2285 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2286 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2290 intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2295 BEGIN_BCS_BATCH(batch, 61);
2296 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
/* DW1-3: pre-deblocking output = scratch surface. */
2297 OUT_BCS_RELOC(batch,
2299 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2301 OUT_BCS_BATCH(batch, 0);
2302 OUT_BCS_BATCH(batch, 0);
2305 OUT_BCS_BATCH(batch, 0); /* post deblocking */
2306 OUT_BCS_BATCH(batch, 0);
2307 OUT_BCS_BATCH(batch, 0);
2309 /* uncompressed-video & stream out 7-12 */
2310 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2311 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2312 OUT_BCS_BATCH(batch, 0);
2313 OUT_BCS_BATCH(batch, 0);
2314 OUT_BCS_BATCH(batch, 0);
2315 OUT_BCS_BATCH(batch, 0);
2317 /* the DW 13-15 is for intra row store scratch */
2318 OUT_BCS_RELOC(batch,
2320 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2322 OUT_BCS_BATCH(batch, 0);
2323 OUT_BCS_BATCH(batch, 0);
2325 /* the DW 16-18 is for deblocking filter */
2326 OUT_BCS_BATCH(batch, 0);
2327 OUT_BCS_BATCH(batch, 0);
2328 OUT_BCS_BATCH(batch, 0);
/* Reference picture address slots — none needed for the canned I-frame. */
2331 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2332 OUT_BCS_BATCH(batch, 0);
2333 OUT_BCS_BATCH(batch, 0);
2335 OUT_BCS_BATCH(batch, 0);
2337 /* the DW52-54 is for mb status address */
2338 OUT_BCS_BATCH(batch, 0);
2339 OUT_BCS_BATCH(batch, 0);
2340 OUT_BCS_BATCH(batch, 0);
2341 /* the DW56-60 is for ILDB & second ILDB address */
2342 OUT_BCS_BATCH(batch, 0);
2343 OUT_BCS_BATCH(batch, 0);
2344 OUT_BCS_BATCH(batch, 0);
2345 OUT_BCS_BATCH(batch, 0);
2346 OUT_BCS_BATCH(batch, 0);
2347 OUT_BCS_BATCH(batch, 0);
2349 ADVANCE_BCS_BATCH(batch);
/* Safe to drop our reference: the relocation keeps the BO alive. */
2351 dri_bo_unreference(intra_bo);
/*
 * gen8_jpeg_wa_bsp_buf_base_addr_state:
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE for the workaround decode, with
 * freshly-allocated throwaway BSD/MPC and MPR row-store buffers. Both
 * BOs are unreferenced at the end; the relocations keep them alive
 * until the batch completes.
 */
2355 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2356                                      struct gen7_mfd_context *gen7_mfd_context)
2358 struct i965_driver_data *i965 = i965_driver_data(ctx);
2359 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2360 dri_bo *bsd_mpc_bo, *mpr_bo;
2362 bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2363 "bsd mpc row store",
2364 11520, /* 1.5 * 120 * 64 */
2367 mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2369 7680, /* 1. 0 * 120 * 64 */
2372 BEGIN_BCS_BATCH(batch, 10);
2373 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2375 OUT_BCS_RELOC(batch,
2377 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2380 OUT_BCS_BATCH(batch, 0);
2381 OUT_BCS_BATCH(batch, 0);
2383 OUT_BCS_RELOC(batch,
2385 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2387 OUT_BCS_BATCH(batch, 0);
2388 OUT_BCS_BATCH(batch, 0);
2390 OUT_BCS_BATCH(batch, 0);
2391 OUT_BCS_BATCH(batch, 0);
2392 OUT_BCS_BATCH(batch, 0);
2394 ADVANCE_BCS_BATCH(batch);
2396 dri_bo_unreference(bsd_mpc_bo);
2397 dri_bo_unreference(mpr_bo);
/* AVC QM programming step of the JPEG workaround. NOTE(review): the body
 * is not visible in this chunk — presumably a no-op (default flat
 * matrices suffice for the canned clip); confirm against the full file. */
2401 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2402                           struct gen7_mfd_context *gen7_mfd_context)
/*
 * gen8_jpeg_wa_avc_img_state:
 * Emit MFX_AVC_IMG_STATE for the workaround clip: a 1x1-macroblock
 * 4:2:0 CABAC frame with all optional features disabled.
 */
2408 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2409                            struct gen7_mfd_context *gen7_mfd_context)
2411 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2413 int mbaff_frame_flag = 0;
2414 unsigned int width_in_mbs = 1, height_in_mbs = 1;
2416 BEGIN_BCS_BATCH(batch, 16);
2417 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2418 OUT_BCS_BATCH(batch,
2419 width_in_mbs * height_in_mbs);
2420 OUT_BCS_BATCH(batch,
2421 ((height_in_mbs - 1) << 16) |
2422 ((width_in_mbs - 1) << 0));
2423 OUT_BCS_BATCH(batch,
2428 (0 << 12) | /* differ from GEN6 */
2431 OUT_BCS_BATCH(batch,
2432 (1 << 10) | /* 4:2:0 */
2433 (1 << 7) | /* CABAC */
2439 (mbaff_frame_flag << 1) |
2441 OUT_BCS_BATCH(batch, 0);
2442 OUT_BCS_BATCH(batch, 0);
2443 OUT_BCS_BATCH(batch, 0);
2444 OUT_BCS_BATCH(batch, 0);
2445 OUT_BCS_BATCH(batch, 0);
2446 OUT_BCS_BATCH(batch, 0);
2447 OUT_BCS_BATCH(batch, 0);
2448 OUT_BCS_BATCH(batch, 0);
2449 OUT_BCS_BATCH(batch, 0);
2450 OUT_BCS_BATCH(batch, 0);
2451 OUT_BCS_BATCH(batch, 0);
2452 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_jpeg_wa_avc_directmode_state:
 * Emit an all-zero MFX_AVC_DIRECTMODE_STATE — the canned clip is an
 * intra frame, so no reference surfaces or POC lists are needed.
 */
2456 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2457                                   struct gen7_mfd_context *gen7_mfd_context)
2459 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2462 BEGIN_BCS_BATCH(batch, 71);
2463 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2465 /* reference surfaces 0..15 */
2466 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2467 OUT_BCS_BATCH(batch, 0); /* top */
2468 OUT_BCS_BATCH(batch, 0); /* bottom */
2471 OUT_BCS_BATCH(batch, 0);
2473 /* the current decoding frame/field */
2474 OUT_BCS_BATCH(batch, 0); /* top */
2475 OUT_BCS_BATCH(batch, 0);
2476 OUT_BCS_BATCH(batch, 0);
/* POC list entries for the (nonexistent) references. */
2479 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2480 OUT_BCS_BATCH(batch, 0);
2481 OUT_BCS_BATCH(batch, 0);
2484 OUT_BCS_BATCH(batch, 0);
2485 OUT_BCS_BATCH(batch, 0);
2487 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_jpeg_wa_ind_obj_base_addr_state:
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE pointing the bitstream fetcher at
 * the BO holding the canned workaround clip.
 */
2491 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2492                                      struct gen7_mfd_context *gen7_mfd_context)
2494 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2496 BEGIN_BCS_BATCH(batch, 11);
2497 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2498 OUT_BCS_RELOC(batch,
2499 gen7_mfd_context->jpeg_wa_slice_data_bo,
2500 I915_GEM_DOMAIN_INSTRUCTION, 0,
2502 OUT_BCS_BATCH(batch, 0);
2503 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2504 OUT_BCS_BATCH(batch, 0);
2505 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2506 OUT_BCS_BATCH(batch, 0);
2507 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2508 OUT_BCS_BATCH(batch, 0);
2509 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2510 OUT_BCS_BATCH(batch, 0);
2511 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_jpeg_wa_avc_bsd_object:
 * Emit MFD_AVC_BSD_OBJECT to decode the canned clip: the full data
 * size, and the slice-start byte/bit offsets taken from
 * gen7_jpeg_wa_clip.data_bit_offset.
 */
2515 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2516                             struct gen7_mfd_context *gen7_mfd_context)
2518 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2520 /* the input bitsteam format on GEN7 differs from GEN6 */
2521 BEGIN_BCS_BATCH(batch, 6);
2522 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2523 OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2524 OUT_BCS_BATCH(batch, 0);
2525 OUT_BCS_BATCH(batch,
2531 OUT_BCS_BATCH(batch,
/* Split the clip's bit offset into byte offset (high) and bit-in-byte (low). */
2532 ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2535 (1 << 3) | /* LastSlice Flag */
2536 (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2537 OUT_BCS_BATCH(batch, 0);
2538 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_jpeg_wa_avc_slice_state:
 * Emit MFX_AVC_SLICE_STATE for the workaround clip: a single I-slice
 * covering the whole 1x1-MB picture, with deblocking disabled and the
 * clip's QP.
 */
2542 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2543                              struct gen7_mfd_context *gen7_mfd_context)
2545 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2546 int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2547 int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2548 int first_mb_in_slice = 0;
2549 int slice_type = SLICE_TYPE_I;
2551 BEGIN_BCS_BATCH(batch, 11);
2552 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2553 OUT_BCS_BATCH(batch, slice_type);
2554 OUT_BCS_BATCH(batch,
2555 (num_ref_idx_l1 << 24) |
2556 (num_ref_idx_l0 << 16) |
2559 OUT_BCS_BATCH(batch,
2561 (1 << 27) | /* disable Deblocking */
2563 (gen7_jpeg_wa_clip.qp << 16) |
2566 OUT_BCS_BATCH(batch,
2567 (slice_ver_pos << 24) |
2568 (slice_hor_pos << 16) |
2569 (first_mb_in_slice << 0));
2570 OUT_BCS_BATCH(batch,
2571 (next_slice_ver_pos << 16) |
2572 (next_slice_hor_pos << 0));
2573 OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2574 OUT_BCS_BATCH(batch, 0);
2575 OUT_BCS_BATCH(batch, 0);
2576 OUT_BCS_BATCH(batch, 0);
2577 OUT_BCS_BATCH(batch, 0);
2578 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_jpeg_wa:
 * Run the full JPEG workaround sequence: decode the tiny canned AVC
 * clip (state setup + BSD object) on the BCS ring before programming
 * the real JPEG decode, to put the MFX engine in a known-good state.
 */
2582 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2583                  struct gen7_mfd_context *gen7_mfd_context)
2585 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2586 gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2587 intel_batchbuffer_emit_mi_flush(batch);
2588 gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2589 gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2590 gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2591 gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2592 gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2593 gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2594 gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2596 gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2597 gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2598 gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
/*
 * gen8_mfd_jpeg_decode_picture:
 * Top-level baseline-JPEG decode entry point. After init and the
 * engine-state workaround, it makes two passes over the slice
 * parameters: the first only scans for the largest DC/AC Huffman table
 * selector (so the right number of tables is programmed once), the
 * second emits one MFD_JPEG_BSD_OBJECT per slice.
 */
2604 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2605                              struct decode_state *decode_state,
2606                              struct gen7_mfd_context *gen7_mfd_context)
2608 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2609 VAPictureParameterBufferJPEGBaseline *pic_param;
2610 VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2611 dri_bo *slice_data_bo;
2612 int i, j, max_selector = 0;
2614 assert(decode_state->pic_param && decode_state->pic_param->buffer);
2615 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2617 /* Currently only support Baseline DCT */
2618 gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2619 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2621 gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2623 intel_batchbuffer_emit_mi_flush(batch);
2624 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2625 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2626 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2627 gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2628 gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
/* Pass 1: find the highest Huffman table selector used by any component. */
2630 for (j = 0; j < decode_state->num_slice_params; j++) {
2631 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2632 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2633 slice_data_bo = decode_state->slice_datas[j]->bo;
2634 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2636 if (j == decode_state->num_slice_params - 1)
2637 next_slice_group_param = NULL;
2639 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2641 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2644 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2646 if (i < decode_state->slice_params[j]->num_elements - 1)
2647 next_slice_param = slice_param + 1;
2649 next_slice_param = next_slice_group_param;
2651 for (component = 0; component < slice_param->num_components; component++) {
2652 if (max_selector < slice_param->components[component].dc_table_selector)
2653 max_selector = slice_param->components[component].dc_table_selector;
2655 if (max_selector < slice_param->components[component].ac_table_selector)
2656 max_selector = slice_param->components[component].ac_table_selector;
/* Baseline JPEG allows at most two Huffman tables per class. */
2663 assert(max_selector < 2);
2664 gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
/* Pass 2: emit one BSD object per slice (scan). */
2666 for (j = 0; j < decode_state->num_slice_params; j++) {
2667 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2668 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2669 slice_data_bo = decode_state->slice_datas[j]->bo;
2670 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2672 if (j == decode_state->num_slice_params - 1)
2673 next_slice_group_param = NULL;
2675 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2677 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2678 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2680 if (i < decode_state->slice_params[j]->num_elements - 1)
2681 next_slice_param = slice_param + 1;
2683 next_slice_param = next_slice_group_param;
2685 gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2690 intel_batchbuffer_end_atomic(batch);
2691 intel_batchbuffer_flush(batch);
/* VP8 DC dequantization lookup table, indexed by the clipped [0,127]
 * quantization index (see VP8 spec, dequantization). */
2694 static const int vp8_dc_qlookup[128] =
2696 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
2697 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
2698 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
2699 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
2700 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
2701 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
2702 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2703 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
/* VP8 AC dequantization lookup table, indexed by the clipped [0,127]
 * quantization index (see VP8 spec, dequantization). */
2706 static const int vp8_ac_qlookup[128] =
2708 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
2709 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
2710 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
2711 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
2712 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
2713 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2714 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2715 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2718 static inline unsigned int vp8_clip_quantization_index(int index)
/*
 * gen8_mfd_vp8_decode_init:
 * Per-picture setup for VP8 decode: bind reference frames, allocate the
 * NV12 render surface, route output through the post- or pre-deblocking
 * path depending on loop_filter_disable, ensure the segmentation-map
 * buffer exists, and (re)allocate the AVC-style row-store scratch BOs
 * sized from the frame's macroblock width.
 */
2729 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2730                          struct decode_state *decode_state,
2731                          struct gen7_mfd_context *gen7_mfd_context)
2733 struct object_surface *obj_surface;
2734 struct i965_driver_data *i965 = i965_driver_data(ctx);
2736 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2737 int width_in_mbs = (pic_param->frame_width + 15) / 16;
2738 int height_in_mbs = (pic_param->frame_height + 15) / 16;
2740 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2741 assert(height_in_mbs > 0 && height_in_mbs <= 256);
2743 intel_update_vp8_frame_store_index(ctx,
2746 gen7_mfd_context->reference_surface);
2748 /* Current decoded picture */
2749 obj_surface = decode_state->render_object;
2750 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Exactly one of the two output paths is valid, selected by the
 * loop-filter flag: post-deblocking when the filter runs, pre- otherwise. */
2752 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2753 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2754 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2755 gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2757 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2758 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2759 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2760 gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2762 intel_ensure_vp8_segmentation_buffer(ctx,
2763 &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2765 /* The same as AVC */
2766 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2767 bo = dri_bo_alloc(i965->intel.bufmgr,
2772 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2773 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2775 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2776 bo = dri_bo_alloc(i965->intel.bufmgr,
2777 "deblocking filter row store",
2778 width_in_mbs * 64 * 4,
2781 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2782 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2784 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2785 bo = dri_bo_alloc(i965->intel.bufmgr,
2786 "bsd mpc row store",
2787 width_in_mbs * 64 * 2,
2790 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2791 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2793 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2794 bo = dri_bo_alloc(i965->intel.bufmgr,
2796 width_in_mbs * 64 * 2,
2799 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2800 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* VP8 has no bitplane buffer. */
2802 gen7_mfd_context->bitplane_read_buffer.valid = 0;
2806 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2807 struct decode_state *decode_state,
2808 struct gen7_mfd_context *gen7_mfd_context)
2810 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2811 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2812 VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2813 VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2814 dri_bo *probs_bo = decode_state->probability_data->bo;
2816 unsigned int quantization_value[4][6];
2818 /* There is no safe way to error out if the segmentation buffer
2819 could not be allocated. So, instead of aborting, simply decode
2820 something even if the result may look totally inacurate */
2821 const unsigned int enable_segmentation =
2822 pic_param->pic_fields.bits.segmentation_enabled &&
2823 gen7_mfd_context->segmentation_buffer.valid;
2825 log2num = (int)log2(slice_param->num_of_partitions - 1);
2827 BEGIN_BCS_BATCH(batch, 38);
2828 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2829 OUT_BCS_BATCH(batch,
2830 (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2831 (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2832 OUT_BCS_BATCH(batch,
2834 pic_param->pic_fields.bits.sharpness_level << 16 |
2835 pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2836 pic_param->pic_fields.bits.sign_bias_golden << 12 |
2837 pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2838 pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2839 (enable_segmentation &&
2840 pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
2841 pic_param->pic_fields.bits.segmentation_enabled << 8 |
2842 (enable_segmentation &&
2843 !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2844 (enable_segmentation &&
2845 pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2846 (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2847 pic_param->pic_fields.bits.filter_type << 4 |
2848 (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2849 !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2851 OUT_BCS_BATCH(batch,
2852 pic_param->loop_filter_level[3] << 24 |
2853 pic_param->loop_filter_level[2] << 16 |
2854 pic_param->loop_filter_level[1] << 8 |
2855 pic_param->loop_filter_level[0] << 0);
2857 /* Quantizer Value for 4 segmetns, DW4-DW15 */
2858 for (i = 0; i < 4; i++) {
2859 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2860 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2861 quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
2862 /* 101581>>16 is equivalent to 155/100 */
2863 quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
2864 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2865 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2867 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2868 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2870 OUT_BCS_BATCH(batch,
2871 quantization_value[i][0] << 16 | /* Y1AC */
2872 quantization_value[i][1] << 0); /* Y1DC */
2873 OUT_BCS_BATCH(batch,
2874 quantization_value[i][5] << 16 | /* UVAC */
2875 quantization_value[i][4] << 0); /* UVDC */
2876 OUT_BCS_BATCH(batch,
2877 quantization_value[i][3] << 16 | /* Y2AC */
2878 quantization_value[i][2] << 0); /* Y2DC */
2881 /* CoeffProbability table for non-key frame, DW16-DW18 */
2883 OUT_BCS_RELOC(batch, probs_bo,
2884 0, I915_GEM_DOMAIN_INSTRUCTION,
2886 OUT_BCS_BATCH(batch, 0);
2887 OUT_BCS_BATCH(batch, 0);
2889 OUT_BCS_BATCH(batch, 0);
2890 OUT_BCS_BATCH(batch, 0);
2891 OUT_BCS_BATCH(batch, 0);
2894 OUT_BCS_BATCH(batch,
2895 pic_param->mb_segment_tree_probs[2] << 16 |
2896 pic_param->mb_segment_tree_probs[1] << 8 |
2897 pic_param->mb_segment_tree_probs[0] << 0);
2899 OUT_BCS_BATCH(batch,
2900 pic_param->prob_skip_false << 24 |
2901 pic_param->prob_intra << 16 |
2902 pic_param->prob_last << 8 |
2903 pic_param->prob_gf << 0);
2905 OUT_BCS_BATCH(batch,
2906 pic_param->y_mode_probs[3] << 24 |
2907 pic_param->y_mode_probs[2] << 16 |
2908 pic_param->y_mode_probs[1] << 8 |
2909 pic_param->y_mode_probs[0] << 0);
2911 OUT_BCS_BATCH(batch,
2912 pic_param->uv_mode_probs[2] << 16 |
2913 pic_param->uv_mode_probs[1] << 8 |
2914 pic_param->uv_mode_probs[0] << 0);
2916 /* MV update value, DW23-DW32 */
2917 for (i = 0; i < 2; i++) {
2918 for (j = 0; j < 20; j += 4) {
2919 OUT_BCS_BATCH(batch,
2920 (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2921 pic_param->mv_probs[i][j + 2] << 16 |
2922 pic_param->mv_probs[i][j + 1] << 8 |
2923 pic_param->mv_probs[i][j + 0] << 0);
2927 OUT_BCS_BATCH(batch,
2928 (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2929 (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2930 (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) << 8 |
2931 (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) << 0);
2933 OUT_BCS_BATCH(batch,
2934 (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2935 (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2936 (pic_param->loop_filter_deltas_mode[1] & 0x7f) << 8 |
2937 (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0);
2939 /* segmentation id stream base address, DW35-DW37 */
2940 if (enable_segmentation) {
2941 OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2942 0, I915_GEM_DOMAIN_INSTRUCTION,
2944 OUT_BCS_BATCH(batch, 0);
2945 OUT_BCS_BATCH(batch, 0);
2948 OUT_BCS_BATCH(batch, 0);
2949 OUT_BCS_BATCH(batch, 0);
2950 OUT_BCS_BATCH(batch, 0);
2952 ADVANCE_BCS_BATCH(batch);
/* Emit the MFD_VP8_BSD_OBJECT command (22 DWs) describing where each VP8
 * bitstream partition lives in slice_data_bo, plus the saved boolean-decoder
 * state for partition 0 so the hardware can resume where the parser stopped.
 * NOTE(review): this extract is missing some lines (the body/close of the
 * "used_bits == 8" branch, the else/closing braces of the partition loop,
 * and the declarations of i/log2num); comments describe only what is
 * visible here — confirm against the full source. */
2956 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2957 VAPictureParameterBufferVP8 *pic_param,
2958 VASliceParameterBufferVP8 *slice_param,
2959 dri_bo *slice_data_bo,
2960 struct gen7_mfd_context *gen7_mfd_context)
2962 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Byte offset of the first token partition: partition 0 starts at
 * slice_data_offset and its macroblock_offset header bits are rounded up
 * to whole bytes. */
2964 unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
/* bool_coder_ctx.count is the number of unused bits in the current byte
 * (0..7, asserted below); used_bits is how many have been consumed. */
2965 unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
2966 unsigned int partition_size_0 = slice_param->partition_size[0];
2968 assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
/* Whole byte consumed: the elided lines presumably reset used_bits and
 * advance offset past that byte, shrinking partition 0 by one byte —
 * TODO confirm against the full source. */
2969 if (used_bits == 8) {
2972 partition_size_0 -= 1;
/* num_of_partitions counts the control partition plus 1..8 token
 * partitions, hence the [2, 9] range. */
2975 assert(slice_param->num_of_partitions >= 2);
2976 assert(slice_param->num_of_partitions <= 9);
/* log2 of the token-partition count (num_of_partitions - 1); the DW that
 * consumes it is elided in this extract. */
2978 log2num = (int)log2(slice_param->num_of_partitions - 1);
2980 BEGIN_BCS_BATCH(batch, 22);
2981 OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2982 OUT_BCS_BATCH(batch,
2983 used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2984 pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
2986 (slice_param->macroblock_offset & 0x7));
2987 OUT_BCS_BATCH(batch,
2988 pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
/* DW3/DW4: size (hardware expects size + 1) and byte offset of
 * partition 0. */
2991 OUT_BCS_BATCH(batch, partition_size_0 + 1);
2992 OUT_BCS_BATCH(batch, offset);
2993 //partition sizes in bytes are present after the above first partition when there are more than one token partition
/* Skip partition 0 plus the 3-byte size fields written before the token
 * partitions (one per extra partition beyond the first token partition). */
2994 offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
/* DW5..DW20: (size + 1, offset) pairs for up to 8 token partitions;
 * slots past num_of_partitions are zeroed (else branch elided here). */
2995 for (i = 1; i < 9; i++) {
2996 if (i < slice_param->num_of_partitions) {
2997 OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
2998 OUT_BCS_BATCH(batch, offset);
3000 OUT_BCS_BATCH(batch, 0);
3001 OUT_BCS_BATCH(batch, 0);
3004 offset += slice_param->partition_size[i];
3007 OUT_BCS_BATCH(batch, 0); /* concealment method */
3009 ADVANCE_BCS_BATCH(batch);
/* Decode one VP8 frame: validate the buffers handed in by the VA-API
 * client, then emit the full MFX command sequence (pipe mode select,
 * surface/buffer state, picture state, BSD object) in one atomic BCS
 * batch and flush it to the GPU. */
3013 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3014 struct decode_state *decode_state,
3015 struct gen7_mfd_context *gen7_mfd_context)
3017 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3018 VAPictureParameterBufferVP8 *pic_param;
3019 VASliceParameterBufferVP8 *slice_param;
3020 dri_bo *slice_data_bo;
3022 assert(decode_state->pic_param && decode_state->pic_param->buffer);
3023 pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3025 /* one slice per frame */
/* Reject anything other than exactly one slice parameter buffer with one
 * element, one slice data BO, and a probability-data buffer. The early
 * return after the warning is elided in this extract — confirm. */
3026 if (decode_state->num_slice_params != 1 ||
3027 (!decode_state->slice_params ||
3028 !decode_state->slice_params[0] ||
3029 (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3030 (!decode_state->slice_datas ||
3031 !decode_state->slice_datas[0] ||
3032 !decode_state->slice_datas[0]->bo) ||
3033 !decode_state->probability_data) {
3034 WARN_ONCE("Wrong parameters for VP8 decoding\n");
3039 slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3040 slice_data_bo = decode_state->slice_datas[0]->bo;
/* Allocate/refresh per-frame resources, then build the batch atomically
 * so no other BCS work interleaves with this command sequence. */
3042 gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3043 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3044 intel_batchbuffer_emit_mi_flush(batch);
3045 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3046 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3047 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3048 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3049 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3050 gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3051 gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3052 intel_batchbuffer_end_atomic(batch);
3053 intel_batchbuffer_flush(batch);
/* hw_context->run entry point for gen8 decode: sanity-check the input
 * buffers, then dispatch to the codec-specific decode routine by profile.
 * NOTE(review): several lines are elided in this extract (the profile
 * parameter, the `switch (profile)` line, per-case `break`s, the default
 * case, and the final `return vaStatus;`). */
3057 gen8_mfd_decode_picture(VADriverContextP ctx,
3059 union codec_state *codec_state,
3060 struct hw_context *hw_context)
3063 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3064 struct decode_state *decode_state = &codec_state->decode;
3067 assert(gen7_mfd_context);
/* Validate client-supplied parameter buffers before touching hardware. */
3069 vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3071 if (vaStatus != VA_STATUS_SUCCESS)
/* -1 resets the MPEG-2 first-field workaround state for the new frame. */
3074 gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3077 case VAProfileMPEG2Simple:
3078 case VAProfileMPEG2Main:
3079 gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3082 case VAProfileH264ConstrainedBaseline:
3083 case VAProfileH264Main:
3084 case VAProfileH264High:
3085 case VAProfileH264StereoHigh:
3086 case VAProfileH264MultiviewHigh:
3087 gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3090 case VAProfileVC1Simple:
3091 case VAProfileVC1Main:
3092 case VAProfileVC1Advanced:
3093 gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3096 case VAProfileJPEGBaseline:
3097 gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3100 case VAProfileVP8Version0_3:
3101 gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3109 vaStatus = VA_STATUS_SUCCESS;
/* hw_context->destroy callback: drop every buffer object the decoder
 * context holds (NULL-ing the stored pointers), tear down the JPEG
 * workaround surface if one was created, then free the batchbuffer and
 * the context itself. */
3116 gen8_mfd_context_destroy(void *hw_context)
3118 VADriverContextP ctx;
3119 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
/* driver_context was stashed at init time; needed for surface teardown. */
3121 ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3123 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3124 gen7_mfd_context->post_deblocking_output.bo = NULL;
3126 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3127 gen7_mfd_context->pre_deblocking_output.bo = NULL;
3129 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3130 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3132 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3133 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3135 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3136 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3138 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3139 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3141 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3142 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3144 dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3145 gen7_mfd_context->segmentation_buffer.bo = NULL;
/* dri_bo_unreference(NULL) is a no-op, so no guard is needed here. */
3147 dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
/* The JPEG workaround surface is only created lazily; VA_INVALID_SURFACE
 * (set at init) means there is nothing to destroy. */
3149 if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3150 i965_DestroySurfaces(ctx,
3151 &gen7_mfd_context->jpeg_wa_surface_id,
3153 gen7_mfd_context->jpeg_wa_surface_object = NULL;
3156 intel_batchbuffer_free(gen7_mfd_context->base.batch);
3157 free(gen7_mfd_context);
/* Initialize the cached MPEG-2 IQ-matrix "load" flags to -1 (no matrix
 * cached yet) — presumably so the first picture's quantiser matrices are
 * always treated as new and get programmed; confirm against the MPEG-2
 * pic-state code that reads these flags. */
3160 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3161 struct gen7_mfd_context *gen7_mfd_context)
3163 gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3164 gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3165 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3166 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
/* Allocate and initialize the gen8 decoder hw_context for a VA config:
 * wire up the destroy/run vtable, create the BCS batchbuffer, invalidate
 * the reference-surface table, and run codec-specific init for the
 * requested profile. Ownership of the returned context passes to the
 * caller (freed via gen8_mfd_context_destroy).
 * NOTE(review): this extract elides the `int i;` declaration, any
 * allocation-failure check on calloc, per-case `break`s, and the default
 * switch case — confirm against the full source. */
3170 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3172 struct intel_driver_data *intel = intel_driver_data(ctx);
3173 struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3176 gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3177 gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3178 gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
/* Mark every reference-frame slot empty so the first frame starts with a
 * clean frame store. */
3180 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3181 gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3182 gen7_mfd_context->reference_surface[i].frame_store_id = -1;
/* Lazily-created resources: JPEG workaround surface and VP8 segmentation
 * buffer start out absent. */
3185 gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3186 gen7_mfd_context->segmentation_buffer.valid = 0;
3188 switch (obj_config->profile) {
3189 case VAProfileMPEG2Simple:
3190 case VAProfileMPEG2Main:
3191 gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3194 case VAProfileH264ConstrainedBaseline:
3195 case VAProfileH264Main:
3196 case VAProfileH264High:
3197 case VAProfileH264StereoHigh:
3198 case VAProfileH264MultiviewHigh:
3199 gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
/* Stash the driver context so destroy() can release surfaces later. */
3205 gen7_mfd_context->driver_context = ctx;
3206 return (struct hw_context *)gen7_mfd_context;