2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/* Zig-zag scan order for 8x8 coefficient blocks: entry i gives the raster
 * index of the i-th coefficient in scan order (used when reordering
 * quantization matrices for the hardware).
 * NOTE(review): the closing "};" of this table is elided from this view. */
51 static const uint32_t zigzag_direct[64] = {
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
54 12, 19, 26, 33, 40, 48, 41, 34,
55 27, 20, 13, 6, 7, 14, 21, 28,
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
/*
 * Lazily attach AVC-specific private data (GenAvcSurface) to the decode
 * target surface and allocate its direct-MV (DMV) scratch buffer, sized
 * from the picture dimensions in the picture parameter buffer.
 * NOTE(review): several lines (return type, braces, trailing statements)
 * are elided from this view of the function.
 */
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64 VAPictureParameterBufferH264 *pic_param,
65 struct object_surface *obj_surface)
67 struct i965_driver_data *i965 = i965_driver_data(ctx);
68 GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69 int width_in_mbs, height_in_mbs;
/* Register the destructor so the surface frees its private data on release. */
71 obj_surface->free_private_data = gen_free_avc_surface;
72 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
/* First use of this surface for AVC decode: create the private data. */
75 if (!gen7_avc_surface) {
/* NOTE(review): calloc result is dereferenced without a NULL check below —
 * potential crash on OOM; confirm against the full source before changing. */
76 gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77 gen7_avc_surface->base.frame_store_id = -1;
78 assert((obj_surface->size & 0x3f) == 0);
79 obj_surface->private_data = gen7_avc_surface;
82 /* DMV buffers now relate to the whole frame, irrespective of
/* Allocate the direct-MV read/write buffer: 128 bytes per macroblock. */
84 if (gen7_avc_surface->dmv_top == NULL) {
85 gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86 "direct mv w/r buffer",
87 width_in_mbs * height_in_mbs * 128,
89 assert(gen7_avc_surface->dmv_top);
/*
 * Emit MFX_PIPE_MODE_SELECT: configure the MFX engine for VLD decode of the
 * selected codec (standard_select) and route output through the pre- or
 * post-deblocking path depending on which output buffer is valid.
 */
94 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
95 struct decode_state *decode_state,
97 struct gen7_mfd_context *gen7_mfd_context)
99 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Only these codecs are supported by this decode path. */
101 assert(standard_select == MFX_FORMAT_MPEG2 ||
102 standard_select == MFX_FORMAT_AVC ||
103 standard_select == MFX_FORMAT_VC1 ||
104 standard_select == MFX_FORMAT_JPEG ||
105 standard_select == MFX_FORMAT_VP8);
107 BEGIN_BCS_BATCH(batch, 5);
108 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
110 (MFX_LONG_MODE << 17) | /* Currently only support long format */
111 (MFD_MODE_VLD << 15) | /* VLD mode */
112 (0 << 10) | /* disable Stream-Out */
113 (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
114 (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
115 (0 << 5) | /* not in stitch mode */
116 (MFX_CODEC_DECODE << 4) | /* decoding mode */
117 (standard_select << 0));
/* Error-handling policy bits: do not terminate on AVC decode errors. */
119 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
120 (0 << 3) | /* terminate if AVC mbdata error occurs */
121 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
124 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
125 OUT_BCS_BATCH(batch, 0); /* reserved */
126 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE for the current render target: surface dimensions,
 * pitch/tiling, plane (Cb/Cr) offsets, and the surface format (monochrome
 * for Y800, otherwise planar 4:2:0 8-bit).
 */
130 gen8_mfd_surface_state(VADriverContextP ctx,
131 struct decode_state *decode_state,
133 struct gen7_mfd_context *gen7_mfd_context)
135 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
136 struct object_surface *obj_surface = decode_state->render_object;
137 unsigned int y_cb_offset;
138 unsigned int y_cr_offset;
139 unsigned int surface_format;
143 y_cb_offset = obj_surface->y_cb_offset;
144 y_cr_offset = obj_surface->y_cr_offset;
/* Y800 (grayscale) surfaces are programmed as monochrome; all others 4:2:0. */
146 surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
147 MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
149 BEGIN_BCS_BATCH(batch, 6);
150 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
151 OUT_BCS_BATCH(batch, 0);
/* Width/height are programmed minus one, per hardware convention. */
153 ((obj_surface->orig_height - 1) << 18) |
154 ((obj_surface->orig_width - 1) << 4));
156 (surface_format << 28) | /* 420 planar YUV surface */
157 ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
158 (0 << 22) | /* surface object control state, ignored */
159 ((obj_surface->width - 1) << 3) | /* pitch */
160 (0 << 2) | /* must be 0 */
161 (1 << 1) | /* must be tiled */
162 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
164 (0 << 16) | /* X offset for U(Cb), must be 0 */
165 (y_cb_offset << 0)); /* Y offset for U(Cb) */
167 (0 << 16) | /* X offset for V(Cr), must be 0 */
168 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
169 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): relocations for the pre/post
 * deblocking outputs, row-store scratch buffers, and the reference picture
 * addresses. Invalid/absent buffers are padded with zero dwords so the
 * command length stays fixed.
 */
173 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
174 struct decode_state *decode_state,
176 struct gen7_mfd_context *gen7_mfd_context)
178 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
181 BEGIN_BCS_BATCH(batch, 61);
182 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
183 /* Pre-deblock 1-3 */
184 if (gen7_mfd_context->pre_deblocking_output.valid)
185 OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
186 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
189 OUT_BCS_BATCH(batch, 0);
191 OUT_BCS_BATCH(batch, 0);
192 OUT_BCS_BATCH(batch, 0);
193 /* Post-deblocking 4-6 */
194 if (gen7_mfd_context->post_deblocking_output.valid)
195 OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
196 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
199 OUT_BCS_BATCH(batch, 0);
201 OUT_BCS_BATCH(batch, 0);
202 OUT_BCS_BATCH(batch, 0);
204 /* uncompressed-video & stream out 7-12 */
205 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
206 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
207 OUT_BCS_BATCH(batch, 0);
208 OUT_BCS_BATCH(batch, 0);
209 OUT_BCS_BATCH(batch, 0);
210 OUT_BCS_BATCH(batch, 0);
212 /* intra row-store scratch 13-15 */
213 if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
214 OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
215 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
218 OUT_BCS_BATCH(batch, 0);
220 OUT_BCS_BATCH(batch, 0);
221 OUT_BCS_BATCH(batch, 0);
222 /* deblocking-filter-row-store 16-18 */
223 if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
224 OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
225 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
228 OUT_BCS_BATCH(batch, 0);
229 OUT_BCS_BATCH(batch, 0);
230 OUT_BCS_BATCH(batch, 0);
/* Reference picture addresses: one slot per entry in reference_surface[];
 * unused or invalid slots are written as 0 (read-only GPU domain for refs). */
233 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
234 struct object_surface *obj_surface;
236 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
237 gen7_mfd_context->reference_surface[i].obj_surface &&
238 gen7_mfd_context->reference_surface[i].obj_surface->bo) {
239 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
241 OUT_BCS_RELOC(batch, obj_surface->bo,
242 I915_GEM_DOMAIN_INSTRUCTION, 0,
245 OUT_BCS_BATCH(batch, 0);
248 OUT_BCS_BATCH(batch, 0);
251 /* reference property 51 */
252 OUT_BCS_BATCH(batch, 0);
254 /* Macroblock status & ILDB 52-57 */
255 OUT_BCS_BATCH(batch, 0);
256 OUT_BCS_BATCH(batch, 0);
257 OUT_BCS_BATCH(batch, 0);
258 OUT_BCS_BATCH(batch, 0);
259 OUT_BCS_BATCH(batch, 0);
260 OUT_BCS_BATCH(batch, 0);
262 /* the second Macroblock status 58-60 */
263 OUT_BCS_BATCH(batch, 0);
264 OUT_BCS_BATCH(batch, 0);
265 OUT_BCS_BATCH(batch, 0);
267 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): point the MFX indirect
 * bitstream object at the slice data buffer. All other indirect objects
 * (MV, IT_COFF, IT_DBLK, PAK_BSE) are unused for VLD decode and zeroed.
 */
271 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
272 dri_bo *slice_data_bo,
274 struct gen7_mfd_context *gen7_mfd_context)
276 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
278 BEGIN_BCS_BATCH(batch, 26);
279 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
281 OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
282 OUT_BCS_BATCH(batch, 0);
283 OUT_BCS_BATCH(batch, 0);
284 /* Upper bound 4-5 */
285 OUT_BCS_BATCH(batch, 0);
286 OUT_BCS_BATCH(batch, 0);
288 /* MFX indirect MV 6-10 */
289 OUT_BCS_BATCH(batch, 0);
290 OUT_BCS_BATCH(batch, 0);
291 OUT_BCS_BATCH(batch, 0);
292 OUT_BCS_BATCH(batch, 0);
293 OUT_BCS_BATCH(batch, 0);
295 /* MFX IT_COFF 11-15 */
296 OUT_BCS_BATCH(batch, 0);
297 OUT_BCS_BATCH(batch, 0);
298 OUT_BCS_BATCH(batch, 0);
299 OUT_BCS_BATCH(batch, 0);
300 OUT_BCS_BATCH(batch, 0);
302 /* MFX IT_DBLK 16-20 */
303 OUT_BCS_BATCH(batch, 0);
304 OUT_BCS_BATCH(batch, 0);
305 OUT_BCS_BATCH(batch, 0);
306 OUT_BCS_BATCH(batch, 0);
307 OUT_BCS_BATCH(batch, 0);
309 /* MFX PAK_BSE object for encoder 21-25 */
310 OUT_BCS_BATCH(batch, 0);
311 OUT_BCS_BATCH(batch, 0);
312 OUT_BCS_BATCH(batch, 0);
313 OUT_BCS_BATCH(batch, 0);
314 OUT_BCS_BATCH(batch, 0);
316 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): BSD/MPC row store, MPR row
 * store, and the VC-1 bitplane read buffer. Invalid buffers are padded
 * with zero dwords to keep the command length fixed.
 */
320 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
321 struct decode_state *decode_state,
323 struct gen7_mfd_context *gen7_mfd_context)
325 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
327 BEGIN_BCS_BATCH(batch, 10);
328 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
330 if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
331 OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
332 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
335 OUT_BCS_BATCH(batch, 0);
337 OUT_BCS_BATCH(batch, 0);
338 OUT_BCS_BATCH(batch, 0);
339 /* MPR Row Store Scratch buffer 4-6 */
340 if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
341 OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
342 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
345 OUT_BCS_BATCH(batch, 0);
347 OUT_BCS_BATCH(batch, 0);
348 OUT_BCS_BATCH(batch, 0);
/* Bitplane buffer is read-only by the GPU (write domain 0). */
351 if (gen7_mfd_context->bitplane_read_buffer.valid)
352 OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
353 I915_GEM_DOMAIN_INSTRUCTION, 0,
356 OUT_BCS_BATCH(batch, 0);
357 OUT_BCS_BATCH(batch, 0);
358 OUT_BCS_BATCH(batch, 0);
359 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one MFX_QM_STATE command: copy a quantization matrix (at most 64
 * bytes) into a 16-dword payload and tag it with the hardware qm_type.
 * The payload is always 16 dwords; shorter matrices leave stale/unset
 * trailing bytes in qm_buffer — assumed ignored by the hardware for this
 * qm_type (TODO confirm; qm_buffer is not zero-initialized).
 */
363 gen8_mfd_qm_state(VADriverContextP ctx,
367 struct gen7_mfd_context *gen7_mfd_context)
369 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
370 unsigned int qm_buffer[16];
372 assert(qm_length <= 16 * 4);
373 memcpy(qm_buffer, qm, qm_length);
375 BEGIN_BCS_BATCH(batch, 18);
376 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
377 OUT_BCS_BATCH(batch, qm_type << 0);
378 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
379 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_IMG_STATE (17 dwords) from the H.264 picture parameters:
 * frame size in macroblocks, QP index offsets, and the per-picture coding
 * flags (entropy mode, MBAFF, transform 8x8, field/frame, etc.).
 */
383 gen8_mfd_avc_img_state(VADriverContextP ctx,
384 struct decode_state *decode_state,
385 struct gen7_mfd_context *gen7_mfd_context)
387 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
389 int mbaff_frame_flag;
390 unsigned int width_in_mbs, height_in_mbs;
391 VAPictureParameterBufferH264 *pic_param;
393 assert(decode_state->pic_param && decode_state->pic_param->buffer);
394 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
395 assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
/* Derive the image structure (frame/top field/bottom field) from CurrPic. */
397 if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
399 else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
/* Field pictures must have field_pic_flag set; frame pictures must not. */
404 if ((img_struct & 0x1) == 0x1) {
405 assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
407 assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
410 if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
411 assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
412 assert(pic_param->pic_fields.bits.field_pic_flag == 0);
414 assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
/* MBAFF applies only to frame pictures of an MBAFF sequence. */
417 mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
418 !pic_param->pic_fields.bits.field_pic_flag);
420 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
421 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
423 /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
424 assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
425 pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
426 assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
428 BEGIN_BCS_BATCH(batch, 17);
429 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
/* Total macroblock count minus one. */
431 (width_in_mbs * height_in_mbs - 1));
433 ((height_in_mbs - 1) << 16) |
434 ((width_in_mbs - 1) << 0));
436 ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
437 ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
438 (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
439 (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
440 (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
441 (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
444 (pic_param->seq_fields.bits.chroma_format_idc << 10) |
445 (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
446 ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
447 (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
448 (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
449 (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
450 (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
451 (mbaff_frame_flag << 1) |
452 (pic_param->pic_fields.bits.field_pic_flag << 0));
/* Remaining dwords of the command are unused for decode and zeroed. */
453 OUT_BCS_BATCH(batch, 0);
454 OUT_BCS_BATCH(batch, 0);
455 OUT_BCS_BATCH(batch, 0);
456 OUT_BCS_BATCH(batch, 0);
457 OUT_BCS_BATCH(batch, 0);
458 OUT_BCS_BATCH(batch, 0);
459 OUT_BCS_BATCH(batch, 0);
460 OUT_BCS_BATCH(batch, 0);
461 OUT_BCS_BATCH(batch, 0);
462 OUT_BCS_BATCH(batch, 0);
463 OUT_BCS_BATCH(batch, 0);
464 OUT_BCS_BATCH(batch, 0);
465 ADVANCE_BCS_BATCH(batch);
/*
 * Program the AVC quantization matrices: 4x4 intra/inter lists always,
 * plus the two 8x8 lists when transform_8x8_mode is enabled. Falls back
 * to the context's default (flat) IQ matrix if the app supplied none.
 */
469 gen8_mfd_avc_qm_state(VADriverContextP ctx,
470 struct decode_state *decode_state,
471 struct gen7_mfd_context *gen7_mfd_context)
473 VAIQMatrixBufferH264 *iq_matrix;
474 VAPictureParameterBufferH264 *pic_param;
476 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
477 iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
479 iq_matrix = &gen7_mfd_context->iq_matrix.h264;
481 assert(decode_state->pic_param && decode_state->pic_param->buffer);
482 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
/* Lists 0-2 are intra, lists 3-5 are inter: 3 lists of 16 bytes each. */
484 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
485 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
487 if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
488 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
489 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
/*
 * Emit the AVC picture-ID state for the current reference surfaces;
 * delegates to the shared Gen7.5 helper.
 */
494 gen8_mfd_avc_picid_state(VADriverContextP ctx,
495 struct decode_state *decode_state,
496 struct gen7_mfd_context *gen7_mfd_context)
498 gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
499 gen7_mfd_context->reference_surface);
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): DMV buffer addresses for each
 * reference surface and the current picture, followed by the top/bottom
 * POC (picture order count) values for references and the current picture.
 * Used by the hardware for B-slice direct-mode motion vector derivation.
 */
503 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
504 struct decode_state *decode_state,
505 VAPictureParameterBufferH264 *pic_param,
506 VASliceParameterBufferH264 *slice_param,
507 struct gen7_mfd_context *gen7_mfd_context)
509 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
510 struct object_surface *obj_surface;
511 GenAvcSurface *gen7_avc_surface;
512 VAPictureH264 *va_pic;
515 BEGIN_BCS_BATCH(batch, 71);
516 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
518 /* reference surfaces 0..15 */
519 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
520 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
521 gen7_mfd_context->reference_surface[i].obj_surface &&
522 gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
524 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
525 gen7_avc_surface = obj_surface->private_data;
/* Reference DMV buffers are read-only to the GPU (write domain 0). */
527 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
528 I915_GEM_DOMAIN_INSTRUCTION, 0,
530 OUT_BCS_BATCH(batch, 0);
532 OUT_BCS_BATCH(batch, 0);
533 OUT_BCS_BATCH(batch, 0);
537 OUT_BCS_BATCH(batch, 0);
539 /* the current decoding frame/field */
540 va_pic = &pic_param->CurrPic;
541 obj_surface = decode_state->render_object;
542 assert(obj_surface->bo && obj_surface->private_data);
543 gen7_avc_surface = obj_surface->private_data;
/* Current picture's DMV buffer is written by the GPU (read+write domain). */
545 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
546 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
549 OUT_BCS_BATCH(batch, 0);
550 OUT_BCS_BATCH(batch, 0);
/* POC list: top/bottom field order counts for each reference slot. */
553 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
554 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
557 const VAPictureH264 * const va_pic = avc_find_picture(
558 obj_surface->base.id, pic_param->ReferenceFrames,
559 ARRAY_ELEMS(pic_param->ReferenceFrames));
561 assert(va_pic != NULL);
562 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
563 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
565 OUT_BCS_BATCH(batch, 0);
566 OUT_BCS_BATCH(batch, 0);
/* Finally, the current picture's POC pair. */
570 va_pic = &pic_param->CurrPic;
571 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
572 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
574 ADVANCE_BCS_BATCH(batch);
/*
 * Emit a phantom (dummy) slice covering the macroblocks before the first
 * real slice, when the first slice does not start at MB 0; delegates to
 * the shared Gen6 helper.
 */
578 gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
579 VAPictureParameterBufferH264 *pic_param,
580 VASliceParameterBufferH264 *next_slice_param,
581 struct gen7_mfd_context *gen7_mfd_context)
583 gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
/*
 * Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice: normalized slice
 * type, active reference counts, QP/deblocking parameters, and the
 * start/end macroblock positions of this slice and the next (or the end
 * of picture when this is the last slice).
 */
587 gen8_mfd_avc_slice_state(VADriverContextP ctx,
588 VAPictureParameterBufferH264 *pic_param,
589 VASliceParameterBufferH264 *slice_param,
590 VASliceParameterBufferH264 *next_slice_param,
591 struct gen7_mfd_context *gen7_mfd_context)
593 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
594 int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
595 int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
596 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
597 int num_ref_idx_l0, num_ref_idx_l1;
598 int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
599 pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
600 int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
/* Collapse SI->I and SP->P: the hardware only knows I/P/B slice types. */
603 if (slice_param->slice_type == SLICE_TYPE_I ||
604 slice_param->slice_type == SLICE_TYPE_SI) {
605 slice_type = SLICE_TYPE_I;
606 } else if (slice_param->slice_type == SLICE_TYPE_P ||
607 slice_param->slice_type == SLICE_TYPE_SP) {
608 slice_type = SLICE_TYPE_P;
610 assert(slice_param->slice_type == SLICE_TYPE_B);
611 slice_type = SLICE_TYPE_B;
/* Active reference counts: I uses none, P uses L0 only, B uses both. */
614 if (slice_type == SLICE_TYPE_I) {
615 assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
616 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
619 } else if (slice_type == SLICE_TYPE_P) {
620 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
621 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
624 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
625 num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
/* Convert linear MB index to (x, y) macroblock coordinates. */
628 first_mb_in_slice = slice_param->first_mb_in_slice;
629 slice_hor_pos = first_mb_in_slice % width_in_mbs;
630 slice_ver_pos = first_mb_in_slice / width_in_mbs;
/* MBAFF pictures address MB pairs, so the vertical position is doubled. */
633 slice_ver_pos = slice_ver_pos << 1;
634 if (next_slice_param) {
635 first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
636 next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
637 next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
640 next_slice_ver_pos = next_slice_ver_pos << 1;
/* Last slice: end position is the bottom of the picture (halved for fields). */
642 next_slice_hor_pos = 0;
643 next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
646 BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
647 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
648 OUT_BCS_BATCH(batch, slice_type);
650 (num_ref_idx_l1 << 24) |
651 (num_ref_idx_l0 << 16) |
652 (slice_param->chroma_log2_weight_denom << 8) |
653 (slice_param->luma_log2_weight_denom << 0));
655 (slice_param->direct_spatial_mv_pred_flag << 29) |
656 (slice_param->disable_deblocking_filter_idc << 27) |
657 (slice_param->cabac_init_idc << 24) |
658 ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
659 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
660 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
662 (slice_ver_pos << 24) |
663 (slice_hor_pos << 16) |
664 (first_mb_in_slice << 0));
666 (next_slice_ver_pos << 16) |
667 (next_slice_hor_pos << 0));
669 (next_slice_param == NULL) << 19); /* last slice flag */
670 OUT_BCS_BATCH(batch, 0);
671 OUT_BCS_BATCH(batch, 0);
672 OUT_BCS_BATCH(batch, 0);
673 OUT_BCS_BATCH(batch, 0);
674 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the AVC reference index state for the current slice; delegates to
 * the shared Gen6 helper using this context's reference surface list.
 */
678 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
679 VAPictureParameterBufferH264 *pic_param,
680 VASliceParameterBufferH264 *slice_param,
681 struct gen7_mfd_context *gen7_mfd_context)
683 gen6_send_avc_ref_idx_state(
684 gen7_mfd_context->base.batch,
686 gen7_mfd_context->reference_surface
/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE when explicit weighted prediction is in
 * use: one table (L0) for weighted P/SP slices, two tables (L0 then L1)
 * for B slices with weighted_bipred_idc == 1. Each table packs 32 entries
 * of 6 shorts: luma weight/offset then Cb and Cr weight/offset pairs.
 */
691 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
692 VAPictureParameterBufferH264 *pic_param,
693 VASliceParameterBufferH264 *slice_param,
694 struct gen7_mfd_context *gen7_mfd_context)
696 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
697 int i, j, num_weight_offset_table = 0;
698 short weightoffsets[32 * 6];
700 if ((slice_param->slice_type == SLICE_TYPE_P ||
701 slice_param->slice_type == SLICE_TYPE_SP) &&
702 (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
703 num_weight_offset_table = 1;
706 if ((slice_param->slice_type == SLICE_TYPE_B) &&
707 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
708 num_weight_offset_table = 2;
/* i == 0 emits the L0 table; i == 1 (B slices only) emits the L1 table. */
711 for (i = 0; i < num_weight_offset_table; i++) {
712 BEGIN_BCS_BATCH(batch, 98);
713 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
714 OUT_BCS_BATCH(batch, i);
717 for (j = 0; j < 32; j++) {
718 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
719 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
720 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
721 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
722 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
723 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
726 for (j = 0; j < 32; j++) {
727 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
728 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
729 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
730 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
731 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
732 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
736 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
737 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFD_AVC_BSD_OBJECT for one slice: slice data size/offset within the
 * indirect bitstream buffer, plus the byte and bit offset of the first
 * macroblock (skipping the slice header) and the last-slice flag.
 */
742 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
743 VAPictureParameterBufferH264 *pic_param,
744 VASliceParameterBufferH264 *slice_param,
745 dri_bo *slice_data_bo,
746 VASliceParameterBufferH264 *next_slice_param,
747 struct gen7_mfd_context *gen7_mfd_context)
749 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Locate the first MB's bit position past the slice header (CABAC-aware). */
750 int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
752 pic_param->pic_fields.bits.entropy_coding_mode_flag);
754 /* the input bitstream format on GEN7 differs from GEN6 */
755 BEGIN_BCS_BATCH(batch, 6);
756 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
758 (slice_param->slice_data_size));
759 OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
/* Byte offset of the first MB, then its residual bit offset (0-7). */
767 ((slice_data_bit_offset >> 3) << 16) |
771 ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
772 (slice_data_bit_offset & 0x7));
773 OUT_BCS_BATCH(batch, 0);
774 ADVANCE_BCS_BATCH(batch);
/*
 * One-time AVC context initialization: populate the fallback IQ matrix
 * with flat (default) scaling lists, used when the app provides none.
 */
778 gen8_mfd_avc_context_init(
779 VADriverContextP ctx,
780 struct gen7_mfd_context *gen7_mfd_context
783 /* Initialize flat scaling lists */
784 avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/*
 * Per-picture AVC decode setup: scan the slices to decide whether in-loop
 * deblocking (ILDB) is enabled, refresh the frame-store index, ensure the
 * render target's BO and AVC private data exist, route the output through
 * the post- (ILDB on) or pre-deblocking (ILDB off) path, and (re)allocate
 * the row-store scratch buffers sized by the picture width in MBs.
 */
788 gen8_mfd_avc_decode_init(VADriverContextP ctx,
789 struct decode_state *decode_state,
790 struct gen7_mfd_context *gen7_mfd_context)
792 VAPictureParameterBufferH264 *pic_param;
793 VASliceParameterBufferH264 *slice_param;
794 struct i965_driver_data *i965 = i965_driver_data(ctx);
795 struct object_surface *obj_surface;
797 int i, j, enable_avc_ildb = 0;
798 unsigned int width_in_mbs, height_in_mbs;
/* ILDB is needed if any slice does not fully disable the deblocking filter;
 * stop scanning as soon as one such slice is found. */
800 for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
801 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
802 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
804 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
805 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
806 assert((slice_param->slice_type == SLICE_TYPE_I) ||
807 (slice_param->slice_type == SLICE_TYPE_SI) ||
808 (slice_param->slice_type == SLICE_TYPE_P) ||
809 (slice_param->slice_type == SLICE_TYPE_SP) ||
810 (slice_param->slice_type == SLICE_TYPE_B));
812 if (slice_param->disable_deblocking_filter_idc != 1) {
821 assert(decode_state->pic_param && decode_state->pic_param->buffer);
822 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
823 gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
824 gen7_mfd_context->reference_surface);
825 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
826 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
827 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
828 assert(height_in_mbs > 0 && height_in_mbs <= 256);
830 /* Current decoded picture */
831 obj_surface = decode_state->render_object;
832 if (pic_param->pic_fields.bits.reference_pic_flag)
833 obj_surface->flags |= SURFACE_REFERENCED;
835 obj_surface->flags &= ~SURFACE_REFERENCED;
837 avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
838 gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
/* Exactly one of the two output paths is valid, selected by ILDB. */
840 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
841 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
842 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
843 gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
845 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
846 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
847 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
848 gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
/* Row-store scratch buffers: reallocated each picture, sized by width. */
850 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
851 bo = dri_bo_alloc(i965->intel.bufmgr,
856 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
857 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
859 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
860 bo = dri_bo_alloc(i965->intel.bufmgr,
861 "deblocking filter row store",
862 width_in_mbs * 64 * 4,
865 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
866 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
868 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
869 bo = dri_bo_alloc(i965->intel.bufmgr,
871 width_in_mbs * 64 * 2,
874 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
875 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
877 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
878 bo = dri_bo_alloc(i965->intel.bufmgr,
880 width_in_mbs * 64 * 2,
883 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
884 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* Bitplane buffer is a VC-1-only concept; unused for AVC. */
886 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Top-level AVC decode entry: initialize per-picture state, then build and
 * flush one atomic BCS batch containing the common MFX state commands
 * followed by per-slice state and BSD objects for every slice in every
 * slice-parameter buffer.
 */
890 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
891 struct decode_state *decode_state,
892 struct gen7_mfd_context *gen7_mfd_context)
894 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
895 VAPictureParameterBufferH264 *pic_param;
896 VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
897 dri_bo *slice_data_bo;
900 assert(decode_state->pic_param && decode_state->pic_param->buffer);
901 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
902 gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
/* Whole picture is emitted as one atomic batch so slices cannot interleave
 * with other contexts' commands. */
904 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
905 intel_batchbuffer_emit_mi_flush(batch);
906 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
907 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
908 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
909 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
910 gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
911 gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
912 gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
/* Outer loop: one iteration per slice-parameter buffer (slice group). */
914 for (j = 0; j < decode_state->num_slice_params; j++) {
915 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
916 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
917 slice_data_bo = decode_state->slice_datas[j]->bo;
918 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
/* Look ahead to the next group's first slice, for last-slice detection. */
920 if (j == decode_state->num_slice_params - 1)
921 next_slice_group_param = NULL;
923 next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
/* If the stream does not start at MB 0, emit a phantom slice to cover
 * the leading macroblocks. */
925 if (j == 0 && slice_param->first_mb_in_slice)
926 gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
/* Inner loop: each slice element within this buffer. */
928 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
929 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
930 assert((slice_param->slice_type == SLICE_TYPE_I) ||
931 (slice_param->slice_type == SLICE_TYPE_SI) ||
932 (slice_param->slice_type == SLICE_TYPE_P) ||
933 (slice_param->slice_type == SLICE_TYPE_SP) ||
934 (slice_param->slice_type == SLICE_TYPE_B));
936 if (i < decode_state->slice_params[j]->num_elements - 1)
937 next_slice_param = slice_param + 1;
939 next_slice_param = next_slice_group_param;
941 gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
942 gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
943 gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
944 gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
945 gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
950 intel_batchbuffer_end_atomic(batch);
951 intel_batchbuffer_flush(batch);
/*
 * Per-picture MPEG-2 decode setup: bind reference surfaces, ensure the
 * render target has an NV12 BO, route output through the pre-deblocking
 * path (MPEG-2 has no in-loop deblocking), and allocate the BSD/MPC row
 * store. All other scratch buffers are marked invalid.
 */
955 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
956 struct decode_state *decode_state,
957 struct gen7_mfd_context *gen7_mfd_context)
959 VAPictureParameterBufferMPEG2 *pic_param;
960 struct i965_driver_data *i965 = i965_driver_data(ctx);
961 struct object_surface *obj_surface;
963 unsigned int width_in_mbs;
965 assert(decode_state->pic_param && decode_state->pic_param->buffer);
966 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
967 width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
969 mpeg2_set_reference_surfaces(
971 gen7_mfd_context->reference_surface,
976 /* Current decoded picture */
977 obj_surface = decode_state->render_object;
978 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* MPEG-2 output always goes through the pre-deblocking path. */
980 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
981 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
982 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
983 gen7_mfd_context->pre_deblocking_output.valid = 1;
985 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
986 bo = dri_bo_alloc(i965->intel.bufmgr,
991 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
992 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
/* Remaining buffers are unused for MPEG-2 decode. */
994 gen7_mfd_context->post_deblocking_output.valid = 0;
995 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
996 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
997 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
998 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * gen8_mfd_mpeg2_pic_state - emit the MFX_MPEG2_PIC_STATE command
 * (13 dwords) describing the current picture to the hardware:
 * f_codes, picture-coding-extension flags, picture coding type, and
 * the frame dimensions in macroblocks.
 */
1002 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1003 struct decode_state *decode_state,
1004 struct gen7_mfd_context *gen7_mfd_context)
1006 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1007 VAPictureParameterBufferMPEG2 *pic_param;
1008 unsigned int slice_concealment_disable_bit = 0;
1010 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1011 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
/* NOTE(review): the condition guarding this assignment is elided in
 * this listing; in context it depends on the stepping/concealment WA. */
1013 slice_concealment_disable_bit = 1;
1015 BEGIN_BCS_BATCH(batch, 13);
1016 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
/* DW1: the four 4-bit f_codes packed high-to-low, then the
 * picture_coding_extension flag bits at their hardware positions. */
1017 OUT_BCS_BATCH(batch,
1018 (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1019 ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1020 ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1021 ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1022 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1023 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1024 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1025 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1026 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1027 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1028 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1029 pic_param->picture_coding_extension.bits.alternate_scan << 6);
/* DW2: picture coding type (I/P/B). */
1030 OUT_BCS_BATCH(batch,
1031 pic_param->picture_coding_type << 9);
/* DW3: concealment control plus frame size in macroblocks, minus one. */
1032 OUT_BCS_BATCH(batch,
1033 (slice_concealment_disable_bit << 31) |
1034 ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1035 ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
/* DW4..DW12: reserved/unused, programmed as zero. */
1036 OUT_BCS_BATCH(batch, 0);
1037 OUT_BCS_BATCH(batch, 0);
1038 OUT_BCS_BATCH(batch, 0);
1039 OUT_BCS_BATCH(batch, 0);
1040 OUT_BCS_BATCH(batch, 0);
1041 OUT_BCS_BATCH(batch, 0);
1042 OUT_BCS_BATCH(batch, 0);
1043 OUT_BCS_BATCH(batch, 0);
1044 OUT_BCS_BATCH(batch, 0);
1045 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_mpeg2_qm_state - maintain and program the MPEG-2 inverse
 * quantiser matrices.
 *
 * The driver caches the intra / non-intra matrices in
 * gen7_mfd_context->iq_matrix.mpeg2 (de-zigzagged into raster order via
 * zigzag_direct) so they persist across pictures that do not reload
 * them, then emits whichever matrices are loaded via gen8_mfd_qm_state.
 */
1049 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1050 struct decode_state *decode_state,
1051 struct gen7_mfd_context *gen7_mfd_context)
1053 VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1056 /* Update internal QM state */
1057 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1058 VAIQMatrixBufferMPEG2 * const iq_matrix =
1059 (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
/* -1 marks "never initialised"; otherwise only refresh when the app
 * explicitly reloads the matrix for this picture. */
1061 if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1062 iq_matrix->load_intra_quantiser_matrix) {
1063 gen_iq_matrix->load_intra_quantiser_matrix =
1064 iq_matrix->load_intra_quantiser_matrix;
1065 if (iq_matrix->load_intra_quantiser_matrix) {
/* Input is in zigzag scan order; store in raster order for HW. */
1066 for (j = 0; j < 64; j++)
1067 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1068 iq_matrix->intra_quantiser_matrix[j];
/* Same caching scheme for the non-intra matrix. */
1072 if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1073 iq_matrix->load_non_intra_quantiser_matrix) {
1074 gen_iq_matrix->load_non_intra_quantiser_matrix =
1075 iq_matrix->load_non_intra_quantiser_matrix;
1076 if (iq_matrix->load_non_intra_quantiser_matrix) {
1077 for (j = 0; j < 64; j++)
1078 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1079 iq_matrix->non_intra_quantiser_matrix[j];
1084 /* Commit QM state to HW */
1085 for (i = 0; i < 2; i++) {
1086 unsigned char *qm = NULL;
/* Iteration 0 selects the intra matrix, iteration 1 the non-intra one
 * (guarding condition on i is elided in this listing). */
1090 if (gen_iq_matrix->load_intra_quantiser_matrix) {
1091 qm = gen_iq_matrix->intra_quantiser_matrix;
1092 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1095 if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1096 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1097 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1104 gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
/*
 * gen8_mfd_mpeg2_bsd_object - emit one MFD_MPEG2_BSD_OBJECT command
 * (5 dwords) for a single slice.
 *
 * Computes the slice's bitstream window (size/offset adjusted by the
 * byte part of macroblock_offset), its macroblock extent from this
 * slice's start position to the next slice's start (or to the end of
 * the picture when there is no next slice), and applies the
 * field-picture slice-vertical-position workaround when active.
 */
1109 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1110 VAPictureParameterBufferMPEG2 *pic_param,
1111 VASliceParameterBufferMPEG2 *slice_param,
1112 VASliceParameterBufferMPEG2 *next_slice_param,
1113 struct gen7_mfd_context *gen7_mfd_context)
1115 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1116 unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1117 int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1119 if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1120 pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
/* Workaround applies only to field pictures when the per-context WA
 * flag was detected as active (> 0). */
1122 is_field_pic_wa = is_field_pic &&
1123 gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
/* Start position of this slice in macroblock units; halve the vertical
 * position when the WA is active. */
1125 vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1126 hpos0 = slice_param->slice_horizontal_position;
1128 if (next_slice_param == NULL) {
/* Last slice: extend to the bottom of the (field) picture. */
1129 vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1132 vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1133 hpos1 = next_slice_param->slice_horizontal_position;
/* Number of macroblocks covered by this slice, in raster order. */
1136 mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1138 BEGIN_BCS_BATCH(batch, 5);
1139 OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
/* DW1/DW2: bitstream byte count and start offset, with the whole-byte
 * part of macroblock_offset folded in; the bit remainder goes in DW3. */
1140 OUT_BCS_BATCH(batch,
1141 slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1142 OUT_BCS_BATCH(batch,
1143 slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1144 OUT_BCS_BATCH(batch,
/* Last-slice flags are set when there is no following slice. */
1148 (next_slice_param == NULL) << 5 |
1149 (next_slice_param == NULL) << 3 |
1150 (slice_param->macroblock_offset & 0x7));
1151 OUT_BCS_BATCH(batch,
1152 (slice_param->quantiser_scale_code << 24) |
1153 (vpos1 << 8 | hpos1));
1154 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_mpeg2_decode_picture - top-level MPEG-2 picture decode.
 *
 * Runs per-picture init, then inside one atomic BCS batch emits the
 * common MFX pipeline state, the MPEG-2 picture and QM state, and one
 * BSD object per slice across all slice parameter buffers, threading
 * each slice to its successor so slice extents can be computed.
 */
1158 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1159 struct decode_state *decode_state,
1160 struct gen7_mfd_context *gen7_mfd_context)
1162 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1163 VAPictureParameterBufferMPEG2 *pic_param;
1164 VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1165 dri_bo *slice_data_bo;
1168 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1169 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1171 gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
/* All state + slice commands are emitted as one atomic batch. */
1172 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1173 intel_batchbuffer_emit_mi_flush(batch);
1174 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1175 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1176 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1177 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1178 gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1179 gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
/* Detect the slice-vertical-position workaround lazily (< 0 means
 * "not yet determined"). */
1181 if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1182 gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1183 mpeg2_wa_slice_vertical_position(decode_state, pic_param);
/* Outer loop: one iteration per slice parameter buffer ("group"). */
1185 for (j = 0; j < decode_state->num_slice_params; j++) {
1186 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1187 slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1188 slice_data_bo = decode_state->slice_datas[j]->bo;
1189 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
/* First slice of the NEXT group, used as successor of this group's
 * last slice (NULL for the very last group). */
1191 if (j == decode_state->num_slice_params - 1)
1192 next_slice_group_param = NULL;
1194 next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
/* Inner loop: each slice element within this buffer. */
1196 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1197 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1199 if (i < decode_state->slice_params[j]->num_elements - 1)
1200 next_slice_param = slice_param + 1;
1202 next_slice_param = next_slice_group_param;
1204 gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1209 intel_batchbuffer_end_atomic(batch);
1210 intel_batchbuffer_flush(batch);
/* Lookup tables translating VA-API VC-1 parameter values into the
 * GEN7 hardware encodings used by the MFD VC-1 commands. */

/* VA picture_type -> GEN7 picture type code. */
1213 static const int va_to_gen7_vc1_pic_type[5] = {
1217 GEN7_VC1_BI_PICTURE,
/* VA mv_mode -> GEN7 unified motion-vector mode. */
1221 static const int va_to_gen7_vc1_mv[4] = {
1223 2, /* 1-MV half-pel */
1224 3, /* 1-MV half-pel bilinear */
/* B-fraction scale factors indexed by b_picture_fraction (0..20). */
1228 static const int b_picture_scale_factor[21] = {
1229 128, 85, 170, 64, 192,
1230 51, 102, 153, 204, 43,
1231 215, 37, 74, 111, 148,
1232 185, 222, 32, 96, 160,
/* VA conditional_overlap_flag -> GEN7 CONDOVER encoding. */
1236 static const int va_to_gen7_vc1_condover[3] = {
/* VA sequence profile -> GEN7 profile code. */
1242 static const int va_to_gen7_vc1_profile[4] = {
1243 GEN7_VC1_SIMPLE_PROFILE,
1244 GEN7_VC1_MAIN_PROFILE,
1245 GEN7_VC1_RESERVED_PROFILE,
1246 GEN7_VC1_ADVANCED_PROFILE
/*
 * gen8_mfd_free_vc1_surface - destructor for the per-surface VC-1
 * private data: drops the direct-MV buffer reference and frees the
 * struct. Installed as obj_surface->free_private_data; no-op when the
 * private data was never allocated.
 */
1250 gen8_mfd_free_vc1_surface(void **data)
1252 struct gen7_vc1_surface *gen7_vc1_surface = *data;
1254 if (!gen7_vc1_surface)
1257 dri_bo_unreference(gen7_vc1_surface->dmv);
1258 free(gen7_vc1_surface);
/*
 * gen8_mfd_init_vc1_surface - lazily attach VC-1 private data to a
 * surface: allocates the gen7_vc1_surface struct on first use, records
 * the picture type for later B-picture reference checks, and allocates
 * the per-surface direct-MV read/write buffer (64 bytes per MB) if not
 * already present.
 */
1263 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1264 VAPictureParameterBufferVC1 *pic_param,
1265 struct object_surface *obj_surface)
1267 struct i965_driver_data *i965 = i965_driver_data(ctx);
1268 struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1269 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1270 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
/* Register the matching destructor so the surface can free this data. */
1272 obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1274 if (!gen7_vc1_surface) {
1275 gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1276 assert((obj_surface->size & 0x3f) == 0);
1277 obj_surface->private_data = gen7_vc1_surface;
/* Remember this picture's type; consumed when a later B picture checks
 * whether its backward reference carries usable direct MVs. */
1280 gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1282 if (gen7_vc1_surface->dmv == NULL) {
1283 gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1284 "direct mv w/r buffer",
1285 width_in_mbs * height_in_mbs * 64,
/*
 * gen8_mfd_vc1_decode_init - per-picture setup for VC-1 decoding.
 *
 * Updates the frame-store index, prepares the render surface (NV12 bo
 * plus VC-1 private data), routes the output to post- or pre-deblocking
 * depending on whether the in-loop filter is enabled, (re)allocates the
 * three row-store scratch buffers, and, when bitplane data is present,
 * repacks the VA bitplane buffer into the hardware layout.
 */
1291 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1292 struct decode_state *decode_state,
1293 struct gen7_mfd_context *gen7_mfd_context)
1295 VAPictureParameterBufferVC1 *pic_param;
1296 struct i965_driver_data *i965 = i965_driver_data(ctx);
1297 struct object_surface *obj_surface;
1302 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1303 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1304 width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1305 picture_type = pic_param->picture_fields.bits.picture_type;
1307 intel_update_vc1_frame_store_index(ctx,
1310 gen7_mfd_context->reference_surface);
1312 /* Current decoded picture */
1313 obj_surface = decode_state->render_object;
1314 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1315 gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
/* With the loop filter on, decoded pixels come out post-deblocking;
 * otherwise pre-deblocking. Exactly one of the two is valid. */
1317 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1318 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1319 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1320 gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1322 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1323 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1324 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1325 gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
/* Row-store scratch buffers sized from the picture width in MBs
 * (some allocation arguments are elided in this listing). */
1327 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1328 bo = dri_bo_alloc(i965->intel.bufmgr,
1333 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1334 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1336 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1337 bo = dri_bo_alloc(i965->intel.bufmgr,
1338 "deblocking filter row store",
1339 width_in_mbs * 7 * 64,
1342 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1343 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1345 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1346 bo = dri_bo_alloc(i965->intel.bufmgr,
1347 "bsd mpc row store",
1351 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1352 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1354 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
/* Bitplane buffer is only needed when the app supplied bitplane data. */
1356 gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1357 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1359 if (gen7_mfd_context->bitplane_read_buffer.valid) {
1360 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1361 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
/* Two macroblocks are packed per output byte (4 bits each). */
1362 int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1364 uint8_t *src = NULL, *dst = NULL;
1366 assert(decode_state->bit_plane->buffer);
1367 src = decode_state->bit_plane->buffer;
1369 bo = dri_bo_alloc(i965->intel.bufmgr,
1371 bitplane_width * height_in_mbs,
1374 gen7_mfd_context->bitplane_read_buffer.bo = bo;
/* CPU-repack the VA bitplane nibbles into the HW bitplane layout. */
1376 dri_bo_map(bo, True);
1377 assert(bo->virtual);
1380 for (src_h = 0; src_h < height_in_mbs; src_h++) {
1381 for(src_w = 0; src_w < width_in_mbs; src_w++) {
1382 int src_index, dst_index;
/* Source packs two MBs per byte; even MB index sits in the high
 * nibble (shift 4), odd in the low nibble. */
1386 src_index = (src_h * width_in_mbs + src_w) / 2;
1387 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1388 src_value = ((src[src_index] >> src_shift) & 0xf);
/* NOTE(review): body of the skipped-picture special case is elided
 * here; the general path below repacks nibble-by-nibble. */
1390 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1394 dst_index = src_w / 2;
1395 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
/* Odd trailing macroblock: shift the final nibble into place. */
1399 dst[src_w / 2] >>= 4;
1401 dst += bitplane_width;
/* No bitplane supplied: leave the read buffer unset. */
1406 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
/*
 * gen8_mfd_vc1_pic_state - derive all per-picture VC-1 controls from
 * the VA picture parameters and emit the MFD_VC1_LONG_PIC_STATE
 * command (6 dwords).
 *
 * The derivation covers: ALTPQUANT configuration from DQUANT /
 * DQUANTFRM / DQPROFILE, the unified MV mode, the B-fraction scale
 * factor, the effective picture type (Advanced-profile I is reported
 * as BI to the HW), AC coding-set selection, the transform-type
 * override when VSTRANSFORM is off, and direct-MV buffer validity for
 * B pictures.
 */
1410 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1411 struct decode_state *decode_state,
1412 struct gen7_mfd_context *gen7_mfd_context)
1414 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1415 VAPictureParameterBufferVC1 *pic_param;
1416 struct object_surface *obj_surface;
1417 int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1418 int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1419 int unified_mv_mode;
1420 int ref_field_pic_polarity = 0;
1421 int scale_factor = 0;
1423 int dmv_surface_valid = 0;
1429 int interpolation_mode = 0;
1431 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1432 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
/* Unpack the quantizer-related bitstream fields. */
1434 profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1435 dquant = pic_param->pic_quantizer_fields.bits.dquant;
1436 dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1437 dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1438 dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1439 dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1440 dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1441 alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
/* ALTPQUANT config/edge-mask per the VC-1 DQUANT rules (several case
 * labels are elided in this listing). */
1444 alt_pquant_config = 0;
1445 alt_pquant_edge_mask = 0;
1446 } else if (dquant == 2) {
1447 alt_pquant_config = 1;
1448 alt_pquant_edge_mask = 0xf;
1450 assert(dquant == 1);
1451 if (dquantfrm == 0) {
1452 alt_pquant_config = 0;
1453 alt_pquant_edge_mask = 0;
1456 assert(dquantfrm == 1);
1457 alt_pquant_config = 1;
1459 switch (dqprofile) {
1461 if (dqbilevel == 0) {
1462 alt_pquant_config = 2;
1463 alt_pquant_edge_mask = 0;
1465 assert(dqbilevel == 1);
1466 alt_pquant_config = 3;
1467 alt_pquant_edge_mask = 0;
1472 alt_pquant_edge_mask = 0xf;
1477 alt_pquant_edge_mask = 0x9;
1479 alt_pquant_edge_mask = (0x3 << dqdbedge);
1484 alt_pquant_edge_mask = (0x1 << dqsbedge);
/* Under intensity compensation the real MV mode is mv_mode2. */
1493 if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1494 assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1495 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1497 assert(pic_param->mv_fields.bits.mv_mode < 4);
1498 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1501 if (pic_param->sequence_fields.bits.interlace == 1 &&
1502 pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1503 /* FIXME: calculate reference field picture polarity */
1505 ref_field_pic_polarity = 0;
1508 if (pic_param->b_picture_fraction < 21)
1509 scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1511 picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
/* Advanced-profile I pictures are programmed as BI for the HW. */
1513 if (profile == GEN7_VC1_ADVANCED_PROFILE &&
1514 picture_type == GEN7_VC1_I_PICTURE)
1515 picture_type = GEN7_VC1_BI_PICTURE;
/* Intra pictures use AC coding set 2, inter pictures set 1. */
1517 if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1518 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1520 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1523 * 8.3.6.2.1 Transform Type Selection
1524 * If variable-sized transform coding is not enabled,
1525 * then the 8x8 transform shall be used for all blocks.
1526 * it is also MFX_VC1_PIC_STATE requirement.
1528 if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1529 pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
1530 pic_param->transform_fields.bits.frame_level_transform_type = 0;
/* B pictures need direct MVs from the backward reference; they are
 * only valid when that reference is itself a P (not I/BI) picture. */
1534 if (picture_type == GEN7_VC1_B_PICTURE) {
1535 struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1537 obj_surface = decode_state->reference_objects[1];
1540 gen7_vc1_surface = obj_surface->private_data;
1542 if (!gen7_vc1_surface ||
1543 (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1544 va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1545 dmv_surface_valid = 0;
1547 dmv_surface_valid = 1;
1550 assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1552 if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1553 fcm = pic_param->picture_fields.bits.frame_coding_mode;
1555 if (pic_param->picture_fields.bits.top_field_first)
/* BRFD (backward reference distance) derived from the scale factor. */
1561 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1562 brfd = pic_param->reference_fields.bits.reference_distance;
1563 brfd = (scale_factor * brfd) >> 8;
1564 brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1570 overlap = pic_param->sequence_fields.bits.overlap;
/* Overlap smoothing enable depends on profile, picture type, PQUANT
 * and CONDOVER (several branch bodies are elided in this listing). */
1574 if (profile != GEN7_VC1_ADVANCED_PROFILE){
1575 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1576 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1580 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1581 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1584 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1585 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
1586 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1588 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1589 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1596 assert(pic_param->conditional_overlap_flag < 3);
1597 assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
/* Sub-pixel interpolation mode from the (effective) MV mode. */
1599 if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1600 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1601 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1602 interpolation_mode = 9; /* Half-pel bilinear */
1603 else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1604 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1605 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1606 interpolation_mode = 1; /* Half-pel bicubic */
1608 interpolation_mode = 0; /* Quarter-pel bicubic */
/* Emit the 6-dword long-format picture state. */
1610 BEGIN_BCS_BATCH(batch, 6);
1611 OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
/* DW1: picture size in macroblocks, minus one. */
1612 OUT_BCS_BATCH(batch,
1613 (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1614 ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
/* DW2: misc picture controls. */
1615 OUT_BCS_BATCH(batch,
1616 ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1617 dmv_surface_valid << 15 |
1618 (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1619 pic_param->rounding_control << 13 |
1620 pic_param->sequence_fields.bits.syncmarker << 12 |
1621 interpolation_mode << 8 |
1622 0 << 7 | /* FIXME: scale up or down ??? */
1623 pic_param->range_reduction_frame << 6 |
1624 pic_param->entrypoint_fields.bits.loopfilter << 5 |
1626 !pic_param->picture_fields.bits.is_first_field << 3 |
1627 (pic_param->sequence_fields.bits.profile == 3) << 0);
/* DW3: CONDOVER, picture type and quantizer scale. */
1628 OUT_BCS_BATCH(batch,
1629 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1630 picture_type << 26 |
1633 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
/* DW4: MV mode, reference controls and ALTPQUANT configuration. */
1635 OUT_BCS_BATCH(batch,
1636 unified_mv_mode << 28 |
1637 pic_param->mv_fields.bits.four_mv_switch << 27 |
1638 pic_param->fast_uvmc_flag << 26 |
1639 ref_field_pic_polarity << 25 |
1640 pic_param->reference_fields.bits.num_reference_pictures << 24 |
1641 pic_param->reference_fields.bits.reference_distance << 20 |
1642 pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1643 pic_param->mv_fields.bits.extended_dmv_range << 10 |
1644 pic_param->mv_fields.bits.extended_mv_range << 8 |
1645 alt_pquant_edge_mask << 4 |
1646 alt_pquant_config << 2 |
1647 pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1648 pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
/* DW5: raw-mode bitplane flags (inverted: HW bit set when the syntax
 * element is NOT carried in the bitplane buffer) plus table selects. */
1649 OUT_BCS_BATCH(batch,
1650 !!pic_param->bitplane_present.value << 31 |
1651 !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1652 !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1653 !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1654 !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1655 !pic_param->bitplane_present.flags.bp_overflags << 26 |
1656 !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1657 !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1658 pic_param->mv_fields.bits.mv_table << 20 |
1659 pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1660 pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1661 pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1662 pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1663 pic_param->mb_mode_table << 8 |
1665 pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1666 pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1667 pic_param->cbp_table << 0);
1668 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_vc1_pred_pipe_state - emit MFX_VC1_PRED_PIPE_STATE (6 dwords).
 *
 * Programs the VC-1 prediction-pipe intensity-compensation controls:
 * when the picture's mv_mode is intensity compensation, the single-IC
 * enable bits are set and luma_scale / luma_shift from the picture
 * parameters are programmed; the remaining dwords are zero.
 *
 * Fix: the assert + pic_param fetch pair was emitted twice verbatim;
 * the redundant second copy has been removed (no behavior change).
 */
1672 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1673 struct decode_state *decode_state,
1674 struct gen7_mfd_context *gen7_mfd_context)
1676 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1677 VAPictureParameterBufferVC1 *pic_param;
1678 int intensitycomp_single;
1680 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1681 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
/* Intensity compensation is signalled through the MV mode. */
1685 intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1687 BEGIN_BCS_BATCH(batch, 6);
1688 OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
/* DW1: IC enables; double-IC and interlace fields left unprogrammed. */
1689 OUT_BCS_BATCH(batch,
1690 0 << 14 | /* FIXME: double ??? */
1692 intensitycomp_single << 10 |
1693 intensitycomp_single << 8 |
1694 0 << 4 | /* FIXME: interlace mode */
/* DW2: luma scaling parameters for intensity compensation. */
1696 OUT_BCS_BATCH(batch,
1697 pic_param->luma_shift << 16 |
1698 pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1699 OUT_BCS_BATCH(batch, 0);
1700 OUT_BCS_BATCH(batch, 0);
1701 OUT_BCS_BATCH(batch, 0);
1702 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_vc1_directmode_state - emit MFX_VC1_DIRECTMODE_STATE
 * (7 dwords): the direct-MV write buffer of the current picture and
 * the direct-MV read buffer of the backward reference, each emitted as
 * a relocation when present or a zero dword otherwise.
 */
1706 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1707 struct decode_state *decode_state,
1708 struct gen7_mfd_context *gen7_mfd_context)
1710 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1711 struct object_surface *obj_surface;
1712 dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
/* The current render target's private data carries the DMV buffer
 * the hardware writes this picture's motion vectors into. */
1714 obj_surface = decode_state->render_object;
1716 if (obj_surface && obj_surface->private_data) {
1717 dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
/* reference_objects[1] is the backward reference; its DMV buffer is
 * read back for direct-mode prediction. */
1720 obj_surface = decode_state->reference_objects[1];
1722 if (obj_surface && obj_surface->private_data) {
1723 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1726 BEGIN_BCS_BATCH(batch, 7);
1727 OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
/* DMV write address (writable relocation) or zero when unavailable. */
1729 if (dmv_write_buffer)
1730 OUT_BCS_RELOC(batch, dmv_write_buffer,
1731 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1734 OUT_BCS_BATCH(batch, 0);
1736 OUT_BCS_BATCH(batch, 0);
1737 OUT_BCS_BATCH(batch, 0);
/* DMV read address (read-only relocation) or zero when unavailable. */
1739 if (dmv_read_buffer)
1740 OUT_BCS_RELOC(batch, dmv_read_buffer,
1741 I915_GEM_DOMAIN_INSTRUCTION, 0,
1744 OUT_BCS_BATCH(batch, 0);
1746 OUT_BCS_BATCH(batch, 0);
1747 OUT_BCS_BATCH(batch, 0);
1749 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_vc1_get_macroblock_bit_offset - adjust the slice's
 * macroblock bit offset for emulation-prevention bytes.
 *
 * Scans the slice header bytes for 0x00 0x00 0x03 <0x04 escape
 * sequences and rebases the bit offset onto the byte position reached
 * after accounting for them; without escapes the input offset is
 * returned unchanged. NOTE(review): the profile-dependent branch and
 * loop body are partially elided in this listing.
 */
1753 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1755 int out_slice_data_bit_offset;
1756 int slice_header_size = in_slice_data_bit_offset / 8;
1760 out_slice_data_bit_offset = in_slice_data_bit_offset;
1762 for (i = 0, j = 0; i < slice_header_size; i++, j++) {
/* 0x00 0x00 0x03 followed by a value < 4: emulation prevention byte. */
1763 if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
/* Keep the sub-byte bit position, rebase onto the adjusted byte index. */
1768 out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1771 return out_slice_data_bit_offset;
/*
 * gen8_mfd_vc1_bsd_object - emit one MFD_VC1_BSD_OBJECT command
 * (5 dwords) for a single VC-1 slice.
 *
 * Maps the slice data to recompute the macroblock bit offset (escape
 * bytes shift it), then programs the bitstream window, the slice's
 * vertical extent (down to the next slice or the picture bottom), and
 * the residual bit offset within the first byte.
 */
1775 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1776 VAPictureParameterBufferVC1 *pic_param,
1777 VASliceParameterBufferVC1 *slice_param,
1778 VASliceParameterBufferVC1 *next_slice_param,
1779 dri_bo *slice_data_bo,
1780 struct gen7_mfd_context *gen7_mfd_context)
1782 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1783 int next_slice_start_vert_pos;
1784 int macroblock_offset;
1785 uint8_t *slice_data = NULL;
/* CPU-map the slice bo to inspect the header bytes for escapes. */
1787 dri_bo_map(slice_data_bo, 0);
1788 slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1789 macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1790 slice_param->macroblock_offset,
1791 pic_param->sequence_fields.bits.profile);
1792 dri_bo_unmap(slice_data_bo);
/* Last slice extends to the bottom of the coded picture. */
1794 if (next_slice_param)
1795 next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1797 next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1799 BEGIN_BCS_BATCH(batch, 5);
1800 OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
/* DW1/DW2: byte count and start offset with the whole-byte part of
 * the (adjusted) macroblock offset folded in. */
1801 OUT_BCS_BATCH(batch,
1802 slice_param->slice_data_size - (macroblock_offset >> 3));
1803 OUT_BCS_BATCH(batch,
1804 slice_param->slice_data_offset + (macroblock_offset >> 3));
/* DW3: first and next slice start rows in macroblock units. */
1805 OUT_BCS_BATCH(batch,
1806 slice_param->slice_vertical_position << 16 |
1807 next_slice_start_vert_pos << 0);
/* DW4: remaining bit offset within the first byte. */
1808 OUT_BCS_BATCH(batch,
1809 (macroblock_offset & 0x7));
1810 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_vc1_decode_picture - top-level VC-1 picture decode.
 *
 * Runs per-picture init, then inside one atomic BCS batch emits the
 * common MFX pipeline state, VC-1 picture / prediction-pipe /
 * direct-mode state, and one BSD object per slice across all slice
 * parameter buffers, threading each slice to its successor.
 */
1814 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1815 struct decode_state *decode_state,
1816 struct gen7_mfd_context *gen7_mfd_context)
1818 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1819 VAPictureParameterBufferVC1 *pic_param;
1820 VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1821 dri_bo *slice_data_bo;
1824 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1825 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1827 gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
/* All state + slice commands are emitted as one atomic batch. */
1828 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1829 intel_batchbuffer_emit_mi_flush(batch);
1830 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1831 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1832 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1833 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1834 gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1835 gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1836 gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
/* Outer loop: one iteration per slice parameter buffer ("group"). */
1838 for (j = 0; j < decode_state->num_slice_params; j++) {
1839 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1840 slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1841 slice_data_bo = decode_state->slice_datas[j]->bo;
1842 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
/* First slice of the NEXT group; NULL for the last group. */
1844 if (j == decode_state->num_slice_params - 1)
1845 next_slice_group_param = NULL;
1847 next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
/* Inner loop: each slice element within this buffer. */
1849 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1850 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1852 if (i < decode_state->slice_params[j]->num_elements - 1)
1853 next_slice_param = slice_param + 1;
1855 next_slice_param = next_slice_group_param;
1857 gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1862 intel_batchbuffer_end_atomic(batch);
1863 intel_batchbuffer_flush(batch);
/*
 * gen8_mfd_jpeg_decode_init - per-picture setup for JPEG decoding.
 *
 * Derives the target surface fourcc and chroma subsampling from the
 * JPEG components' sampling factors (defaulting to IMC3/4:2:0),
 * allocates the render surface accordingly, binds it as the
 * pre-deblocking output, and marks every scratch buffer unused (JPEG
 * needs none of them).
 */
1867 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1868 struct decode_state *decode_state,
1869 struct gen7_mfd_context *gen7_mfd_context)
1871 struct object_surface *obj_surface;
1872 VAPictureParameterBufferJPEGBaseline *pic_param;
1873 int subsampling = SUBSAMPLE_YUV420;
1874 int fourcc = VA_FOURCC_IMC3;
1876 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
/* Single component: grayscale. */
1878 if (pic_param->num_components == 1)
1879 subsampling = SUBSAMPLE_YUV400;
1880 else if (pic_param->num_components == 3) {
/* Map the Y/Cb/Cr horizontal and vertical sampling factors to a
 * known chroma layout. */
1881 int h1 = pic_param->components[0].h_sampling_factor;
1882 int h2 = pic_param->components[1].h_sampling_factor;
1883 int h3 = pic_param->components[2].h_sampling_factor;
1884 int v1 = pic_param->components[0].v_sampling_factor;
1885 int v2 = pic_param->components[1].v_sampling_factor;
1886 int v3 = pic_param->components[2].v_sampling_factor;
1888 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1889 v1 == 2 && v2 == 1 && v3 == 1) {
1890 subsampling = SUBSAMPLE_YUV420;
1891 fourcc = VA_FOURCC_IMC3;
1892 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1893 v1 == 1 && v2 == 1 && v3 == 1) {
1894 subsampling = SUBSAMPLE_YUV422H;
1895 fourcc = VA_FOURCC_422H;
1896 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1897 v1 == 1 && v2 == 1 && v3 == 1) {
1898 subsampling = SUBSAMPLE_YUV444;
1899 fourcc = VA_FOURCC_444P;
1900 } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1901 v1 == 1 && v2 == 1 && v3 == 1) {
1902 subsampling = SUBSAMPLE_YUV411;
1903 fourcc = VA_FOURCC_411P;
1904 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1905 v1 == 2 && v2 == 1 && v3 == 1) {
1906 subsampling = SUBSAMPLE_YUV422V;
1907 fourcc = VA_FOURCC_422V;
/* Degenerate sampling combinations are coerced to the nearest 4:2:2
 * layout rather than rejected. */
1908 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1909 v1 == 2 && v2 == 2 && v3 == 2) {
1910 subsampling = SUBSAMPLE_YUV422H;
1911 fourcc = VA_FOURCC_422H;
1912 } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1913 v1 == 2 && v2 == 1 && v3 == 1) {
1914 subsampling = SUBSAMPLE_YUV422V;
1915 fourcc = VA_FOURCC_422V;
1923 /* Current decoded picture */
1924 obj_surface = decode_state->render_object;
1925 i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
/* JPEG output always goes through the pre-deblocking path. */
1927 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1928 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1929 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1930 gen7_mfd_context->pre_deblocking_output.valid = 1;
/* None of the scratch/bitplane buffers are used for JPEG. */
1932 gen7_mfd_context->post_deblocking_output.bo = NULL;
1933 gen7_mfd_context->post_deblocking_output.valid = 0;
1935 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1936 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1938 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1939 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1941 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1942 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1944 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1945 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1947 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1948 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* VA rotation index (0/90/180/270 degrees) -> GEN7 MFX_JPEG_PIC_STATE
 * rotation field value. Only entry [0] (no rotation) is used below. */
1951 static const int va_to_gen7_jpeg_rotation[4] = {
1952 GEN7_JPEG_ROTATION_0,
1953 GEN7_JPEG_ROTATION_90,
1954 GEN7_JPEG_ROTATION_180,
1955 GEN7_JPEG_ROTATION_270
1959 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1960 struct decode_state *decode_state,
1961 struct gen7_mfd_context *gen7_mfd_context)
1963 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1964 VAPictureParameterBufferJPEGBaseline *pic_param;
1965 int chroma_type = GEN7_YUV420;
1966 int frame_width_in_blks;
1967 int frame_height_in_blks;
1969 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1970 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1972 if (pic_param->num_components == 1)
1973 chroma_type = GEN7_YUV400;
1974 else if (pic_param->num_components == 3) {
1975 int h1 = pic_param->components[0].h_sampling_factor;
1976 int h2 = pic_param->components[1].h_sampling_factor;
1977 int h3 = pic_param->components[2].h_sampling_factor;
1978 int v1 = pic_param->components[0].v_sampling_factor;
1979 int v2 = pic_param->components[1].v_sampling_factor;
1980 int v3 = pic_param->components[2].v_sampling_factor;
1982 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1983 v1 == 2 && v2 == 1 && v3 == 1)
1984 chroma_type = GEN7_YUV420;
1985 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1986 v1 == 1 && v2 == 1 && v3 == 1)
1987 chroma_type = GEN7_YUV422H_2Y;
1988 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1989 v1 == 1 && v2 == 1 && v3 == 1)
1990 chroma_type = GEN7_YUV444;
1991 else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1992 v1 == 1 && v2 == 1 && v3 == 1)
1993 chroma_type = GEN7_YUV411;
1994 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1995 v1 == 2 && v2 == 1 && v3 == 1)
1996 chroma_type = GEN7_YUV422V_2Y;
1997 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1998 v1 == 2 && v2 == 2 && v3 == 2)
1999 chroma_type = GEN7_YUV422H_4Y;
2000 else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2001 v1 == 2 && v2 == 1 && v3 == 1)
2002 chroma_type = GEN7_YUV422V_4Y;
2007 if (chroma_type == GEN7_YUV400 ||
2008 chroma_type == GEN7_YUV444 ||
2009 chroma_type == GEN7_YUV422V_2Y) {
2010 frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2011 frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2012 } else if (chroma_type == GEN7_YUV411) {
2013 frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2014 frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2016 frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2017 frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2020 BEGIN_BCS_BATCH(batch, 3);
2021 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2022 OUT_BCS_BATCH(batch,
2023 (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
2024 (chroma_type << 0));
2025 OUT_BCS_BATCH(batch,
2026 ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
2027 ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
2028 ADVANCE_BCS_BATCH(batch);
/* VA Huffman-table slot -> MFX_JPEG_HUFF_TABLE_STATE table id.
 * Initializer values are outside this view — presumably
 * MFX_HUFFTABLE_ID_Y / MFX_HUFFTABLE_ID_UV; confirm in the full file. */
2031 static const int va_to_gen7_jpeg_hufftable[2] = {
/* Program up to `num_tables` JPEG Huffman tables into the hardware via
 * MFX_JPEG_HUFF_TABLE_STATE (53 DWords each). Tables whose
 * load_huffman_table flag is clear are skipped (loop body's skip
 * statement is outside this view). Returns early if no Huffman table
 * buffer was submitted. */
2037 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2038 struct decode_state *decode_state,
2039 struct gen7_mfd_context *gen7_mfd_context,
2042 VAHuffmanTableBufferJPEGBaseline *huffman_table;
2043 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2046 if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2049 huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2051 for (index = 0; index < num_tables; index++) {
2052 int id = va_to_gen7_jpeg_hufftable[index];
2053 if (!huffman_table->load_huffman_table[index])
2055 BEGIN_BCS_BATCH(batch, 53);
2056 OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2057 OUT_BCS_BATCH(batch, id);
/* DC: 12 code-length counts + 12 values; AC: 16 counts + values. */
2058 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2059 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2060 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
/* NOTE(review): ac_values is 162 bytes in VA-API, but 164 bytes are
 * emitted to fill the 53-DWord command — this reads 2 bytes past the
 * array. Confirm this is intentional padding. */
2061 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2062 ADVANCE_BCS_BATCH(batch);
/* JPEG component id (1..4, index 0 unused — its entry is outside this
 * view) -> MFX quantizer-matrix type for MFX_QM_STATE. */
2066 static const int va_to_gen7_jpeg_qm[5] = {
2068 MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2069 MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2070 MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2071 MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
/* Load the JPEG dequantization matrices: for each picture component, pick
 * the quantiser table it references, convert it from the zig-zag order
 * VA-API delivers to the raster order the hardware expects, and emit it
 * with gen8_mfd_qm_state. Returns early if no IQ matrix was submitted. */
2075 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2076 struct decode_state *decode_state,
2077 struct gen7_mfd_context *gen7_mfd_context)
2079 VAPictureParameterBufferJPEGBaseline *pic_param;
2080 VAIQMatrixBufferJPEGBaseline *iq_matrix;
2083 if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2086 iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2087 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2089 assert(pic_param->num_components <= 3);
2091 for (index = 0; index < pic_param->num_components; index++) {
/* Normalize component ids so the first component maps to 1 (Y). */
2092 int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2094 unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2095 unsigned char raster_qm[64];
/* Ignore components outside the 1..4 range va_to_gen7_jpeg_qm covers. */
2098 if (id > 4 || id < 1)
2101 if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2104 qm_type = va_to_gen7_jpeg_qm[id];
/* Zig-zag -> raster reorder. */
2106 for (j = 0; j < 64; j++)
2107 raster_qm[zigzag_direct[j]] = qm[j];
2109 gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
/* Emit one MFD_JPEG_BSD_OBJECT command for a JPEG scan: builds the
 * scan-component mask (bit per Y/Cb/Cr present in the scan) and programs
 * the slice data size/offset, scan position, MCU count and restart
 * interval. `next_slice_param` and `slice_data_bo` are part of the common
 * BSD-object signature; the switch-case labels are outside this view. */
2114 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2115 VAPictureParameterBufferJPEGBaseline *pic_param,
2116 VASliceParameterBufferJPEGBaseline *slice_param,
2117 VASliceParameterBufferJPEGBaseline *next_slice_param,
2118 dri_bo *slice_data_bo,
2119 struct gen7_mfd_context *gen7_mfd_context)
2121 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2122 int scan_component_mask = 0;
2125 assert(slice_param->num_components > 0);
2126 assert(slice_param->num_components < 4);
2127 assert(slice_param->num_components <= pic_param->num_components);
2129 for (i = 0; i < slice_param->num_components; i++) {
/* Normalize the scan's component selector against the first picture
 * component id, as in gen8_mfd_jpeg_qm_state. */
2130 switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2132 scan_component_mask |= (1 << 0);
2135 scan_component_mask |= (1 << 1);
2138 scan_component_mask |= (1 << 2);
2146 BEGIN_BCS_BATCH(batch, 6);
2147 OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2148 OUT_BCS_BATCH(batch,
2149 slice_param->slice_data_size);
2150 OUT_BCS_BATCH(batch,
2151 slice_param->slice_data_offset);
2152 OUT_BCS_BATCH(batch,
2153 slice_param->slice_horizontal_position << 16 |
2154 slice_param->slice_vertical_position << 0);
2155 OUT_BCS_BATCH(batch,
2156 ((slice_param->num_components != 1) << 30) | /* interleaved */
2157 (scan_component_mask << 27) | /* scan components */
2158 (0 << 26) | /* disable interrupt allowed */
2159 (slice_param->num_mcus << 0)); /* MCU count */
2160 OUT_BCS_BATCH(batch,
2161 (slice_param->restart_interval << 0)); /* RestartInterval */
2162 ADVANCE_BCS_BATCH(batch);
2165 /* Workaround for JPEG decoding on Ivybridge */
/* A tiny canned AVC bitstream (plus its geometry/offset fields, partly
 * outside this view) that gen8_mfd_jpeg_wa decodes before each real JPEG
 * frame to put the MFX pipeline into a known state. */
2171 unsigned char data[32];
2173 int data_bit_offset;
2175 } gen7_jpeg_wa_clip = {
/* Pre-encoded AVC slice payload consumed via MFD_AVC_BSD_OBJECT. */
2179 0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2180 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
/* Allocate (or recycle) the scratch surface and the slice-data BO used by
 * the JPEG workaround clip. The previous workaround surface, if any, is
 * destroyed first; the canned bitstream is uploaded once into the BO. */
2188 gen8_jpeg_wa_init(VADriverContextP ctx,
2189 struct gen7_mfd_context *gen7_mfd_context)
2191 struct i965_driver_data *i965 = i965_driver_data(ctx);
2193 struct object_surface *obj_surface;
2195 if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2196 i965_DestroySurfaces(ctx,
2197 &gen7_mfd_context->jpeg_wa_surface_id,
2200 status = i965_CreateSurfaces(ctx,
2201 gen7_jpeg_wa_clip.width,
2202 gen7_jpeg_wa_clip.height,
2203 VA_RT_FORMAT_YUV420,
2205 &gen7_mfd_context->jpeg_wa_surface_id);
2206 assert(status == VA_STATUS_SUCCESS);
2208 obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2209 assert(obj_surface);
/* The workaround clip decodes as AVC, so the surface is NV12 4:2:0. */
2210 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2211 gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
/* Upload the canned bitstream only on first use; the BO is kept for the
 * lifetime of the context. */
2213 if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2214 gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2218 dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2220 gen7_jpeg_wa_clip.data_size,
2221 gen7_jpeg_wa_clip.data);
/* MFX_PIPE_MODE_SELECT for the workaround pass: AVC VLD decode, long
 * format, stream-out disabled, pre-deblocking output enabled. */
2226 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2227 struct gen7_mfd_context *gen7_mfd_context)
2229 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2231 BEGIN_BCS_BATCH(batch, 5);
2232 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2233 OUT_BCS_BATCH(batch,
2234 (MFX_LONG_MODE << 17) | /* Currently only support long format */
2235 (MFD_MODE_VLD << 15) | /* VLD mode */
2236 (0 << 10) | /* disable Stream-Out */
2237 (0 << 9) | /* Post Deblocking Output */
2238 (1 << 8) | /* Pre Deblocking Output */
2239 (0 << 5) | /* not in stitch mode */
2240 (MFX_CODEC_DECODE << 4) | /* decoding mode */
2241 (MFX_FORMAT_AVC << 0));
2242 OUT_BCS_BATCH(batch,
2243 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
2244 (0 << 3) | /* terminate if AVC mbdata error occurs */
2245 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
2248 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2249 OUT_BCS_BATCH(batch, 0); /* reserved */
2250 ADVANCE_BCS_BATCH(batch);
/* MFX_SURFACE_STATE for the workaround surface: NV12 planar 4:2:0,
 * Y-major tiled, with dimensions/pitch/Cb offset from the scratch
 * surface allocated in gen8_jpeg_wa_init. */
2254 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2255 struct gen7_mfd_context *gen7_mfd_context)
2257 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2258 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2260 BEGIN_BCS_BATCH(batch, 6);
2261 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2262 OUT_BCS_BATCH(batch, 0);
2263 OUT_BCS_BATCH(batch,
2264 ((obj_surface->orig_width - 1) << 18) |
2265 ((obj_surface->orig_height - 1) << 4));
2266 OUT_BCS_BATCH(batch,
2267 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2268 (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2269 (0 << 22) | /* surface object control state, ignored */
2270 ((obj_surface->width - 1) << 3) | /* pitch */
2271 (0 << 2) | /* must be 0 */
2272 (1 << 1) | /* must be tiled */
2273 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
2274 OUT_BCS_BATCH(batch,
2275 (0 << 16) | /* X offset for U(Cb), must be 0 */
2276 (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2277 OUT_BCS_BATCH(batch,
2278 (0 << 16) | /* X offset for V(Cr), must be 0 */
2279 (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2280 ADVANCE_BCS_BATCH(batch);
/* MFX_PIPE_BUF_ADDR_STATE (61 DWords) for the workaround pass. Only the
 * pre-deblocking output (the scratch surface BO) and a temporary intra
 * row-store BO are programmed; every other address is zero. The
 * temporary BO is released at the end — the kernel keeps it alive until
 * the batch completes. */
2284 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2285 struct gen7_mfd_context *gen7_mfd_context)
2287 struct i965_driver_data *i965 = i965_driver_data(ctx);
2288 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2289 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Throw-away intra row-store scratch; size/flags are outside this view. */
2293 intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2298 BEGIN_BCS_BATCH(batch, 61);
2299 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
/* DW1-3: pre-deblocking output = workaround surface. */
2300 OUT_BCS_RELOC(batch,
2302 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2304 OUT_BCS_BATCH(batch, 0);
2305 OUT_BCS_BATCH(batch, 0);
2308 OUT_BCS_BATCH(batch, 0); /* post deblocking */
2309 OUT_BCS_BATCH(batch, 0);
2310 OUT_BCS_BATCH(batch, 0);
2312 /* uncompressed-video & stream out 7-12 */
2313 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2314 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2315 OUT_BCS_BATCH(batch, 0);
2316 OUT_BCS_BATCH(batch, 0);
2317 OUT_BCS_BATCH(batch, 0);
2318 OUT_BCS_BATCH(batch, 0);
2320 /* the DW 13-15 is for intra row store scratch */
2321 OUT_BCS_RELOC(batch,
2323 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2325 OUT_BCS_BATCH(batch, 0);
2326 OUT_BCS_BATCH(batch, 0);
2328 /* the DW 16-18 is for deblocking filter */
2329 OUT_BCS_BATCH(batch, 0);
2330 OUT_BCS_BATCH(batch, 0);
2331 OUT_BCS_BATCH(batch, 0);
/* Reference picture addresses — all zero for the workaround clip. */
2334 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2335 OUT_BCS_BATCH(batch, 0);
2336 OUT_BCS_BATCH(batch, 0);
2338 OUT_BCS_BATCH(batch, 0);
2340 /* the DW52-54 is for mb status address */
2341 OUT_BCS_BATCH(batch, 0);
2342 OUT_BCS_BATCH(batch, 0);
2343 OUT_BCS_BATCH(batch, 0);
2344 /* the DW56-60 is for ILDB & second ILDB address */
2345 OUT_BCS_BATCH(batch, 0);
2346 OUT_BCS_BATCH(batch, 0);
2347 OUT_BCS_BATCH(batch, 0);
2348 OUT_BCS_BATCH(batch, 0);
2349 OUT_BCS_BATCH(batch, 0);
2350 OUT_BCS_BATCH(batch, 0);
2352 ADVANCE_BCS_BATCH(batch);
/* Safe to drop our reference: the relocation pins the BO for the batch. */
2354 dri_bo_unreference(intra_bo);
/* MFX_BSP_BUF_BASE_ADDR_STATE for the workaround pass: allocate
 * throw-away BSD/MPC and MPR row-store buffers, emit their relocations,
 * then drop our references (the relocations keep them alive for the
 * batch). */
2358 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2359 struct gen7_mfd_context *gen7_mfd_context)
2361 struct i965_driver_data *i965 = i965_driver_data(ctx);
2362 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2363 dri_bo *bsd_mpc_bo, *mpr_bo;
2365 bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2366 "bsd mpc row store",
2367 11520, /* 1.5 * 120 * 64 */
2370 mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2372 7680, /* 1.0 * 120 * 64 */
2375 BEGIN_BCS_BATCH(batch, 10);
2376 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2378 OUT_BCS_RELOC(batch,
2380 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2383 OUT_BCS_BATCH(batch, 0);
2384 OUT_BCS_BATCH(batch, 0);
2386 OUT_BCS_RELOC(batch,
2388 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2390 OUT_BCS_BATCH(batch, 0);
2391 OUT_BCS_BATCH(batch, 0);
2393 OUT_BCS_BATCH(batch, 0);
2394 OUT_BCS_BATCH(batch, 0);
2395 OUT_BCS_BATCH(batch, 0);
2397 ADVANCE_BCS_BATCH(batch);
2399 dri_bo_unreference(bsd_mpc_bo);
2400 dri_bo_unreference(mpr_bo);
/* AVC QM state for the workaround pass. Body not visible in this view —
 * presumably empty (flat default matrices need no programming); confirm
 * in the full file. */
2404 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2405 struct gen7_mfd_context *gen7_mfd_context)
/* MFX_AVC_IMG_STATE for the workaround clip: a 1x1-macroblock, 4:2:0,
 * CABAC frame with MBAFF off. Several bit fields are on lines outside
 * this view. */
2411 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2412 struct gen7_mfd_context *gen7_mfd_context)
2414 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2416 int mbaff_frame_flag = 0;
2417 unsigned int width_in_mbs = 1, height_in_mbs = 1;
2419 BEGIN_BCS_BATCH(batch, 16);
2420 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2421 OUT_BCS_BATCH(batch,
2422 width_in_mbs * height_in_mbs);
2423 OUT_BCS_BATCH(batch,
2424 ((height_in_mbs - 1) << 16) |
2425 ((width_in_mbs - 1) << 0));
2426 OUT_BCS_BATCH(batch,
2431 (0 << 12) | /* differ from GEN6 */
2434 OUT_BCS_BATCH(batch,
2435 (1 << 10) | /* 4:2:0 */
2436 (1 << 7) | /* CABAC */
2442 (mbaff_frame_flag << 1) |
2444 OUT_BCS_BATCH(batch, 0);
2445 OUT_BCS_BATCH(batch, 0);
2446 OUT_BCS_BATCH(batch, 0);
2447 OUT_BCS_BATCH(batch, 0);
2448 OUT_BCS_BATCH(batch, 0);
2449 OUT_BCS_BATCH(batch, 0);
2450 OUT_BCS_BATCH(batch, 0);
2451 OUT_BCS_BATCH(batch, 0);
2452 OUT_BCS_BATCH(batch, 0);
2453 OUT_BCS_BATCH(batch, 0);
2454 OUT_BCS_BATCH(batch, 0);
2455 ADVANCE_BCS_BATCH(batch);
/* MFX_AVC_DIRECTMODE_STATE (71 DWords) for the workaround pass — the
 * clip is a single I slice, so every DMV/POC address and value is zero. */
2459 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2460 struct gen7_mfd_context *gen7_mfd_context)
2462 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2465 BEGIN_BCS_BATCH(batch, 71);
2466 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2468 /* reference surfaces 0..15 */
2469 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2470 OUT_BCS_BATCH(batch, 0); /* top */
2471 OUT_BCS_BATCH(batch, 0); /* bottom */
2474 OUT_BCS_BATCH(batch, 0);
2476 /* the current decoding frame/field */
2477 OUT_BCS_BATCH(batch, 0); /* top */
2478 OUT_BCS_BATCH(batch, 0);
2479 OUT_BCS_BATCH(batch, 0);
/* POC list — all zero for the single-frame workaround clip. */
2482 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2483 OUT_BCS_BATCH(batch, 0);
2484 OUT_BCS_BATCH(batch, 0);
2487 OUT_BCS_BATCH(batch, 0);
2488 OUT_BCS_BATCH(batch, 0);
2490 ADVANCE_BCS_BATCH(batch);
/* MFX_IND_OBJ_BASE_ADDR_STATE for the workaround pass: point the
 * bitstream base at the canned-clip slice-data BO; all other indirect
 * object bases are unused in VLD mode. */
2494 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2495 struct gen7_mfd_context *gen7_mfd_context)
2497 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2499 BEGIN_BCS_BATCH(batch, 11);
2500 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2501 OUT_BCS_RELOC(batch,
2502 gen7_mfd_context->jpeg_wa_slice_data_bo,
2503 I915_GEM_DOMAIN_INSTRUCTION, 0,
2505 OUT_BCS_BATCH(batch, 0);
2506 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2507 OUT_BCS_BATCH(batch, 0);
2508 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2509 OUT_BCS_BATCH(batch, 0);
2510 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2511 OUT_BCS_BATCH(batch, 0);
2512 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2513 OUT_BCS_BATCH(batch, 0);
2514 ADVANCE_BCS_BATCH(batch);
/* MFD_AVC_BSD_OBJECT for the workaround pass: decode the canned clip's
 * single slice (size/bit-offset from gen7_jpeg_wa_clip), flagged as the
 * last slice. */
2518 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2519 struct gen7_mfd_context *gen7_mfd_context)
2521 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2523 /* the input bitstream format on GEN7 differs from GEN6 */
2524 BEGIN_BCS_BATCH(batch, 6);
2525 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2526 OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2527 OUT_BCS_BATCH(batch, 0);
2528 OUT_BCS_BATCH(batch,
2534 OUT_BCS_BATCH(batch,
/* Byte part of the first-macroblock bit offset in bits 16+, bit part in
 * the low 3 bits. */
2535 ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2538 (1 << 3) | /* LastSlice Flag */
2539 (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2540 OUT_BCS_BATCH(batch, 0);
2541 ADVANCE_BCS_BATCH(batch);
/* MFX_AVC_SLICE_STATE for the workaround clip: one I slice covering the
 * whole (1-MB) frame, deblocking disabled, no reference lists. */
2545 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2546 struct gen7_mfd_context *gen7_mfd_context)
2548 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2549 int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2550 int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2551 int first_mb_in_slice = 0;
2552 int slice_type = SLICE_TYPE_I;
2554 BEGIN_BCS_BATCH(batch, 11);
2555 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2556 OUT_BCS_BATCH(batch, slice_type);
2557 OUT_BCS_BATCH(batch,
2558 (num_ref_idx_l1 << 24) |
2559 (num_ref_idx_l0 << 16) |
2562 OUT_BCS_BATCH(batch,
2564 (1 << 27) | /* disable Deblocking */
2566 (gen7_jpeg_wa_clip.qp << 16) |
2569 OUT_BCS_BATCH(batch,
2570 (slice_ver_pos << 24) |
2571 (slice_hor_pos << 16) |
2572 (first_mb_in_slice << 0));
2573 OUT_BCS_BATCH(batch,
2574 (next_slice_ver_pos << 16) |
2575 (next_slice_hor_pos << 0));
2576 OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2577 OUT_BCS_BATCH(batch, 0);
2578 OUT_BCS_BATCH(batch, 0);
2579 OUT_BCS_BATCH(batch, 0);
2580 OUT_BCS_BATCH(batch, 0);
2581 ADVANCE_BCS_BATCH(batch);
/* Run the full JPEG decode workaround: decode the tiny canned AVC clip
 * (into a scratch surface) to bring the MFX pipeline into a known state
 * before the real JPEG commands are issued. Emits every MFX state needed
 * for that one-slice AVC decode. */
2585 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2586 struct gen7_mfd_context *gen7_mfd_context)
2588 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2589 gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2590 intel_batchbuffer_emit_mi_flush(batch);
2591 gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2592 gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2593 gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2594 gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2595 gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2596 gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2597 gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2599 gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2600 gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2601 gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
/* Top-level JPEG picture decode. Two passes over the slice parameters:
 * pass 1 only finds the highest DC/AC Huffman table selector used (so
 * the right number of tables can be programmed once), pass 2 emits the
 * per-scan BSD objects. The whole frame is wrapped in one atomic batch. */
2607 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2608 struct decode_state *decode_state,
2609 struct gen7_mfd_context *gen7_mfd_context)
2611 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2612 VAPictureParameterBufferJPEGBaseline *pic_param;
2613 VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2614 dri_bo *slice_data_bo;
2615 int i, j, max_selector = 0;
2617 assert(decode_state->pic_param && decode_state->pic_param->buffer);
2618 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2620 /* Currently only support Baseline DCT */
2621 gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2622 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
/* Hardware workaround: decode a canned AVC clip first (see gen8_mfd_jpeg_wa). */
2624 gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2626 intel_batchbuffer_emit_mi_flush(batch);
2627 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2628 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2629 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2630 gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2631 gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
/* Pass 1: scan all slices to find the largest Huffman table selector. */
2633 for (j = 0; j < decode_state->num_slice_params; j++) {
2634 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2635 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2636 slice_data_bo = decode_state->slice_datas[j]->bo;
2637 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2639 if (j == decode_state->num_slice_params - 1)
2640 next_slice_group_param = NULL;
2642 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2644 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2647 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2649 if (i < decode_state->slice_params[j]->num_elements - 1)
2650 next_slice_param = slice_param + 1;
2652 next_slice_param = next_slice_group_param;
2654 for (component = 0; component < slice_param->num_components; component++) {
2655 if (max_selector < slice_param->components[component].dc_table_selector)
2656 max_selector = slice_param->components[component].dc_table_selector;
2658 if (max_selector < slice_param->components[component].ac_table_selector)
2659 max_selector = slice_param->components[component].ac_table_selector;
/* Baseline JPEG allows at most 2 Huffman table pairs. */
2666 assert(max_selector < 2);
2667 gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
/* Pass 2: emit one BSD object per scan. */
2669 for (j = 0; j < decode_state->num_slice_params; j++) {
2670 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2671 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2672 slice_data_bo = decode_state->slice_datas[j]->bo;
2673 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2675 if (j == decode_state->num_slice_params - 1)
2676 next_slice_group_param = NULL;
2678 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2680 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2681 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2683 if (i < decode_state->slice_params[j]->num_elements - 1)
2684 next_slice_param = slice_param + 1;
2686 next_slice_param = next_slice_group_param;
2688 gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2693 intel_batchbuffer_end_atomic(batch);
2694 intel_batchbuffer_flush(batch);
/* VP8 DC dequantization lookup: quantization index (0..127) -> DC
 * quantizer step, per the VP8 specification. */
2697 static const int vp8_dc_qlookup[128] =
2699 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
2700 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
2701 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
2702 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
2703 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
2704 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
2705 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2706 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
/* VP8 AC dequantization lookup: quantization index (0..127) -> AC
 * quantizer step, per the VP8 specification. */
2709 static const int vp8_ac_qlookup[128] =
2711 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
2712 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
2713 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
2714 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
2715 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
2716 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2717 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2718 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
/* Body not visible in this view — presumably clamps `index` into the
 * valid 0..127 range of the qlookup tables above; confirm in full file. */
2721 static inline unsigned int vp8_clip_quantization_index(int index)
/* Prepare the MFD context for a VP8 frame decode: update the reference
 * frame-store index, bind the render surface as both pre- and
 * post-deblocking output (only one marked valid depending on
 * loop_filter_disable), ensure the segmentation-map buffer exists, and
 * (re)allocate the AVC-style row-store scratch buffers. */
2732 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2733 struct decode_state *decode_state,
2734 struct gen7_mfd_context *gen7_mfd_context)
2736 struct object_surface *obj_surface;
2737 struct i965_driver_data *i965 = i965_driver_data(ctx);
2739 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2740 int width_in_mbs = (pic_param->frame_width + 15) / 16;
2741 int height_in_mbs = (pic_param->frame_height + 15) / 16;
2743 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2744 assert(height_in_mbs > 0 && height_in_mbs <= 256);
2746 intel_update_vp8_frame_store_index(ctx,
2749 gen7_mfd_context->reference_surface);
2751 /* Current decoded picture */
2752 obj_surface = decode_state->render_object;
2753 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Exactly one of post-/pre-deblocking output is valid, chosen by the
 * frame's loop_filter_disable flag. */
2755 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2756 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2757 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2758 gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2760 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2761 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2762 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2763 gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2765 intel_ensure_vp8_segmentation_buffer(ctx,
2766 &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2768 /* The same as AVC */
2769 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2770 bo = dri_bo_alloc(i965->intel.bufmgr,
2775 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2776 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2778 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2779 bo = dri_bo_alloc(i965->intel.bufmgr,
2780 "deblocking filter row store",
2781 width_in_mbs * 64 * 4,
2784 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2785 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2787 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2788 bo = dri_bo_alloc(i965->intel.bufmgr,
2789 "bsd mpc row store",
2790 width_in_mbs * 64 * 2,
2793 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2794 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2796 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2797 bo = dri_bo_alloc(i965->intel.bufmgr,
2799 width_in_mbs * 64 * 2,
2802 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2803 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* VP8 has no bitplane buffer (VC-1 only). */
2805 gen7_mfd_context->bitplane_read_buffer.valid = 0;
2809 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2810 struct decode_state *decode_state,
2811 struct gen7_mfd_context *gen7_mfd_context)
2813 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2814 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2815 VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2816 VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2817 dri_bo *probs_bo = decode_state->probability_data->bo;
2819 unsigned int quantization_value[4][6];
2821 /* There is no safe way to error out if the segmentation buffer
2822 could not be allocated. So, instead of aborting, simply decode
2823 something even if the result may look totally inacurate */
2824 const unsigned int enable_segmentation =
2825 pic_param->pic_fields.bits.segmentation_enabled &&
2826 gen7_mfd_context->segmentation_buffer.valid;
2828 log2num = (int)log2(slice_param->num_of_partitions - 1);
2830 BEGIN_BCS_BATCH(batch, 38);
2831 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2832 OUT_BCS_BATCH(batch,
2833 (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2834 (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2835 OUT_BCS_BATCH(batch,
2837 pic_param->pic_fields.bits.sharpness_level << 16 |
2838 pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2839 pic_param->pic_fields.bits.sign_bias_golden << 12 |
2840 pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2841 pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2842 (enable_segmentation &&
2843 pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
2844 pic_param->pic_fields.bits.segmentation_enabled << 8 |
2845 (enable_segmentation &&
2846 !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2847 (enable_segmentation &&
2848 pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2849 (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2850 pic_param->pic_fields.bits.filter_type << 4 |
2851 (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2852 !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
/* Per-segment loop filter levels packed one byte each, DW3 */
2854 OUT_BCS_BATCH(batch,
2855 pic_param->loop_filter_level[3] << 24 |
2856 pic_param->loop_filter_level[2] << 16 |
2857 pic_param->loop_filter_level[1] << 8 |
2858 pic_param->loop_filter_level[0] << 0);
2860 /* Quantizer Value for 4 segments, DW4-DW15 */
2861 for (i = 0; i < 4; i++) {
/* Translate quantizer indices to actual quantizer values via the VP8
 * AC/DC lookup tables; indices are clipped to the table range first. */
2862 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2863 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2864 quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
2865 /* 101581>>16 is equivalent to 155/100 */
2866 quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
2867 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2868 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
/* Clamp: y2ac has a floor of 8, uvdc a ceiling of 132 */
2870 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2871 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2873 OUT_BCS_BATCH(batch,
2874 quantization_value[i][0] << 16 | /* Y1AC */
2875 quantization_value[i][1] << 0); /* Y1DC */
2876 OUT_BCS_BATCH(batch,
2877 quantization_value[i][5] << 16 | /* UVAC */
2878 quantization_value[i][4] << 0); /* UVDC */
2879 OUT_BCS_BATCH(batch,
2880 quantization_value[i][3] << 16 | /* Y2AC */
2881 quantization_value[i][2] << 0); /* Y2DC */
2884 /* CoeffProbability table for non-key frame, DW16-DW18 */
/* probs_bo is read by the command streamer, hence the INSTRUCTION read domain */
2886 OUT_BCS_RELOC(batch, probs_bo,
2887 0, I915_GEM_DOMAIN_INSTRUCTION,
2889 OUT_BCS_BATCH(batch, 0);
2890 OUT_BCS_BATCH(batch, 0);
2892 OUT_BCS_BATCH(batch, 0);
2893 OUT_BCS_BATCH(batch, 0);
2894 OUT_BCS_BATCH(batch, 0);
/* Macroblock segment tree probabilities */
2897 OUT_BCS_BATCH(batch,
2898 pic_param->mb_segment_tree_probs[2] << 16 |
2899 pic_param->mb_segment_tree_probs[1] << 8 |
2900 pic_param->mb_segment_tree_probs[0] << 0);
/* Frame-level mode probabilities */
2902 OUT_BCS_BATCH(batch,
2903 pic_param->prob_skip_false << 24 |
2904 pic_param->prob_intra << 16 |
2905 pic_param->prob_last << 8 |
2906 pic_param->prob_gf << 0);
2908 OUT_BCS_BATCH(batch,
2909 pic_param->y_mode_probs[3] << 24 |
2910 pic_param->y_mode_probs[2] << 16 |
2911 pic_param->y_mode_probs[1] << 8 |
2912 pic_param->y_mode_probs[0] << 0);
2914 OUT_BCS_BATCH(batch,
2915 pic_param->uv_mode_probs[2] << 16 |
2916 pic_param->uv_mode_probs[1] << 8 |
2917 pic_param->uv_mode_probs[0] << 0);
2919 /* MV update value, DW23-DW32 */
2920 for (i = 0; i < 2; i++) {
2921 for (j = 0; j < 20; j += 4) {
/* mv_probs has 19 entries per component; the 20th slot is padded with 0 */
2922 OUT_BCS_BATCH(batch,
2923 (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2924 pic_param->mv_probs[i][j + 2] << 16 |
2925 pic_param->mv_probs[i][j + 1] << 8 |
2926 pic_param->mv_probs[i][j + 0] << 0);
/* Loop filter deltas, truncated to 7 bits per entry */
2930 OUT_BCS_BATCH(batch,
2931 (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2932 (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2933 (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) << 8 |
2934 (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) << 0);
2936 OUT_BCS_BATCH(batch,
2937 (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2938 (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2939 (pic_param->loop_filter_deltas_mode[1] & 0x7f) << 8 |
2940 (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0);
2942 /* segmentation id stream base address, DW35-DW37 */
2943 if (enable_segmentation) {
2944 OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2945 0, I915_GEM_DOMAIN_INSTRUCTION,
2947 OUT_BCS_BATCH(batch, 0);
2948 OUT_BCS_BATCH(batch, 0);
2951 OUT_BCS_BATCH(batch, 0);
2952 OUT_BCS_BATCH(batch, 0);
2953 OUT_BCS_BATCH(batch, 0);
2955 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the MFD_VP8_BSD_OBJECT command: programs the partition-0 boolean
 * decoder state (entropy count/range/value) and the size/offset of each
 * token partition so the bitstream decoder can consume the slice data.
 */
2959 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2960 VAPictureParameterBufferVP8 *pic_param,
2961 VASliceParameterBufferVP8 *slice_param,
2962 dri_bo *slice_data_bo,
2963 struct gen7_mfd_context *gen7_mfd_context)
2965 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Byte offset of the first token partition: the frame-header bit offset
 * (macroblock_offset) rounded up to a whole byte. */
2967 unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
2968 unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
2969 unsigned int partition_size_0 = slice_param->partition_size[0];
2971 assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
/* count == 0 means a full byte was consumed by the bool coder */
2972 if (used_bits == 8) {
2975 partition_size_0 -= 1;
/* VP8 has partition 0 plus 1/2/4/8 token partitions */
2978 assert(slice_param->num_of_partitions >= 2);
2979 assert(slice_param->num_of_partitions <= 9);
/* log2 of the token partition count (num_of_partitions includes partition 0) */
2981 log2num = (int)log2(slice_param->num_of_partitions - 1);
2983 BEGIN_BCS_BATCH(batch, 22);
2984 OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2985 OUT_BCS_BATCH(batch,
2986 used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2987 pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
2989 (slice_param->macroblock_offset & 0x7));
2990 OUT_BCS_BATCH(batch,
2991 pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
2994 OUT_BCS_BATCH(batch, partition_size_0 + 1);
2995 OUT_BCS_BATCH(batch, offset);
2996 //partition sizes in bytes are present after the above first partition when there are more than one token partition
2997 offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
/* Size/offset pairs for up to 8 token partitions; unused slots are zeroed */
2998 for (i = 1; i < 9; i++) {
2999 if (i < slice_param->num_of_partitions) {
3000 OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
3001 OUT_BCS_BATCH(batch, offset);
3003 OUT_BCS_BATCH(batch, 0);
3004 OUT_BCS_BATCH(batch, 0);
3007 offset += slice_param->partition_size[i];
3010 OUT_BCS_BATCH(batch, 0); /* concealment method */
3012 ADVANCE_BCS_BATCH(batch);
/*
 * Decode one complete VP8 frame: validate the decode state, then emit the
 * full MFX command sequence (pipe mode select, surface/buffer state, picture
 * state, BSD object) into the BCS batch buffer and flush it.
 */
3016 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3017 struct decode_state *decode_state,
3018 struct gen7_mfd_context *gen7_mfd_context)
3020 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3021 VAPictureParameterBufferVP8 *pic_param;
3022 VASliceParameterBufferVP8 *slice_param;
3023 dri_bo *slice_data_bo;
3025 assert(decode_state->pic_param && decode_state->pic_param->buffer);
3026 pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3028 /* one slice per frame */
/* Reject anything other than exactly one slice param (with one element),
 * one slice data bo, and a probability-table buffer. */
3029 if (decode_state->num_slice_params != 1 ||
3030 (!decode_state->slice_params ||
3031 !decode_state->slice_params[0] ||
3032 (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3033 (!decode_state->slice_datas ||
3034 !decode_state->slice_datas[0] ||
3035 !decode_state->slice_datas[0]->bo) ||
3036 !decode_state->probability_data) {
3037 WARN_ONCE("Wrong parameters for VP8 decoding\n");
3042 slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3043 slice_data_bo = decode_state->slice_datas[0]->bo;
3045 gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
/* Batch is emitted atomically so the MFX state sequence is not interleaved
 * with other BCS work. */
3046 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3047 intel_batchbuffer_emit_mi_flush(batch);
3048 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3049 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3050 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3051 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3052 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3053 gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3054 gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3055 intel_batchbuffer_end_atomic(batch);
3056 intel_batchbuffer_flush(batch);
/*
 * hw_context "run" entry point for Gen8 decode: sanity-check the input
 * decode state, then dispatch to the per-codec decode routine based on the
 * VA profile.
 */
3060 gen8_mfd_decode_picture(VADriverContextP ctx,
3062 union codec_state *codec_state,
3063 struct hw_context *hw_context)
3066 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3067 struct decode_state *decode_state = &codec_state->decode;
3070 assert(gen7_mfd_context);
3072 vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3074 if (vaStatus != VA_STATUS_SUCCESS)
/* Reset the MPEG-2 slice vertical position workaround for each new frame */
3077 gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3080 case VAProfileMPEG2Simple:
3081 case VAProfileMPEG2Main:
3082 gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3085 case VAProfileH264ConstrainedBaseline:
3086 case VAProfileH264Main:
3087 case VAProfileH264High:
3088 case VAProfileH264StereoHigh:
3089 case VAProfileH264MultiviewHigh:
3090 gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3093 case VAProfileVC1Simple:
3094 case VAProfileVC1Main:
3095 case VAProfileVC1Advanced:
3096 gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3099 case VAProfileJPEGBaseline:
3100 gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3103 case VAProfileVP8Version0_3:
3104 gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3112 vaStatus = VA_STATUS_SUCCESS;
/*
 * Tear down a Gen8 decode hw_context: drop every scratch/reference buffer
 * object, destroy the JPEG workaround surface if one was created, free the
 * batch buffer, and finally free the context itself.
 */
3119 gen8_mfd_context_destroy(void *hw_context)
3121 VADriverContextP ctx;
3122 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3124 ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
/* Each bo pointer is NULLed after unreference so a double destroy is benign */
3126 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3127 gen7_mfd_context->post_deblocking_output.bo = NULL;
3129 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3130 gen7_mfd_context->pre_deblocking_output.bo = NULL;
3132 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3133 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3135 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3136 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3138 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3139 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3141 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3142 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3144 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3145 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3147 dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3148 gen7_mfd_context->segmentation_buffer.bo = NULL;
3150 dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
/* The JPEG workaround surface is created lazily; destroy only if present */
3152 if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3153 i965_DestroySurfaces(ctx,
3154 &gen7_mfd_context->jpeg_wa_surface_id,
3156 gen7_mfd_context->jpeg_wa_surface_object = NULL;
3159 intel_batchbuffer_free(gen7_mfd_context->base.batch);
3160 free(gen7_mfd_context);
/*
 * MPEG-2-specific context init: mark all four cached IQ matrix "load" flags
 * as -1 (presumably meaning "not yet provided by the app" — TODO confirm
 * against the code that consumes iq_matrix.mpeg2).
 */
static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3164 struct gen7_mfd_context *gen7_mfd_context)
3166 gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3167 gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3168 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3169 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
/*
 * Allocate and initialize a Gen8 decode hw_context for the given config:
 * wire up the destroy/run vtable entries, create the render batch buffer,
 * invalidate the reference surface table, and run per-codec init based on
 * the configured profile. Returns the context as a generic hw_context.
 */
3173 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3175 struct intel_driver_data *intel = intel_driver_data(ctx);
3176 struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3179 gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3180 gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3181 gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
/* No reference frames yet: every slot starts invalid */
3183 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3184 gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3185 gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3188 gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3189 gen7_mfd_context->segmentation_buffer.valid = 0;
/* Profile-specific initialization (MPEG-2 IQ matrix cache, AVC state) */
3191 switch (obj_config->profile) {
3192 case VAProfileMPEG2Simple:
3193 case VAProfileMPEG2Main:
3194 gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3197 case VAProfileH264ConstrainedBaseline:
3198 case VAProfileH264Main:
3199 case VAProfileH264High:
3200 case VAProfileH264StereoHigh:
3201 case VAProfileH264MultiviewHigh:
3202 gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
/* Keep the driver context so destroy() can tear down surfaces */
3208 gen7_mfd_context->driver_context = ctx;
3209 return (struct hw_context *)gen7_mfd_context;