2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
/* True when the device's PCI revision id is at or beyond the B0 stepping. */
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/* Zig-zag scan order for an 8x8 coefficient block: entry i is the
 * raster-order index of the i-th coefficient in scan order.
 * NOTE(review): the table's closing "};" (original line 60) is not visible
 * in this listing -- the embedded line numbers skip it. */
51 static const uint32_t zigzag_direct[64] = {
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
54 12, 19, 26, 33, 40, 48, 41, 34,
55 27, 20, 13, 6, 7, 14, 21, 28,
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
/*
 * Lazily attach per-surface AVC private data (GenAvcSurface) to the decode
 * target and allocate the direct-MV ("dmv") scratch buffers the MFX engine
 * reads/writes for direct-mode prediction.
 *
 * - The GenAvcSurface is calloc'ed on first use, hung off
 *   obj_surface->private_data, and reclaimed via gen_free_avc_surface.
 * - dmv_top is sized width_in_mbs * height_in_mbs * 128 bytes; dmv_bottom
 *   is only allocated when decoding a field picture without
 *   direct_8x8_inference (dmv_bottom_flag).
 * - Allocation failures are only assert()-checked, not handled at runtime.
 *
 * NOTE(review): this listing is incomplete -- embedded line numbers jump
 * (62, 66, 70, 74, 79-80, 83, 88, 90-91, 97, 99+ missing), dropping the
 * "static" return-type line, trailing dri_bo_alloc() arguments, and
 * closing braces. Comments describe only what the visible code shows.
 */
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64 VAPictureParameterBufferH264 *pic_param,
65 struct object_surface *obj_surface)
67 struct i965_driver_data *i965 = i965_driver_data(ctx);
68 GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69 int width_in_mbs, height_in_mbs;
71 obj_surface->free_private_data = gen_free_avc_surface;
72 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
75 if (!gen7_avc_surface) {
76 gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77 assert((obj_surface->size & 0x3f) == 0);
78 obj_surface->private_data = gen7_avc_surface;
81 gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
82 !pic_param->seq_fields.bits.direct_8x8_inference_flag);
84 if (gen7_avc_surface->dmv_top == NULL) {
85 gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86 "direct mv w/r buffer",
87 width_in_mbs * height_in_mbs * 128,
89 assert(gen7_avc_surface->dmv_top);
92 if (gen7_avc_surface->dmv_bottom_flag &&
93 gen7_avc_surface->dmv_bottom == NULL) {
94 gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
95 "direct mv w/r buffer",
96 width_in_mbs * height_in_mbs * 128,
98 assert(gen7_avc_surface->dmv_bottom);
/*
 * Emit MFX_PIPE_MODE_SELECT (5 dwords): configure the MFX pipe for
 * long-format VLD decode of the codec given by standard_select, and route
 * output through the pre- or post-deblocking buffer according to which
 * the context marked valid.
 *
 * NOTE(review): listing is incomplete -- the OUT_BCS_BATCH( openers for
 * dwords 1 and 2 (original lines 118, 127) and some trailing lines are
 * missing; the embedded line numbers jump.
 */
103 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
104 struct decode_state *decode_state,
106 struct gen7_mfd_context *gen7_mfd_context)
108 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
110 assert(standard_select == MFX_FORMAT_MPEG2 ||
111 standard_select == MFX_FORMAT_AVC ||
112 standard_select == MFX_FORMAT_VC1 ||
113 standard_select == MFX_FORMAT_JPEG ||
114 standard_select == MFX_FORMAT_VP8);
116 BEGIN_BCS_BATCH(batch, 5);
117 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
119 (MFX_LONG_MODE << 17) | /* Currently only support long format */
120 (MFD_MODE_VLD << 15) | /* VLD mode */
121 (0 << 10) | /* disable Stream-Out */
122 (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
123 (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
124 (0 << 5) | /* not in stitch mode */
125 (MFX_CODEC_DECODE << 4) | /* decoding mode */
126 (standard_select << 0));
128 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
129 (0 << 3) | /* terminate if AVC mbdata error occurs */
130 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
133 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
134 OUT_BCS_BATCH(batch, 0); /* reserved */
135 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE (6 dwords) describing the render target:
 * a tiled, planar 4:2:0 8-bit surface with its dimensions, pitch and the
 * Y offsets of the Cb/Cr planes taken from the object surface.
 * Chroma interleave is enabled for every codec except JPEG.
 *
 * NOTE(review): listing is incomplete -- OUT_BCS_BATCH( openers for
 * several dwords (original lines 157, 160, 168, 171) are missing.
 */
139 gen8_mfd_surface_state(VADriverContextP ctx,
140 struct decode_state *decode_state,
142 struct gen7_mfd_context *gen7_mfd_context)
144 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
145 struct object_surface *obj_surface = decode_state->render_object;
146 unsigned int y_cb_offset;
147 unsigned int y_cr_offset;
151 y_cb_offset = obj_surface->y_cb_offset;
152 y_cr_offset = obj_surface->y_cr_offset;
154 BEGIN_BCS_BATCH(batch, 6);
155 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
156 OUT_BCS_BATCH(batch, 0);
158 ((obj_surface->orig_height - 1) << 18) |
159 ((obj_surface->orig_width - 1) << 4));
161 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
162 ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
163 (0 << 22) | /* surface object control state, ignored */
164 ((obj_surface->width - 1) << 3) | /* pitch */
165 (0 << 2) | /* must be 0 */
166 (1 << 1) | /* must be tiled */
167 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
169 (0 << 16) | /* X offset for U(Cb), must be 0 */
170 (y_cb_offset << 0)); /* Y offset for U(Cb) */
172 (0 << 16) | /* X offset for V(Cr), must be 0 */
173 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
174 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): the pipeline's buffer
 * addresses -- pre/post-deblocking outputs, intra and deblocking-filter
 * row-store scratch buffers, the 16 reference picture surfaces, and the
 * macroblock-status buffers.  Buffers not marked valid are programmed
 * as zero dwords.
 *
 * NOTE(review): listing is incomplete -- the trailing arguments of each
 * OUT_BCS_RELOC, the "else" lines pairing the zero-dword fallbacks, and
 * the per-reference else branch are on missing lines (embedded numbers
 * jump throughout).  The declaration of loop index `i` is also not
 * visible here.
 */
178 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
179 struct decode_state *decode_state,
181 struct gen7_mfd_context *gen7_mfd_context)
183 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
186 BEGIN_BCS_BATCH(batch, 61);
187 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
188 /* Pre-deblock 1-3 */
189 if (gen7_mfd_context->pre_deblocking_output.valid)
190 OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
191 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
194 OUT_BCS_BATCH(batch, 0);
196 OUT_BCS_BATCH(batch, 0);
197 OUT_BCS_BATCH(batch, 0);
198 /* Post-deblocking 4-6 */
199 if (gen7_mfd_context->post_deblocking_output.valid)
200 OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
201 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
204 OUT_BCS_BATCH(batch, 0);
206 OUT_BCS_BATCH(batch, 0);
207 OUT_BCS_BATCH(batch, 0);
209 /* uncompressed-video & stream out 7-12 */
210 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
211 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
212 OUT_BCS_BATCH(batch, 0);
213 OUT_BCS_BATCH(batch, 0);
214 OUT_BCS_BATCH(batch, 0);
215 OUT_BCS_BATCH(batch, 0);
217 /* intra row-store scratch 13-15 */
218 if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
219 OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
220 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
223 OUT_BCS_BATCH(batch, 0);
225 OUT_BCS_BATCH(batch, 0);
226 OUT_BCS_BATCH(batch, 0);
227 /* deblocking-filter-row-store 16-18 */
228 if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
229 OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
230 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
233 OUT_BCS_BATCH(batch, 0);
234 OUT_BCS_BATCH(batch, 0);
235 OUT_BCS_BATCH(batch, 0);
238 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
239 struct object_surface *obj_surface;
241 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
242 gen7_mfd_context->reference_surface[i].obj_surface &&
243 gen7_mfd_context->reference_surface[i].obj_surface->bo) {
244 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
246 OUT_BCS_RELOC(batch, obj_surface->bo,
247 I915_GEM_DOMAIN_INSTRUCTION, 0,
250 OUT_BCS_BATCH(batch, 0);
253 OUT_BCS_BATCH(batch, 0);
256 /* reference property 51 */
257 OUT_BCS_BATCH(batch, 0);
259 /* Macroblock status & ILDB 52-57 */
260 OUT_BCS_BATCH(batch, 0);
261 OUT_BCS_BATCH(batch, 0);
262 OUT_BCS_BATCH(batch, 0);
263 OUT_BCS_BATCH(batch, 0);
264 OUT_BCS_BATCH(batch, 0);
265 OUT_BCS_BATCH(batch, 0);
267 /* the second Macroblock status 58-60 */
268 OUT_BCS_BATCH(batch, 0);
269 OUT_BCS_BATCH(batch, 0);
270 OUT_BCS_BATCH(batch, 0);
272 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): point the MFX indirect
 * bitstream object at the slice data buffer, set the 2 GiB upper bound,
 * and leave the MV / IT_COFF / IT_DBLK / PAK_BSE indirect objects unused
 * (zeroed) -- they are not needed for VLD decode.
 */
276 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
277 dri_bo *slice_data_bo,
279 struct gen7_mfd_context *gen7_mfd_context)
281 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
283 BEGIN_BCS_BATCH(batch, 26);
284 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
286 OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
287 OUT_BCS_BATCH(batch, 0);
288 OUT_BCS_BATCH(batch, 0);
289 /* Upper bound 4-5 */
290 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
291 OUT_BCS_BATCH(batch, 0);
293 /* MFX indirect MV 6-10 */
294 OUT_BCS_BATCH(batch, 0);
295 OUT_BCS_BATCH(batch, 0);
296 OUT_BCS_BATCH(batch, 0);
297 OUT_BCS_BATCH(batch, 0);
298 OUT_BCS_BATCH(batch, 0);
300 /* MFX IT_COFF 11-15 */
301 OUT_BCS_BATCH(batch, 0);
302 OUT_BCS_BATCH(batch, 0);
303 OUT_BCS_BATCH(batch, 0);
304 OUT_BCS_BATCH(batch, 0);
305 OUT_BCS_BATCH(batch, 0);
307 /* MFX IT_DBLK 16-20 */
308 OUT_BCS_BATCH(batch, 0);
309 OUT_BCS_BATCH(batch, 0);
310 OUT_BCS_BATCH(batch, 0);
311 OUT_BCS_BATCH(batch, 0);
312 OUT_BCS_BATCH(batch, 0);
314 /* MFX PAK_BSE object for encoder 21-25 */
315 OUT_BCS_BATCH(batch, 0);
316 OUT_BCS_BATCH(batch, 0);
317 OUT_BCS_BATCH(batch, 0);
318 OUT_BCS_BATCH(batch, 0);
319 OUT_BCS_BATCH(batch, 0);
321 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): the BSD/MPC row-store,
 * MPR row-store, and (VC-1) bitplane read buffers.  Each buffer not
 * marked valid is programmed as zero dwords instead.
 *
 * NOTE(review): listing is incomplete -- OUT_BCS_RELOC trailing
 * arguments and the "else" lines of each valid/invalid pair are on
 * missing lines (embedded numbers jump, e.g. 337->340, 347->350).
 */
325 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
326 struct decode_state *decode_state,
328 struct gen7_mfd_context *gen7_mfd_context)
330 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
332 BEGIN_BCS_BATCH(batch, 10);
333 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
335 if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
336 OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
337 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
340 OUT_BCS_BATCH(batch, 0);
342 OUT_BCS_BATCH(batch, 0);
343 OUT_BCS_BATCH(batch, 0);
344 /* MPR Row Store Scratch buffer 4-6 */
345 if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
346 OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
347 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
350 OUT_BCS_BATCH(batch, 0);
352 OUT_BCS_BATCH(batch, 0);
353 OUT_BCS_BATCH(batch, 0);
356 if (gen7_mfd_context->bitplane_read_buffer.valid)
357 OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
358 I915_GEM_DOMAIN_INSTRUCTION, 0,
361 OUT_BCS_BATCH(batch, 0);
362 OUT_BCS_BATCH(batch, 0);
363 OUT_BCS_BATCH(batch, 0);
364 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_QM_STATE (18 dwords): upload one quantization matrix of up to
 * 64 bytes (qm_length) for the matrix slot selected by qm_type.
 *
 * NOTE(review): qm_buffer is NOT zero-initialized before the memcpy, yet
 * a full 16*4 bytes are always pushed to the batch; when qm_length < 64
 * (e.g. the 3*16-byte AVC 4x4 lists) the trailing bytes sent to hardware
 * are uninitialized stack data.  Consider memset(qm_buffer, 0, sizeof
 * qm_buffer) first.  Cannot be fixed here: the parameter lines
 * (qm_type/qm/qm_length, original lines 369-371) are missing from this
 * listing.
 */
368 gen8_mfd_qm_state(VADriverContextP ctx,
372 struct gen7_mfd_context *gen7_mfd_context)
374 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
375 unsigned int qm_buffer[16];
377 assert(qm_length <= 16 * 4);
378 memcpy(qm_buffer, qm, qm_length);
380 BEGIN_BCS_BATCH(batch, 18);
381 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
382 OUT_BCS_BATCH(batch, qm_type << 0);
383 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
384 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_IMG_STATE (17 dwords): per-picture AVC decode parameters
 * derived from the VA picture parameter buffer -- frame size in MBs, QP
 * offsets, entropy mode, MBAFF/field flags, etc.  Also validates (via
 * assert) the field/frame consistency rules and that chroma format is
 * monochrome or 4:2:0, the only formats the MFX unit decodes.
 *
 * NOTE(review): listing is incomplete -- the img_struct computation
 * (original lines 401-413 are partially missing, including the
 * assignments the if/else branches make) and the OUT_BCS_BATCH( openers
 * for several dwords (435, 437, 440, 448) are dropped.
 */
388 gen8_mfd_avc_img_state(VADriverContextP ctx,
389 struct decode_state *decode_state,
390 struct gen7_mfd_context *gen7_mfd_context)
392 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
394 int mbaff_frame_flag;
395 unsigned int width_in_mbs, height_in_mbs;
396 VAPictureParameterBufferH264 *pic_param;
398 assert(decode_state->pic_param && decode_state->pic_param->buffer);
399 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
400 assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
402 if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
404 else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
409 if ((img_struct & 0x1) == 0x1) {
410 assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
412 assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
415 if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
416 assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
417 assert(pic_param->pic_fields.bits.field_pic_flag == 0);
419 assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
422 mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
423 !pic_param->pic_fields.bits.field_pic_flag);
425 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
426 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
428 /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
429 assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
430 pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
431 assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
433 BEGIN_BCS_BATCH(batch, 17);
434 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
436 width_in_mbs * height_in_mbs);
438 ((height_in_mbs - 1) << 16) |
439 ((width_in_mbs - 1) << 0));
441 ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
442 ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
443 (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
444 (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
445 (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
446 (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
449 (pic_param->seq_fields.bits.chroma_format_idc << 10) |
450 (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
451 ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
452 (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
453 (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
454 (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
455 (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
456 (mbaff_frame_flag << 1) |
457 (pic_param->pic_fields.bits.field_pic_flag << 0));
458 OUT_BCS_BATCH(batch, 0);
459 OUT_BCS_BATCH(batch, 0);
460 OUT_BCS_BATCH(batch, 0);
461 OUT_BCS_BATCH(batch, 0);
462 OUT_BCS_BATCH(batch, 0);
463 OUT_BCS_BATCH(batch, 0);
464 OUT_BCS_BATCH(batch, 0);
465 OUT_BCS_BATCH(batch, 0);
466 OUT_BCS_BATCH(batch, 0);
467 OUT_BCS_BATCH(batch, 0);
468 OUT_BCS_BATCH(batch, 0);
469 OUT_BCS_BATCH(batch, 0);
470 ADVANCE_BCS_BATCH(batch);
/*
 * Upload the AVC scaling lists: the six 4x4 intra/inter matrices always,
 * and the two 8x8 matrices only when transform_8x8_mode is on.  Uses the
 * application-supplied IQ matrix when present, otherwise the context's
 * default (flat) matrix set by gen8_mfd_avc_context_init.
 */
474 gen8_mfd_avc_qm_state(VADriverContextP ctx,
475 struct decode_state *decode_state,
476 struct gen7_mfd_context *gen7_mfd_context)
478 VAIQMatrixBufferH264 *iq_matrix;
479 VAPictureParameterBufferH264 *pic_param;
481 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
482 iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
484 iq_matrix = &gen7_mfd_context->iq_matrix.h264;
486 assert(decode_state->pic_param && decode_state->pic_param->buffer);
487 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
489 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
490 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
492 if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
493 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
494 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
/*
 * Emit MFD_AVC_PICID_STATE (10 dwords) with Picture ID Remapping
 * disabled (dword 1 == 1); the remaining remap dwords are zero.
 */
499 gen8_mfd_avc_picid_state(VADriverContextP ctx,
500 struct decode_state *decode_state,
501 struct gen7_mfd_context *gen7_mfd_context)
503 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
505 BEGIN_BCS_BATCH(batch, 10);
506 OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
507 OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
508 OUT_BCS_BATCH(batch, 0);
509 OUT_BCS_BATCH(batch, 0);
510 OUT_BCS_BATCH(batch, 0);
511 OUT_BCS_BATCH(batch, 0);
512 OUT_BCS_BATCH(batch, 0);
513 OUT_BCS_BATCH(batch, 0);
514 OUT_BCS_BATCH(batch, 0);
515 OUT_BCS_BATCH(batch, 0);
516 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): the direct-MV buffer
 * addresses of the 16 reference surfaces and of the current picture,
 * followed by the top/bottom picture-order counts (POCs) of each
 * reference and of the current picture, all consumed by B-slice
 * direct-mode prediction.
 *
 * NOTE(review): listing is incomplete -- OUT_BCS_RELOC trailing
 * arguments, the else branches emitting zero dwords for unused
 * reference slots, the dmv_bottom_flag handling for the current
 * picture, and loop-variable declarations are on missing lines.
 */
520 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
521 struct decode_state *decode_state,
522 VAPictureParameterBufferH264 *pic_param,
523 VASliceParameterBufferH264 *slice_param,
524 struct gen7_mfd_context *gen7_mfd_context)
526 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
527 struct object_surface *obj_surface;
528 GenAvcSurface *gen7_avc_surface;
529 VAPictureH264 *va_pic;
532 BEGIN_BCS_BATCH(batch, 71);
533 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
535 /* reference surfaces 0..15 */
536 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
537 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
538 gen7_mfd_context->reference_surface[i].obj_surface &&
539 gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
541 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
542 gen7_avc_surface = obj_surface->private_data;
544 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
545 I915_GEM_DOMAIN_INSTRUCTION, 0,
547 OUT_BCS_BATCH(batch, 0);
549 OUT_BCS_BATCH(batch, 0);
550 OUT_BCS_BATCH(batch, 0);
554 OUT_BCS_BATCH(batch, 0);
556 /* the current decoding frame/field */
557 va_pic = &pic_param->CurrPic;
558 obj_surface = decode_state->render_object;
559 assert(obj_surface->bo && obj_surface->private_data);
560 gen7_avc_surface = obj_surface->private_data;
562 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
563 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
566 OUT_BCS_BATCH(batch, 0);
567 OUT_BCS_BATCH(batch, 0);
570 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
571 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
574 assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
576 for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
577 va_pic = &pic_param->ReferenceFrames[j];
579 if (va_pic->flags & VA_PICTURE_H264_INVALID)
582 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
589 assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
591 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
592 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
594 OUT_BCS_BATCH(batch, 0);
595 OUT_BCS_BATCH(batch, 0);
599 va_pic = &pic_param->CurrPic;
600 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
601 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
603 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice: normalized slice
 * type (SI->I, SP->P), active reference counts per list, QP and
 * deblocking offsets, and the MB coordinates where this slice starts and
 * where the next one starts (or the picture end when this is the last
 * slice -- next_slice_param == NULL also sets the last-slice flag).
 * first_mb_in_slice is doubled for MBAFF pictures since the bitstream
 * counts MB pairs there.
 *
 * NOTE(review): listing is incomplete -- declarations of slice_type and
 * the OUT_BCS_BATCH( openers of several dwords (664, 669, 676, 680, 683)
 * are on missing lines, as are the I-slice num_ref_idx assignments.
 */
607 gen8_mfd_avc_slice_state(VADriverContextP ctx,
608 VAPictureParameterBufferH264 *pic_param,
609 VASliceParameterBufferH264 *slice_param,
610 VASliceParameterBufferH264 *next_slice_param,
611 struct gen7_mfd_context *gen7_mfd_context)
613 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
614 int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
615 int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
616 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
617 int num_ref_idx_l0, num_ref_idx_l1;
618 int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
619 pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
620 int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
623 if (slice_param->slice_type == SLICE_TYPE_I ||
624 slice_param->slice_type == SLICE_TYPE_SI) {
625 slice_type = SLICE_TYPE_I;
626 } else if (slice_param->slice_type == SLICE_TYPE_P ||
627 slice_param->slice_type == SLICE_TYPE_SP) {
628 slice_type = SLICE_TYPE_P;
630 assert(slice_param->slice_type == SLICE_TYPE_B);
631 slice_type = SLICE_TYPE_B;
634 if (slice_type == SLICE_TYPE_I) {
635 assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
636 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
639 } else if (slice_type == SLICE_TYPE_P) {
640 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
641 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
644 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
645 num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
648 first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
649 slice_hor_pos = first_mb_in_slice % width_in_mbs;
650 slice_ver_pos = first_mb_in_slice / width_in_mbs;
652 if (next_slice_param) {
653 first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
654 next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
655 next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
657 next_slice_hor_pos = 0;
658 next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
661 BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
662 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
663 OUT_BCS_BATCH(batch, slice_type);
665 (num_ref_idx_l1 << 24) |
666 (num_ref_idx_l0 << 16) |
667 (slice_param->chroma_log2_weight_denom << 8) |
668 (slice_param->luma_log2_weight_denom << 0));
670 (slice_param->direct_spatial_mv_pred_flag << 29) |
671 (slice_param->disable_deblocking_filter_idc << 27) |
672 (slice_param->cabac_init_idc << 24) |
673 ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
674 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
675 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
677 (slice_ver_pos << 24) |
678 (slice_hor_pos << 16) |
679 (first_mb_in_slice << 0));
681 (next_slice_ver_pos << 16) |
682 (next_slice_hor_pos << 0));
684 (next_slice_param == NULL) << 19); /* last slice flag */
685 OUT_BCS_BATCH(batch, 0);
686 OUT_BCS_BATCH(batch, 0);
687 OUT_BCS_BATCH(batch, 0);
688 OUT_BCS_BATCH(batch, 0);
689 ADVANCE_BCS_BATCH(batch);
/*
 * Thin wrapper: emit the MFX AVC reference-index lists by delegating to
 * the shared gen6 helper with this context's batch and reference-surface
 * frame store.  (The middle argument lines, original 700, are missing
 * from this listing.)
 */
693 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
694 VAPictureParameterBufferH264 *pic_param,
695 VASliceParameterBufferH264 *slice_param,
696 struct gen7_mfd_context *gen7_mfd_context)
698 gen6_send_avc_ref_idx_state(
699 gen7_mfd_context->base.batch,
701 gen7_mfd_context->reference_surface
/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE (98 dwords) when explicit weighted
 * prediction is in use: one table for P/SP slices with weighted_pred,
 * two tables (L0 then L1) for B slices with weighted_bipred_idc == 1.
 * Each table packs 32 entries of {luma weight/offset, two chroma
 * weight/offset pairs} as 16-bit values.
 *
 * NOTE(review): listing is incomplete -- the if/else selecting between
 * the L0 (i == 0) and L1 fill loops (original lines 731/740) and the
 * closing braces are on missing lines.
 */
706 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
707 VAPictureParameterBufferH264 *pic_param,
708 VASliceParameterBufferH264 *slice_param,
709 struct gen7_mfd_context *gen7_mfd_context)
711 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
712 int i, j, num_weight_offset_table = 0;
713 short weightoffsets[32 * 6];
715 if ((slice_param->slice_type == SLICE_TYPE_P ||
716 slice_param->slice_type == SLICE_TYPE_SP) &&
717 (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
718 num_weight_offset_table = 1;
721 if ((slice_param->slice_type == SLICE_TYPE_B) &&
722 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
723 num_weight_offset_table = 2;
726 for (i = 0; i < num_weight_offset_table; i++) {
727 BEGIN_BCS_BATCH(batch, 98);
728 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
729 OUT_BCS_BATCH(batch, i);
732 for (j = 0; j < 32; j++) {
733 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
734 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
735 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
736 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
737 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
738 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
741 for (j = 0; j < 32; j++) {
742 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
743 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
744 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
745 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
746 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
747 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
751 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
752 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFD_AVC_BSD_OBJECT (6 dwords): kick off hardware decode of one
 * slice.  Computes the bit offset of the first macroblock in the slice
 * data (skipping the slice header for CAVLC) and programs the slice's
 * size, offset, byte-aligned start and residual bit offset, plus the
 * last-slice flag.
 *
 * NOTE(review): listing is incomplete -- avc_get_first_mb_bit_offset's
 * middle argument (original line 766) and the OUT_BCS_BATCH( openers of
 * dwords 1, 4 and intermediate flag lines (772, 775-781, 783-785) are
 * missing.
 */
757 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
758 VAPictureParameterBufferH264 *pic_param,
759 VASliceParameterBufferH264 *slice_param,
760 dri_bo *slice_data_bo,
761 VASliceParameterBufferH264 *next_slice_param,
762 struct gen7_mfd_context *gen7_mfd_context)
764 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
765 int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
767 pic_param->pic_fields.bits.entropy_coding_mode_flag);
769 /* the input bitstream format on GEN7 differs from GEN6 */
770 BEGIN_BCS_BATCH(batch, 6);
771 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
773 (slice_param->slice_data_size));
774 OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
782 ((slice_data_bit_offset >> 3) << 16) |
786 ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
787 (slice_data_bit_offset & 0x7));
788 OUT_BCS_BATCH(batch, 0);
789 ADVANCE_BCS_BATCH(batch);
/*
 * One-time AVC context setup: seed the context's H.264 IQ matrix with
 * the default flat scaling lists, used when the app supplies none.
 */
793 gen8_mfd_avc_context_init(
794 VADriverContextP ctx,
795 struct gen7_mfd_context *gen7_mfd_context
798 /* Initialize flat scaling lists */
799 avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/*
 * Per-picture AVC decode setup:
 *  - scan all slices to decide whether in-loop deblocking (ILDB) is
 *    needed (any slice with disable_deblocking_filter_idc != 1);
 *  - refresh the frame-store index from the picture parameters;
 *  - (re)allocate the render target's NV12 bo, zero-fill the chroma
 *    plane (0x80) for monochrome streams, and attach the AVC private
 *    surface data;
 *  - point post-/pre-deblocking output at the render target, with
 *    exactly one of the two marked valid depending on enable_avc_ildb;
 *  - reallocate the intra, deblocking-filter, BSD/MPC and MPR row-store
 *    scratch buffers sized from the picture width in MBs.
 *
 * NOTE(review): listing is incomplete -- the `dri_bo *bo;` declaration,
 * several dri_bo_alloc trailing arguments/names, the enable_avc_ildb
 * assignment inside the slice scan, and various closing braces are on
 * missing lines (embedded numbers jump, e.g. 827->836, 873->878).
 */
803 gen8_mfd_avc_decode_init(VADriverContextP ctx,
804 struct decode_state *decode_state,
805 struct gen7_mfd_context *gen7_mfd_context)
807 VAPictureParameterBufferH264 *pic_param;
808 VASliceParameterBufferH264 *slice_param;
809 struct i965_driver_data *i965 = i965_driver_data(ctx);
810 struct object_surface *obj_surface;
812 int i, j, enable_avc_ildb = 0;
813 unsigned int width_in_mbs, height_in_mbs;
815 for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
816 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
817 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
819 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
820 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
821 assert((slice_param->slice_type == SLICE_TYPE_I) ||
822 (slice_param->slice_type == SLICE_TYPE_SI) ||
823 (slice_param->slice_type == SLICE_TYPE_P) ||
824 (slice_param->slice_type == SLICE_TYPE_SP) ||
825 (slice_param->slice_type == SLICE_TYPE_B));
827 if (slice_param->disable_deblocking_filter_idc != 1) {
836 assert(decode_state->pic_param && decode_state->pic_param->buffer);
837 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
838 intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
839 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
840 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
841 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
842 assert(height_in_mbs > 0 && height_in_mbs <= 256);
844 /* Current decoded picture */
845 obj_surface = decode_state->render_object;
846 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
847 obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
848 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
850 /* initial uv component for YUV400 case */
851 if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
852 unsigned int uv_offset = obj_surface->width * obj_surface->height;
853 unsigned int uv_size = obj_surface->width * obj_surface->height / 2;
855 drm_intel_gem_bo_map_gtt(obj_surface->bo);
856 memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
857 drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
860 gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
862 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
863 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
864 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
865 gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
867 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
868 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
869 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
870 gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
872 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
873 bo = dri_bo_alloc(i965->intel.bufmgr,
878 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
879 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
881 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
882 bo = dri_bo_alloc(i965->intel.bufmgr,
883 "deblocking filter row store",
884 width_in_mbs * 64 * 4,
887 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
888 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
890 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
891 bo = dri_bo_alloc(i965->intel.bufmgr,
893 width_in_mbs * 64 * 2,
896 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
897 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
899 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
900 bo = dri_bo_alloc(i965->intel.bufmgr,
902 width_in_mbs * 64 * 2,
905 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
906 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
908 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Top-level AVC picture decode: run per-picture init, then build and
 * flush one atomic BCS batch containing the common state commands
 * (pipe mode, surface, buffer addresses, QM, IMG, PICID) followed by,
 * for every slice in every slice-parameter buffer, the per-slice
 * commands (directmode, ref-idx, weight/offset, slice state, BSD
 * object).  next_slice_param points at the following slice (within the
 * same buffer or the first of the next one) so slice state can mark the
 * last slice; it is NULL on the final slice.
 *
 * NOTE(review): listing is incomplete -- the declarations of i/j, the
 * slice_param++ advance at the loop bottom, and closing braces are on
 * missing lines (embedded numbers jump, e.g. 919->922, 964->969).
 */
912 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
913 struct decode_state *decode_state,
914 struct gen7_mfd_context *gen7_mfd_context)
916 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
917 VAPictureParameterBufferH264 *pic_param;
918 VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
919 dri_bo *slice_data_bo;
922 assert(decode_state->pic_param && decode_state->pic_param->buffer);
923 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
924 gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
926 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
927 intel_batchbuffer_emit_mi_flush(batch);
928 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
929 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
930 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
931 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
932 gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
933 gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
934 gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
936 for (j = 0; j < decode_state->num_slice_params; j++) {
937 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
938 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
939 slice_data_bo = decode_state->slice_datas[j]->bo;
940 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
942 if (j == decode_state->num_slice_params - 1)
943 next_slice_group_param = NULL;
945 next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
947 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
948 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
949 assert((slice_param->slice_type == SLICE_TYPE_I) ||
950 (slice_param->slice_type == SLICE_TYPE_SI) ||
951 (slice_param->slice_type == SLICE_TYPE_P) ||
952 (slice_param->slice_type == SLICE_TYPE_SP) ||
953 (slice_param->slice_type == SLICE_TYPE_B));
955 if (i < decode_state->slice_params[j]->num_elements - 1)
956 next_slice_param = slice_param + 1;
958 next_slice_param = next_slice_group_param;
960 gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
961 gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
962 gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
963 gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
964 gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
969 intel_batchbuffer_end_atomic(batch);
970 intel_batchbuffer_flush(batch);
/*
 * Prepare per-picture decoder state for MPEG-2: bind reference surfaces,
 * point the pre-deblocking output at the render surface, and (re)allocate
 * the BSD/MPC row-store scratch buffer.  All other scratch buffers are
 * unused for MPEG-2 and are marked invalid.
 * NOTE(review): this chunk appears to have lines elided by extraction
 * (missing braces / allocation arguments); code left byte-identical.
 */
974 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
975 struct decode_state *decode_state,
976 struct gen7_mfd_context *gen7_mfd_context)
978 VAPictureParameterBufferMPEG2 *pic_param;
979 struct i965_driver_data *i965 = i965_driver_data(ctx);
980 struct object_surface *obj_surface;
982 unsigned int width_in_mbs;
984 assert(decode_state->pic_param && decode_state->pic_param->buffer);
985 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
986 width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
/* Bind forward/backward reference pictures into the frame-store table. */
988 mpeg2_set_reference_surfaces(
990 gen7_mfd_context->reference_surface,
995 /* Current decoded picture */
996 obj_surface = decode_state->render_object;
997 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
/* MPEG-2 writes decoded data to the pre-deblocking output (no loop filter). */
999 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1000 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1001 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1002 gen7_mfd_context->pre_deblocking_output.valid = 1;
1004 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1005 bo = dri_bo_alloc(i965->intel.bufmgr,
1006 "bsd mpc row store",
1010 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1011 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
/* Remaining buffers are not used on the MPEG-2 path. */
1013 gen7_mfd_context->post_deblocking_output.valid = 0;
1014 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1015 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1016 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1017 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Emit the MFX_MPEG2_PIC_STATE command (13 dwords) describing the picture:
 * f_codes, picture-coding-extension flags, coding type, and frame size in
 * macroblocks.  Dword layout follows the MFX hardware spec; do not reorder.
 */
1021 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1022 struct decode_state *decode_state,
1023 struct gen7_mfd_context *gen7_mfd_context)
1025 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1026 VAPictureParameterBufferMPEG2 *pic_param;
1027 unsigned int slice_concealment_disable_bit = 0;
1029 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1030 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
/* NOTE(review): the condition guarding this assignment appears elided by
 * extraction; as shown, concealment is always disabled. */
1032 slice_concealment_disable_bit = 1;
1034 BEGIN_BCS_BATCH(batch, 13);
1035 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1036 OUT_BCS_BATCH(batch,
1037 (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1038 ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1039 ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1040 ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1041 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1042 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1043 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1044 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1045 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1046 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1047 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1048 pic_param->picture_coding_extension.bits.alternate_scan << 6);
1049 OUT_BCS_BATCH(batch,
1050 pic_param->picture_coding_type << 9);
/* Frame dimensions in macroblocks, minus one, per hardware encoding. */
1051 OUT_BCS_BATCH(batch,
1052 (slice_concealment_disable_bit << 31) |
1053 ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1054 ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
/* Remaining dwords are reserved/unused and programmed to zero. */
1055 OUT_BCS_BATCH(batch, 0);
1056 OUT_BCS_BATCH(batch, 0);
1057 OUT_BCS_BATCH(batch, 0);
1058 OUT_BCS_BATCH(batch, 0);
1059 OUT_BCS_BATCH(batch, 0);
1060 OUT_BCS_BATCH(batch, 0);
1061 OUT_BCS_BATCH(batch, 0);
1062 OUT_BCS_BATCH(batch, 0);
1063 OUT_BCS_BATCH(batch, 0);
1064 ADVANCE_BCS_BATCH(batch);
/*
 * Program the MPEG-2 quantization matrices.  The matrices cached in the
 * context are refreshed from the VA IQ-matrix buffer (converting zig-zag
 * scan order to raster order via zigzag_direct), then committed to the
 * hardware with gen8_mfd_qm_state.  A load flag of -1 marks the cached
 * matrix as never-initialized, forcing the first update.
 */
1068 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1069 struct decode_state *decode_state,
1070 struct gen7_mfd_context *gen7_mfd_context)
1072 VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1075 /* Update internal QM state */
1076 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1077 VAIQMatrixBufferMPEG2 * const iq_matrix =
1078 (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1080 if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1081 iq_matrix->load_intra_quantiser_matrix) {
1082 gen_iq_matrix->load_intra_quantiser_matrix =
1083 iq_matrix->load_intra_quantiser_matrix;
1084 if (iq_matrix->load_intra_quantiser_matrix) {
/* De-zigzag: VA supplies the matrix in scan order, HW wants raster order. */
1085 for (j = 0; j < 64; j++)
1086 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1087 iq_matrix->intra_quantiser_matrix[j];
1091 if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1092 iq_matrix->load_non_intra_quantiser_matrix) {
1093 gen_iq_matrix->load_non_intra_quantiser_matrix =
1094 iq_matrix->load_non_intra_quantiser_matrix;
1095 if (iq_matrix->load_non_intra_quantiser_matrix) {
1096 for (j = 0; j < 64; j++)
1097 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1098 iq_matrix->non_intra_quantiser_matrix[j];
1103 /* Commit QM state to HW */
/* i == 0 -> intra matrix, i == 1 -> non-intra matrix (selection logic
 * appears partially elided in this chunk). */
1104 for (i = 0; i < 2; i++) {
1105 unsigned char *qm = NULL;
1109 if (gen_iq_matrix->load_intra_quantiser_matrix) {
1110 qm = gen_iq_matrix->intra_quantiser_matrix;
1111 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1114 if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1115 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1116 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1123 gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
/*
 * Emit one MFD_MPEG2_BSD_OBJECT command for a slice: bitstream size/offset
 * (adjusted so decoding starts at the first macroblock) and the macroblock
 * span covered by the slice.  The span end comes from the next slice's
 * start position, or from the bottom of the picture for the last slice.
 */
1128 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1129 VAPictureParameterBufferMPEG2 *pic_param,
1130 VASliceParameterBufferMPEG2 *slice_param,
1131 VASliceParameterBufferMPEG2 *next_slice_param,
1132 struct gen7_mfd_context *gen7_mfd_context)
1134 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1135 unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1136 int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1138 if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1139 pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
/* Workaround: some streams report field-picture slice positions in frame
 * units; halve them only when the wa flag was detected for this stream. */
1141 is_field_pic_wa = is_field_pic &&
1142 gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1144 vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1145 hpos0 = slice_param->slice_horizontal_position;
1147 if (next_slice_param == NULL) {
1148 vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1151 vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1152 hpos1 = next_slice_param->slice_horizontal_position;
1155 mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1157 BEGIN_BCS_BATCH(batch, 5);
1158 OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
/* macroblock_offset is in bits; >> 3 converts the byte part. */
1159 OUT_BCS_BATCH(batch,
1160 slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1161 OUT_BCS_BATCH(batch,
1162 slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1163 OUT_BCS_BATCH(batch,
1167 (next_slice_param == NULL) << 5 |
1168 (next_slice_param == NULL) << 3 |
1169 (slice_param->macroblock_offset & 0x7));
1170 OUT_BCS_BATCH(batch,
1171 (slice_param->quantiser_scale_code << 24) |
1172 (vpos1 << 8 | hpos1));
1173 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level MPEG-2 decode: initialize per-picture state, emit the common
 * MFX pipeline setup, then one BSD object per slice.  Slices are linked so
 * each BSD object knows where the next slice starts (NULL for the last).
 */
1177 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1178 struct decode_state *decode_state,
1179 struct gen7_mfd_context *gen7_mfd_context)
1181 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1182 VAPictureParameterBufferMPEG2 *pic_param;
1183 VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1184 dri_bo *slice_data_bo;
1187 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1188 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1190 gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1191 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1192 intel_batchbuffer_emit_mi_flush(batch);
1193 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1194 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1195 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1196 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1197 gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1198 gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
/* Lazily detect the field-picture slice-position workaround (cached < 0
 * means "not yet determined"). */
1200 if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1201 gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1202 mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1204 for (j = 0; j < decode_state->num_slice_params; j++) {
1205 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1206 slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1207 slice_data_bo = decode_state->slice_datas[j]->bo;
1208 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
/* Look ahead to the first slice of the next slice group (if any). */
1210 if (j == decode_state->num_slice_params - 1)
1211 next_slice_group_param = NULL;
1213 next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1215 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1216 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1218 if (i < decode_state->slice_params[j]->num_elements - 1)
1219 next_slice_param = slice_param + 1;
1221 next_slice_param = next_slice_group_param;
1223 gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1228 intel_batchbuffer_end_atomic(batch);
1229 intel_batchbuffer_flush(batch);
/* Map VA VC-1 picture_type (I/P/B/BI/skipped) to GEN7 MFX picture types.
 * NOTE(review): several initializer lines appear elided in this chunk. */
1232 static const int va_to_gen7_vc1_pic_type[5] = {
1236 GEN7_VC1_BI_PICTURE,
/* Map VA VC-1 mv_mode to the MFX unified motion-vector mode encoding. */
1240 static const int va_to_gen7_vc1_mv[4] = {
1242 2, /* 1-MV half-pel */
1243 3, /* 1-MV half-pel bilinear */
/* B-fraction scale factors indexed by b_picture_fraction (VC-1 spec,
 * used to derive BRFD for B pictures). */
1247 static const int b_picture_scale_factor[21] = {
1248 128, 85, 170, 64, 192,
1249 51, 102, 153, 204, 43,
1250 215, 37, 74, 111, 148,
1251 185, 222, 32, 96, 160,
/* Map VA conditional_overlap_flag to the MFX CONDOVER field. */
1255 static const int va_to_gen7_vc1_condover[3] = {
/* Map VA sequence profile (0..3) to GEN7 MFX profile codes. */
1261 static const int va_to_gen7_vc1_profile[4] = {
1262 GEN7_VC1_SIMPLE_PROFILE,
1263 GEN7_VC1_MAIN_PROFILE,
1264 GEN7_VC1_RESERVED_PROFILE,
1265 GEN7_VC1_ADVANCED_PROFILE
/*
 * Destructor for the per-surface VC-1 private data (installed as
 * obj_surface->free_private_data): releases the direct-MV buffer and the
 * container itself.  Safe to call when no private data was allocated.
 */
1269 gen8_mfd_free_vc1_surface(void **data)
1271 struct gen7_vc1_surface *gen7_vc1_surface = *data;
1273 if (!gen7_vc1_surface)
1276 dri_bo_unreference(gen7_vc1_surface->dmv);
1277 free(gen7_vc1_surface);
/*
 * Lazily attach VC-1 private data to the render surface: records the
 * picture type (needed later to validate direct-MV references for B
 * pictures) and allocates the direct motion-vector read/write buffer,
 * sized at 64 bytes per macroblock.
 */
1282 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1283 VAPictureParameterBufferVC1 *pic_param,
1284 struct object_surface *obj_surface)
1286 struct i965_driver_data *i965 = i965_driver_data(ctx);
1287 struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1288 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1289 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1291 obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1293 if (!gen7_vc1_surface) {
1294 gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1295 assert((obj_surface->size & 0x3f) == 0);
1296 obj_surface->private_data = gen7_vc1_surface;
1299 gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1301 if (gen7_vc1_surface->dmv == NULL) {
1302 gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1303 "direct mv w/r buffer",
1304 width_in_mbs * height_in_mbs * 64,
/*
 * Prepare per-picture decoder state for VC-1: bind references, route the
 * output through the pre- or post-deblocking path depending on the
 * loop-filter flag, allocate row-store scratch buffers, and repack the
 * VA bitplane buffer into the hardware layout when bitplanes are present.
 */
1310 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1311 struct decode_state *decode_state,
1312 struct gen7_mfd_context *gen7_mfd_context)
1314 VAPictureParameterBufferVC1 *pic_param;
1315 struct i965_driver_data *i965 = i965_driver_data(ctx);
1316 struct object_surface *obj_surface;
1321 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1322 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1323 width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1324 picture_type = pic_param->picture_fields.bits.picture_type;
1326 intel_update_vc1_frame_store_index(ctx,
1329 gen7_mfd_context->reference_surface);
1331 /* Current decoded picture */
1332 obj_surface = decode_state->render_object;
1333 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1334 gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
/* Exactly one of pre-/post-deblocking output is valid, selected by the
 * in-loop filter flag; both reference the same render bo. */
1336 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1337 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1338 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1339 gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1341 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1342 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1343 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1344 gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1346 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1347 bo = dri_bo_alloc(i965->intel.bufmgr,
1352 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1353 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1355 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1356 bo = dri_bo_alloc(i965->intel.bufmgr,
1357 "deblocking filter row store",
1358 width_in_mbs * 7 * 64,
1361 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1362 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1364 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1365 bo = dri_bo_alloc(i965->intel.bufmgr,
1366 "bsd mpc row store",
1370 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1371 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1373 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1375 gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1376 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1378 if (gen7_mfd_context->bitplane_read_buffer.valid) {
1379 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1380 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
/* HW bitplane rows are packed two macroblocks (4 bits each) per byte. */
1381 int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1383 uint8_t *src = NULL, *dst = NULL;
1385 assert(decode_state->bit_plane->buffer);
1386 src = decode_state->bit_plane->buffer;
1388 bo = dri_bo_alloc(i965->intel.bufmgr,
1390 bitplane_width * height_in_mbs,
1393 gen7_mfd_context->bitplane_read_buffer.bo = bo;
1395 dri_bo_map(bo, True);
1396 assert(bo->virtual);
/* Repack nibble-per-MB bitplane data from the VA layout into the
 * hardware layout, row by row. */
1399 for (src_h = 0; src_h < height_in_mbs; src_h++) {
1400 for(src_w = 0; src_w < width_in_mbs; src_w++) {
1401 int src_index, dst_index;
1405 src_index = (src_h * width_in_mbs + src_w) / 2;
1406 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1407 src_value = ((src[src_index] >> src_shift) & 0xf);
/* Skipped pictures carry no bitplane data — presumably forced on here;
 * the assignment appears elided in this chunk (TODO confirm upstream). */
1409 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1413 dst_index = src_w / 2;
1414 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
/* Odd trailing macroblock: shift the lone nibble into place. */
1418 dst[src_w / 2] >>= 4;
1420 dst += bitplane_width;
1425 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
/*
 * Emit MFD_VC1_LONG_PIC_STATE (6 dwords) for the current picture.  Derives
 * the hardware fields from the VA picture parameters: alternate-pquant
 * configuration (DQUANT/DQPROFILE per VC-1 7.1.1.31), unified MV mode,
 * B-picture scale factor, picture type, transform coding sets, bitplane
 * presence flags, and the interpolation filter mode.  Dword layout follows
 * the MFX hardware spec; do not reorder the OUT_BCS_BATCH fields.
 * NOTE(review): several else/break/brace lines appear elided by extraction;
 * code left byte-identical.
 */
1429 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1430 struct decode_state *decode_state,
1431 struct gen7_mfd_context *gen7_mfd_context)
1433 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1434 VAPictureParameterBufferVC1 *pic_param;
1435 struct object_surface *obj_surface;
1436 int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1437 int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1438 int unified_mv_mode;
1439 int ref_field_pic_polarity = 0;
1440 int scale_factor = 0;
1442 int dmv_surface_valid = 0;
1448 int interpolation_mode = 0;
1450 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1451 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1453 profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1454 dquant = pic_param->pic_quantizer_fields.bits.dquant;
1455 dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1456 dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1457 dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1458 dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1459 dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1460 alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
/* Derive ALTPQUANT config/edge mask from DQUANT and DQPROFILE
 * (VC-1 spec, picture-layer quantizer selection). */
1463 alt_pquant_config = 0;
1464 alt_pquant_edge_mask = 0;
1465 } else if (dquant == 2) {
1466 alt_pquant_config = 1;
1467 alt_pquant_edge_mask = 0xf;
1469 assert(dquant == 1);
1470 if (dquantfrm == 0) {
1471 alt_pquant_config = 0;
1472 alt_pquant_edge_mask = 0;
1475 assert(dquantfrm == 1);
1476 alt_pquant_config = 1;
1478 switch (dqprofile) {
1480 if (dqbilevel == 0) {
1481 alt_pquant_config = 2;
1482 alt_pquant_edge_mask = 0;
1484 assert(dqbilevel == 1);
1485 alt_pquant_config = 3;
1486 alt_pquant_edge_mask = 0;
1491 alt_pquant_edge_mask = 0xf;
1496 alt_pquant_edge_mask = 0x9;
1498 alt_pquant_edge_mask = (0x3 << dqdbedge);
1503 alt_pquant_edge_mask = (0x1 << dqsbedge);
/* Unified MV mode: with intensity compensation, mv_mode2 carries the
 * real motion-vector mode. */
1512 if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1513 assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1514 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1516 assert(pic_param->mv_fields.bits.mv_mode < 4);
1517 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1520 if (pic_param->sequence_fields.bits.interlace == 1 &&
1521 pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1522 /* FIXME: calculate reference field picture polarity */
1524 ref_field_pic_polarity = 0;
1527 if (pic_param->b_picture_fraction < 21)
1528 scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1530 picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
/* Advanced-profile I pictures are treated as BI by the hardware. */
1532 if (profile == GEN7_VC1_ADVANCED_PROFILE &&
1533 picture_type == GEN7_VC1_I_PICTURE)
1534 picture_type = GEN7_VC1_BI_PICTURE;
1536 if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1537 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1539 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1542 * 8.3.6.2.1 Transform Type Selection
1543 * If variable-sized transform coding is not enabled,
1544 * then the 8x8 transform shall be used for all blocks.
1545 * it is also MFX_VC1_PIC_STATE requirement.
1547 if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1548 pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
1549 pic_param->transform_fields.bits.frame_level_transform_type = 0;
/* Direct-MV buffer is only usable for B pictures whose backward reference
 * was a P picture (I/BI references carry no motion vectors). */
1553 if (picture_type == GEN7_VC1_B_PICTURE) {
1554 struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1556 obj_surface = decode_state->reference_objects[1];
1559 gen7_vc1_surface = obj_surface->private_data;
1561 if (!gen7_vc1_surface ||
1562 (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1563 va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1564 dmv_surface_valid = 0;
1566 dmv_surface_valid = 1;
1569 assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1571 if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1572 fcm = pic_param->picture_fields.bits.frame_coding_mode;
1574 if (pic_param->picture_fields.bits.top_field_first)
/* BRFD: B-picture reference distance derived via the B-fraction scale. */
1580 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1581 brfd = pic_param->reference_fields.bits.reference_distance;
1582 brfd = (scale_factor * brfd) >> 8;
1583 brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
/* Overlap smoothing enable, per profile/quantizer/picture-type rules. */
1590 if (profile != GEN7_VC1_ADVANCED_PROFILE){
1591 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1592 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1596 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1597 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1600 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1601 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
1602 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1604 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1605 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1611 assert(pic_param->conditional_overlap_flag < 3);
1612 assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1614 if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1615 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1616 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1617 interpolation_mode = 9; /* Half-pel bilinear */
1618 else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1619 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1620 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1621 interpolation_mode = 1; /* Half-pel bicubic */
1623 interpolation_mode = 0; /* Quarter-pel bicubic */
1625 BEGIN_BCS_BATCH(batch, 6);
1626 OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1627 OUT_BCS_BATCH(batch,
1628 (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1629 ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1630 OUT_BCS_BATCH(batch,
1631 ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1632 dmv_surface_valid << 15 |
1633 (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1634 pic_param->rounding_control << 13 |
1635 pic_param->sequence_fields.bits.syncmarker << 12 |
1636 interpolation_mode << 8 |
1637 0 << 7 | /* FIXME: scale up or down ??? */
1638 pic_param->range_reduction_frame << 6 |
1639 pic_param->entrypoint_fields.bits.loopfilter << 5 |
1641 !pic_param->picture_fields.bits.is_first_field << 3 |
1642 (pic_param->sequence_fields.bits.profile == 3) << 0);
1643 OUT_BCS_BATCH(batch,
1644 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1645 picture_type << 26 |
1648 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1650 OUT_BCS_BATCH(batch,
1651 unified_mv_mode << 28 |
1652 pic_param->mv_fields.bits.four_mv_switch << 27 |
1653 pic_param->fast_uvmc_flag << 26 |
1654 ref_field_pic_polarity << 25 |
1655 pic_param->reference_fields.bits.num_reference_pictures << 24 |
1656 pic_param->reference_fields.bits.reference_distance << 20 |
1657 pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1658 pic_param->mv_fields.bits.extended_dmv_range << 10 |
1659 pic_param->mv_fields.bits.extended_mv_range << 8 |
1660 alt_pquant_edge_mask << 4 |
1661 alt_pquant_config << 2 |
1662 pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1663 pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
/* DW5: bitplane-present flags are inverted ("raw mode" when absent). */
1664 OUT_BCS_BATCH(batch,
1665 !!pic_param->bitplane_present.value << 31 |
1666 !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1667 !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1668 !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1669 !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1670 !pic_param->bitplane_present.flags.bp_overflags << 26 |
1671 !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1672 !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1673 pic_param->mv_fields.bits.mv_table << 20 |
1674 pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1675 pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1676 pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1677 pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1678 pic_param->mb_mode_table << 8 |
1680 pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1681 pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1682 pic_param->cbp_table << 0);
1683 ADVANCE_BCS_BATCH(batch);
1687 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1688 struct decode_state *decode_state,
1689 struct gen7_mfd_context *gen7_mfd_context)
1691 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1692 VAPictureParameterBufferVC1 *pic_param;
1693 int intensitycomp_single;
1695 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1696 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1698 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1699 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1700 intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1702 BEGIN_BCS_BATCH(batch, 6);
1703 OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1704 OUT_BCS_BATCH(batch,
1705 0 << 14 | /* FIXME: double ??? */
1707 intensitycomp_single << 10 |
1708 intensitycomp_single << 8 |
1709 0 << 4 | /* FIXME: interlace mode */
1711 OUT_BCS_BATCH(batch,
1712 pic_param->luma_shift << 16 |
1713 pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1714 OUT_BCS_BATCH(batch, 0);
1715 OUT_BCS_BATCH(batch, 0);
1716 OUT_BCS_BATCH(batch, 0);
1717 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_VC1_DIRECTMODE_STATE (7 dwords): addresses of the direct-MV
 * write buffer (current picture) and read buffer (backward reference,
 * reference_objects[1]).  Either relocation is replaced by zero dwords
 * when the corresponding surface has no direct-MV buffer.
 */
1721 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1722 struct decode_state *decode_state,
1723 struct gen7_mfd_context *gen7_mfd_context)
1725 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1726 struct object_surface *obj_surface;
1727 dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1729 obj_surface = decode_state->render_object;
1731 if (obj_surface && obj_surface->private_data) {
1732 dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1735 obj_surface = decode_state->reference_objects[1];
1737 if (obj_surface && obj_surface->private_data) {
1738 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1741 BEGIN_BCS_BATCH(batch, 7);
1742 OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1744 if (dmv_write_buffer)
1745 OUT_BCS_RELOC(batch, dmv_write_buffer,
1746 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1749 OUT_BCS_BATCH(batch, 0);
1751 OUT_BCS_BATCH(batch, 0);
1752 OUT_BCS_BATCH(batch, 0);
1754 if (dmv_read_buffer)
1755 OUT_BCS_RELOC(batch, dmv_read_buffer,
1756 I915_GEM_DOMAIN_INSTRUCTION, 0,
1759 OUT_BCS_BATCH(batch, 0);
1761 OUT_BCS_BATCH(batch, 0);
1762 OUT_BCS_BATCH(batch, 0);
1764 ADVANCE_BCS_BATCH(batch);
/*
 * Compute the real bit offset of the first macroblock within a VC-1 slice.
 *
 * buf:                      slice bitstream bytes (must cover the header
 *                           plus 3 bytes of lookahead)
 * in_slice_data_bit_offset: macroblock_offset from the VA slice parameter,
 *                           in bits from the start of the slice data
 * profile:                  VA VC-1 profile value (3 == Advanced)
 *
 * Simple/Main profile streams carry no start-code emulation prevention, so
 * the offset is returned unchanged.  Advanced profile headers may contain
 * 00 00 03 escape sequences; each emulation-prevention byte pushes the
 * macroblock data one byte further into the buffer, so the header is
 * scanned and the byte part of the offset adjusted while the bit remainder
 * is preserved.
 *
 * Fix: restores the profile gate and the escape-byte skip (i++, j += 2)
 * which were missing, so escapes are actually accounted for and non-
 * Advanced streams are untouched.
 */
static inline int
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int out_slice_data_bit_offset;
    int slice_header_size = in_slice_data_bit_offset / 8;
    int i, j;

    if (profile != 3)
        out_slice_data_bit_offset = in_slice_data_bit_offset;
    else {
        for (i = 0, j = 0; i < slice_header_size; i++, j++) {
            /* 00 00 03 followed by a byte < 4 marks an emulation-prevention
             * byte: it consumed one extra buffer byte for one header byte. */
            if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
                i++, j += 2;
            }
        }

        out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
    }

    return out_slice_data_bit_offset;
}
/*
 * Emit one MFD_VC1_BSD_OBJECT command per slice.  The slice data is mapped
 * so the first-macroblock bit offset can be corrected for Advanced-profile
 * emulation-prevention bytes before programming size/offset and the
 * vertical span (start row, next slice's start row or picture bottom).
 */
1790 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1791 VAPictureParameterBufferVC1 *pic_param,
1792 VASliceParameterBufferVC1 *slice_param,
1793 VASliceParameterBufferVC1 *next_slice_param,
1794 dri_bo *slice_data_bo,
1795 struct gen7_mfd_context *gen7_mfd_context)
1797 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1798 int next_slice_start_vert_pos;
1799 int macroblock_offset;
1800 uint8_t *slice_data = NULL;
/* Map only long enough to scan the slice header for escape bytes. */
1802 dri_bo_map(slice_data_bo, 0);
1803 slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1804 macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1805 slice_param->macroblock_offset,
1806 pic_param->sequence_fields.bits.profile);
1807 dri_bo_unmap(slice_data_bo);
1809 if (next_slice_param)
1810 next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1812 next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1814 BEGIN_BCS_BATCH(batch, 5);
1815 OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
/* macroblock_offset is in bits; >> 3 converts the byte part. */
1816 OUT_BCS_BATCH(batch,
1817 slice_param->slice_data_size - (macroblock_offset >> 3));
1818 OUT_BCS_BATCH(batch,
1819 slice_param->slice_data_offset + (macroblock_offset >> 3));
1820 OUT_BCS_BATCH(batch,
1821 slice_param->slice_vertical_position << 16 |
1822 next_slice_start_vert_pos << 0);
1823 OUT_BCS_BATCH(batch,
1824 (macroblock_offset & 0x7));
1825 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level VC-1 decode: per-picture init, common MFX pipeline setup plus
 * the VC-1-specific pic/pred-pipe/directmode states, then one BSD object
 * per slice.  Slices are linked so each BSD object knows where the next
 * slice starts (NULL for the last slice of the picture).
 */
1829 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1830 struct decode_state *decode_state,
1831 struct gen7_mfd_context *gen7_mfd_context)
1833 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1834 VAPictureParameterBufferVC1 *pic_param;
1835 VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1836 dri_bo *slice_data_bo;
1839 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1840 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1842 gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1843 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1844 intel_batchbuffer_emit_mi_flush(batch);
1845 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1846 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1847 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1848 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1849 gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1850 gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1851 gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1853 for (j = 0; j < decode_state->num_slice_params; j++) {
1854 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1855 slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1856 slice_data_bo = decode_state->slice_datas[j]->bo;
1857 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
/* Look ahead to the first slice of the next slice group (if any). */
1859 if (j == decode_state->num_slice_params - 1)
1860 next_slice_group_param = NULL;
1862 next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1864 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1865 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1867 if (i < decode_state->slice_params[j]->num_elements - 1)
1868 next_slice_param = slice_param + 1;
1870 next_slice_param = next_slice_group_param;
1872 gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1877 intel_batchbuffer_end_atomic(batch);
1878 intel_batchbuffer_flush(batch);
1882 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1883 struct decode_state *decode_state,
1884 struct gen7_mfd_context *gen7_mfd_context)
1886 struct object_surface *obj_surface;
1887 VAPictureParameterBufferJPEGBaseline *pic_param;
1888 int subsampling = SUBSAMPLE_YUV420;
1890 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1892 if (pic_param->num_components == 1)
1893 subsampling = SUBSAMPLE_YUV400;
1894 else if (pic_param->num_components == 3) {
1895 int h1 = pic_param->components[0].h_sampling_factor;
1896 int h2 = pic_param->components[1].h_sampling_factor;
1897 int h3 = pic_param->components[2].h_sampling_factor;
1898 int v1 = pic_param->components[0].v_sampling_factor;
1899 int v2 = pic_param->components[1].v_sampling_factor;
1900 int v3 = pic_param->components[2].v_sampling_factor;
1902 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1903 v1 == 2 && v2 == 1 && v3 == 1)
1904 subsampling = SUBSAMPLE_YUV420;
1905 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1906 v1 == 1 && v2 == 1 && v3 == 1)
1907 subsampling = SUBSAMPLE_YUV422H;
1908 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1909 v1 == 1 && v2 == 1 && v3 == 1)
1910 subsampling = SUBSAMPLE_YUV444;
1911 else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1912 v1 == 1 && v2 == 1 && v3 == 1)
1913 subsampling = SUBSAMPLE_YUV411;
1914 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1915 v1 == 2 && v2 == 1 && v3 == 1)
1916 subsampling = SUBSAMPLE_YUV422V;
1917 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1918 v1 == 2 && v2 == 2 && v3 == 2)
1919 subsampling = SUBSAMPLE_YUV422H;
1920 else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1921 v1 == 2 && v2 == 1 && v3 == 1)
1922 subsampling = SUBSAMPLE_YUV422V;
1929 /* Current decoded picture */
1930 obj_surface = decode_state->render_object;
1931 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling);
1933 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1934 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1935 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1936 gen7_mfd_context->pre_deblocking_output.valid = 1;
1938 gen7_mfd_context->post_deblocking_output.bo = NULL;
1939 gen7_mfd_context->post_deblocking_output.valid = 0;
1941 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1942 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1944 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1945 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1947 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1948 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1950 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1951 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1953 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1954 gen7_mfd_context->bitplane_read_buffer.valid = 0;
1957 static const int va_to_gen7_jpeg_rotation[4] = {
1958 GEN7_JPEG_ROTATION_0,
1959 GEN7_JPEG_ROTATION_90,
1960 GEN7_JPEG_ROTATION_180,
1961 GEN7_JPEG_ROTATION_270
1965 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1966 struct decode_state *decode_state,
1967 struct gen7_mfd_context *gen7_mfd_context)
1969 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1970 VAPictureParameterBufferJPEGBaseline *pic_param;
1971 int chroma_type = GEN7_YUV420;
1972 int frame_width_in_blks;
1973 int frame_height_in_blks;
1975 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1976 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1978 if (pic_param->num_components == 1)
1979 chroma_type = GEN7_YUV400;
1980 else if (pic_param->num_components == 3) {
1981 int h1 = pic_param->components[0].h_sampling_factor;
1982 int h2 = pic_param->components[1].h_sampling_factor;
1983 int h3 = pic_param->components[2].h_sampling_factor;
1984 int v1 = pic_param->components[0].v_sampling_factor;
1985 int v2 = pic_param->components[1].v_sampling_factor;
1986 int v3 = pic_param->components[2].v_sampling_factor;
1988 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1989 v1 == 2 && v2 == 1 && v3 == 1)
1990 chroma_type = GEN7_YUV420;
1991 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1992 v1 == 1 && v2 == 1 && v3 == 1)
1993 chroma_type = GEN7_YUV422H_2Y;
1994 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1995 v1 == 1 && v2 == 1 && v3 == 1)
1996 chroma_type = GEN7_YUV444;
1997 else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1998 v1 == 1 && v2 == 1 && v3 == 1)
1999 chroma_type = GEN7_YUV411;
2000 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2001 v1 == 2 && v2 == 1 && v3 == 1)
2002 chroma_type = GEN7_YUV422V_2Y;
2003 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2004 v1 == 2 && v2 == 2 && v3 == 2)
2005 chroma_type = GEN7_YUV422H_4Y;
2006 else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2007 v1 == 2 && v2 == 1 && v3 == 1)
2008 chroma_type = GEN7_YUV422V_4Y;
2013 if (chroma_type == GEN7_YUV400 ||
2014 chroma_type == GEN7_YUV444 ||
2015 chroma_type == GEN7_YUV422V_2Y) {
2016 frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2017 frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2018 } else if (chroma_type == GEN7_YUV411) {
2019 frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2020 frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2022 frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2023 frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2026 BEGIN_BCS_BATCH(batch, 3);
2027 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2028 OUT_BCS_BATCH(batch,
2029 (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
2030 (chroma_type << 0));
2031 OUT_BCS_BATCH(batch,
2032 ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
2033 ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
2034 ADVANCE_BCS_BATCH(batch);
2037 static const int va_to_gen7_jpeg_hufftable[2] = {
2043 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2044 struct decode_state *decode_state,
2045 struct gen7_mfd_context *gen7_mfd_context,
2048 VAHuffmanTableBufferJPEGBaseline *huffman_table;
2049 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2052 if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2055 huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2057 for (index = 0; index < num_tables; index++) {
2058 int id = va_to_gen7_jpeg_hufftable[index];
2059 if (!huffman_table->load_huffman_table[index])
2061 BEGIN_BCS_BATCH(batch, 53);
2062 OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2063 OUT_BCS_BATCH(batch, id);
2064 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2065 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2066 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2067 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2068 ADVANCE_BCS_BATCH(batch);
2072 static const int va_to_gen7_jpeg_qm[5] = {
2074 MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2075 MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2076 MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2077 MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2081 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2082 struct decode_state *decode_state,
2083 struct gen7_mfd_context *gen7_mfd_context)
2085 VAPictureParameterBufferJPEGBaseline *pic_param;
2086 VAIQMatrixBufferJPEGBaseline *iq_matrix;
2089 if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2092 iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2093 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2095 assert(pic_param->num_components <= 3);
2097 for (index = 0; index < pic_param->num_components; index++) {
2098 int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2100 unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2101 unsigned char raster_qm[64];
2104 if (id > 4 || id < 1)
2107 if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2110 qm_type = va_to_gen7_jpeg_qm[id];
2112 for (j = 0; j < 64; j++)
2113 raster_qm[zigzag_direct[j]] = qm[j];
2115 gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2120 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2121 VAPictureParameterBufferJPEGBaseline *pic_param,
2122 VASliceParameterBufferJPEGBaseline *slice_param,
2123 VASliceParameterBufferJPEGBaseline *next_slice_param,
2124 dri_bo *slice_data_bo,
2125 struct gen7_mfd_context *gen7_mfd_context)
2127 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2128 int scan_component_mask = 0;
2131 assert(slice_param->num_components > 0);
2132 assert(slice_param->num_components < 4);
2133 assert(slice_param->num_components <= pic_param->num_components);
2135 for (i = 0; i < slice_param->num_components; i++) {
2136 switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2138 scan_component_mask |= (1 << 0);
2141 scan_component_mask |= (1 << 1);
2144 scan_component_mask |= (1 << 2);
2152 BEGIN_BCS_BATCH(batch, 6);
2153 OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2154 OUT_BCS_BATCH(batch,
2155 slice_param->slice_data_size);
2156 OUT_BCS_BATCH(batch,
2157 slice_param->slice_data_offset);
2158 OUT_BCS_BATCH(batch,
2159 slice_param->slice_horizontal_position << 16 |
2160 slice_param->slice_vertical_position << 0);
2161 OUT_BCS_BATCH(batch,
2162 ((slice_param->num_components != 1) << 30) | /* interleaved */
2163 (scan_component_mask << 27) | /* scan components */
2164 (0 << 26) | /* disable interrupt allowed */
2165 (slice_param->num_mcus << 0)); /* MCU count */
2166 OUT_BCS_BATCH(batch,
2167 (slice_param->restart_interval << 0)); /* RestartInterval */
2168 ADVANCE_BCS_BATCH(batch);
2171 /* Workaround for JPEG decoding on Ivybridge */
2175 i965_DestroySurfaces(VADriverContextP ctx,
2176 VASurfaceID *surface_list,
2179 i965_CreateSurfaces(VADriverContextP ctx,
2184 VASurfaceID *surfaces);
2189 unsigned char data[32];
2191 int data_bit_offset;
2193 } gen7_jpeg_wa_clip = {
2197 0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2198 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2206 gen8_jpeg_wa_init(VADriverContextP ctx,
2207 struct gen7_mfd_context *gen7_mfd_context)
2209 struct i965_driver_data *i965 = i965_driver_data(ctx);
2211 struct object_surface *obj_surface;
2213 if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2214 i965_DestroySurfaces(ctx,
2215 &gen7_mfd_context->jpeg_wa_surface_id,
2218 status = i965_CreateSurfaces(ctx,
2219 gen7_jpeg_wa_clip.width,
2220 gen7_jpeg_wa_clip.height,
2221 VA_RT_FORMAT_YUV420,
2223 &gen7_mfd_context->jpeg_wa_surface_id);
2224 assert(status == VA_STATUS_SUCCESS);
2226 obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2227 assert(obj_surface);
2228 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
2229 gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2231 if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2232 gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2236 dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2238 gen7_jpeg_wa_clip.data_size,
2239 gen7_jpeg_wa_clip.data);
2244 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2245 struct gen7_mfd_context *gen7_mfd_context)
2247 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2249 BEGIN_BCS_BATCH(batch, 5);
2250 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2251 OUT_BCS_BATCH(batch,
2252 (MFX_LONG_MODE << 17) | /* Currently only support long format */
2253 (MFD_MODE_VLD << 15) | /* VLD mode */
2254 (0 << 10) | /* disable Stream-Out */
2255 (0 << 9) | /* Post Deblocking Output */
2256 (1 << 8) | /* Pre Deblocking Output */
2257 (0 << 5) | /* not in stitch mode */
2258 (MFX_CODEC_DECODE << 4) | /* decoding mode */
2259 (MFX_FORMAT_AVC << 0));
2260 OUT_BCS_BATCH(batch,
2261 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
2262 (0 << 3) | /* terminate if AVC mbdata error occurs */
2263 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
2266 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2267 OUT_BCS_BATCH(batch, 0); /* reserved */
2268 ADVANCE_BCS_BATCH(batch);
2272 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2273 struct gen7_mfd_context *gen7_mfd_context)
2275 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2276 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2278 BEGIN_BCS_BATCH(batch, 6);
2279 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2280 OUT_BCS_BATCH(batch, 0);
2281 OUT_BCS_BATCH(batch,
2282 ((obj_surface->orig_width - 1) << 18) |
2283 ((obj_surface->orig_height - 1) << 4));
2284 OUT_BCS_BATCH(batch,
2285 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2286 (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2287 (0 << 22) | /* surface object control state, ignored */
2288 ((obj_surface->width - 1) << 3) | /* pitch */
2289 (0 << 2) | /* must be 0 */
2290 (1 << 1) | /* must be tiled */
2291 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
2292 OUT_BCS_BATCH(batch,
2293 (0 << 16) | /* X offset for U(Cb), must be 0 */
2294 (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2295 OUT_BCS_BATCH(batch,
2296 (0 << 16) | /* X offset for V(Cr), must be 0 */
2297 (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
2298 ADVANCE_BCS_BATCH(batch);
2302 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2303 struct gen7_mfd_context *gen7_mfd_context)
2305 struct i965_driver_data *i965 = i965_driver_data(ctx);
2306 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2307 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2311 intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2316 BEGIN_BCS_BATCH(batch, 61);
2317 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2318 OUT_BCS_RELOC(batch,
2320 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2322 OUT_BCS_BATCH(batch, 0);
2323 OUT_BCS_BATCH(batch, 0);
2326 OUT_BCS_BATCH(batch, 0); /* post deblocking */
2327 OUT_BCS_BATCH(batch, 0);
2328 OUT_BCS_BATCH(batch, 0);
2330 /* uncompressed-video & stream out 7-12 */
2331 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2332 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2333 OUT_BCS_BATCH(batch, 0);
2334 OUT_BCS_BATCH(batch, 0);
2335 OUT_BCS_BATCH(batch, 0);
2336 OUT_BCS_BATCH(batch, 0);
2338 /* the DW 13-15 is for intra row store scratch */
2339 OUT_BCS_RELOC(batch,
2341 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2343 OUT_BCS_BATCH(batch, 0);
2344 OUT_BCS_BATCH(batch, 0);
2346 /* the DW 16-18 is for deblocking filter */
2347 OUT_BCS_BATCH(batch, 0);
2348 OUT_BCS_BATCH(batch, 0);
2349 OUT_BCS_BATCH(batch, 0);
2352 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2353 OUT_BCS_BATCH(batch, 0);
2354 OUT_BCS_BATCH(batch, 0);
2356 OUT_BCS_BATCH(batch, 0);
2358 /* the DW52-54 is for mb status address */
2359 OUT_BCS_BATCH(batch, 0);
2360 OUT_BCS_BATCH(batch, 0);
2361 OUT_BCS_BATCH(batch, 0);
2362 /* the DW56-60 is for ILDB & second ILDB address */
2363 OUT_BCS_BATCH(batch, 0);
2364 OUT_BCS_BATCH(batch, 0);
2365 OUT_BCS_BATCH(batch, 0);
2366 OUT_BCS_BATCH(batch, 0);
2367 OUT_BCS_BATCH(batch, 0);
2368 OUT_BCS_BATCH(batch, 0);
2370 ADVANCE_BCS_BATCH(batch);
2372 dri_bo_unreference(intra_bo);
2376 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2377 struct gen7_mfd_context *gen7_mfd_context)
2379 struct i965_driver_data *i965 = i965_driver_data(ctx);
2380 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2381 dri_bo *bsd_mpc_bo, *mpr_bo;
2383 bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2384 "bsd mpc row store",
2385 11520, /* 1.5 * 120 * 64 */
2388 mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2390 7680, /* 1. 0 * 120 * 64 */
2393 BEGIN_BCS_BATCH(batch, 10);
2394 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2396 OUT_BCS_RELOC(batch,
2398 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2401 OUT_BCS_BATCH(batch, 0);
2402 OUT_BCS_BATCH(batch, 0);
2404 OUT_BCS_RELOC(batch,
2406 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2408 OUT_BCS_BATCH(batch, 0);
2409 OUT_BCS_BATCH(batch, 0);
2411 OUT_BCS_BATCH(batch, 0);
2412 OUT_BCS_BATCH(batch, 0);
2413 OUT_BCS_BATCH(batch, 0);
2415 ADVANCE_BCS_BATCH(batch);
2417 dri_bo_unreference(bsd_mpc_bo);
2418 dri_bo_unreference(mpr_bo);
2422 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2423 struct gen7_mfd_context *gen7_mfd_context)
2429 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2430 struct gen7_mfd_context *gen7_mfd_context)
2432 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2434 int mbaff_frame_flag = 0;
2435 unsigned int width_in_mbs = 1, height_in_mbs = 1;
2437 BEGIN_BCS_BATCH(batch, 16);
2438 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2439 OUT_BCS_BATCH(batch,
2440 width_in_mbs * height_in_mbs);
2441 OUT_BCS_BATCH(batch,
2442 ((height_in_mbs - 1) << 16) |
2443 ((width_in_mbs - 1) << 0));
2444 OUT_BCS_BATCH(batch,
2449 (0 << 12) | /* differ from GEN6 */
2452 OUT_BCS_BATCH(batch,
2453 (1 << 10) | /* 4:2:0 */
2454 (1 << 7) | /* CABAC */
2460 (mbaff_frame_flag << 1) |
2462 OUT_BCS_BATCH(batch, 0);
2463 OUT_BCS_BATCH(batch, 0);
2464 OUT_BCS_BATCH(batch, 0);
2465 OUT_BCS_BATCH(batch, 0);
2466 OUT_BCS_BATCH(batch, 0);
2467 OUT_BCS_BATCH(batch, 0);
2468 OUT_BCS_BATCH(batch, 0);
2469 OUT_BCS_BATCH(batch, 0);
2470 OUT_BCS_BATCH(batch, 0);
2471 OUT_BCS_BATCH(batch, 0);
2472 OUT_BCS_BATCH(batch, 0);
2473 ADVANCE_BCS_BATCH(batch);
2477 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2478 struct gen7_mfd_context *gen7_mfd_context)
2480 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2483 BEGIN_BCS_BATCH(batch, 71);
2484 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2486 /* reference surfaces 0..15 */
2487 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2488 OUT_BCS_BATCH(batch, 0); /* top */
2489 OUT_BCS_BATCH(batch, 0); /* bottom */
2492 OUT_BCS_BATCH(batch, 0);
2494 /* the current decoding frame/field */
2495 OUT_BCS_BATCH(batch, 0); /* top */
2496 OUT_BCS_BATCH(batch, 0);
2497 OUT_BCS_BATCH(batch, 0);
2500 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2501 OUT_BCS_BATCH(batch, 0);
2502 OUT_BCS_BATCH(batch, 0);
2505 OUT_BCS_BATCH(batch, 0);
2506 OUT_BCS_BATCH(batch, 0);
2508 ADVANCE_BCS_BATCH(batch);
2512 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2513 struct gen7_mfd_context *gen7_mfd_context)
2515 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2517 BEGIN_BCS_BATCH(batch, 11);
2518 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2519 OUT_BCS_RELOC(batch,
2520 gen7_mfd_context->jpeg_wa_slice_data_bo,
2521 I915_GEM_DOMAIN_INSTRUCTION, 0,
2523 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2524 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2525 OUT_BCS_BATCH(batch, 0);
2526 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2527 OUT_BCS_BATCH(batch, 0);
2528 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2529 OUT_BCS_BATCH(batch, 0);
2530 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2531 OUT_BCS_BATCH(batch, 0);
2532 ADVANCE_BCS_BATCH(batch);
2536 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2537 struct gen7_mfd_context *gen7_mfd_context)
2539 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2541 /* the input bitsteam format on GEN7 differs from GEN6 */
2542 BEGIN_BCS_BATCH(batch, 6);
2543 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2544 OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2545 OUT_BCS_BATCH(batch, 0);
2546 OUT_BCS_BATCH(batch,
2552 OUT_BCS_BATCH(batch,
2553 ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2556 (1 << 3) | /* LastSlice Flag */
2557 (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2558 OUT_BCS_BATCH(batch, 0);
2559 ADVANCE_BCS_BATCH(batch);
2563 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2564 struct gen7_mfd_context *gen7_mfd_context)
2566 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2567 int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2568 int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2569 int first_mb_in_slice = 0;
2570 int slice_type = SLICE_TYPE_I;
2572 BEGIN_BCS_BATCH(batch, 11);
2573 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2574 OUT_BCS_BATCH(batch, slice_type);
2575 OUT_BCS_BATCH(batch,
2576 (num_ref_idx_l1 << 24) |
2577 (num_ref_idx_l0 << 16) |
2580 OUT_BCS_BATCH(batch,
2582 (1 << 27) | /* disable Deblocking */
2584 (gen7_jpeg_wa_clip.qp << 16) |
2587 OUT_BCS_BATCH(batch,
2588 (slice_ver_pos << 24) |
2589 (slice_hor_pos << 16) |
2590 (first_mb_in_slice << 0));
2591 OUT_BCS_BATCH(batch,
2592 (next_slice_ver_pos << 16) |
2593 (next_slice_hor_pos << 0));
2594 OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2595 OUT_BCS_BATCH(batch, 0);
2596 OUT_BCS_BATCH(batch, 0);
2597 OUT_BCS_BATCH(batch, 0);
2598 OUT_BCS_BATCH(batch, 0);
2599 ADVANCE_BCS_BATCH(batch);
2603 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2604 struct gen7_mfd_context *gen7_mfd_context)
2606 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2607 gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2608 intel_batchbuffer_emit_mi_flush(batch);
2609 gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2610 gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2611 gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2612 gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2613 gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2614 gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2615 gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2617 gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2618 gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2619 gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2625 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2626 struct decode_state *decode_state,
2627 struct gen7_mfd_context *gen7_mfd_context)
2629 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2630 VAPictureParameterBufferJPEGBaseline *pic_param;
2631 VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2632 dri_bo *slice_data_bo;
2633 int i, j, max_selector = 0;
2635 assert(decode_state->pic_param && decode_state->pic_param->buffer);
2636 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2638 /* Currently only support Baseline DCT */
2639 gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2640 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2642 gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2644 intel_batchbuffer_emit_mi_flush(batch);
2645 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2646 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2647 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2648 gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2649 gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2651 for (j = 0; j < decode_state->num_slice_params; j++) {
2652 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2653 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2654 slice_data_bo = decode_state->slice_datas[j]->bo;
2655 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2657 if (j == decode_state->num_slice_params - 1)
2658 next_slice_group_param = NULL;
2660 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2662 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2665 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2667 if (i < decode_state->slice_params[j]->num_elements - 1)
2668 next_slice_param = slice_param + 1;
2670 next_slice_param = next_slice_group_param;
2672 for (component = 0; component < slice_param->num_components; component++) {
2673 if (max_selector < slice_param->components[component].dc_table_selector)
2674 max_selector = slice_param->components[component].dc_table_selector;
2676 if (max_selector < slice_param->components[component].ac_table_selector)
2677 max_selector = slice_param->components[component].ac_table_selector;
2684 assert(max_selector < 2);
2685 gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2687 for (j = 0; j < decode_state->num_slice_params; j++) {
2688 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2689 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2690 slice_data_bo = decode_state->slice_datas[j]->bo;
2691 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2693 if (j == decode_state->num_slice_params - 1)
2694 next_slice_group_param = NULL;
2696 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2698 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2699 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2701 if (i < decode_state->slice_params[j]->num_elements - 1)
2702 next_slice_param = slice_param + 1;
2704 next_slice_param = next_slice_group_param;
2706 gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2711 intel_batchbuffer_end_atomic(batch);
2712 intel_batchbuffer_flush(batch);
2716 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2717 struct decode_state *decode_state,
2718 struct gen7_mfd_context *gen7_mfd_context)
2720 struct object_surface *obj_surface;
2721 struct i965_driver_data *i965 = i965_driver_data(ctx);
2723 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2724 int width_in_mbs = (pic_param->frame_width + 15) / 16;
2725 int height_in_mbs = (pic_param->frame_height + 15) / 16;
2727 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2728 assert(height_in_mbs > 0 && height_in_mbs <= 256);
2730 /* Current decoded picture */
2731 obj_surface = decode_state->render_object;
2732 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2734 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2735 gen7_mfd_context->post_deblocking_output.bo = NULL;
2736 gen7_mfd_context->post_deblocking_output.valid = 0;
2738 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2739 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2740 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2741 gen7_mfd_context->pre_deblocking_output.valid = 1;
2743 /* The same as AVC */
2744 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2745 bo = dri_bo_alloc(i965->intel.bufmgr,
2750 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2751 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2753 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2754 bo = dri_bo_alloc(i965->intel.bufmgr,
2755 "deblocking filter row store",
2756 width_in_mbs * 64 * 4,
2759 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2760 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2762 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2763 bo = dri_bo_alloc(i965->intel.bufmgr,
2764 "bsd mpc row store",
2765 width_in_mbs * 64 * 2,
2768 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2769 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2771 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2772 bo = dri_bo_alloc(i965->intel.bufmgr,
2774 width_in_mbs * 64 * 2,
2777 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2778 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2780 gen7_mfd_context->bitplane_read_buffer.valid = 0;
2784 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2785 struct decode_state *decode_state,
2786 struct gen7_mfd_context *gen7_mfd_context)
2788 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2789 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2790 VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2791 VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2792 dri_bo *probs_bo = decode_state->probability_data->bo;
2795 log2num = (int)log2(slice_param->num_of_partitions - 1);
2797 BEGIN_BCS_BATCH(batch, 38);
2798 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2799 OUT_BCS_BATCH(batch,
2800 (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2801 (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2802 OUT_BCS_BATCH(batch,
2804 pic_param->pic_fields.bits.sharpness_level << 16 |
2805 pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2806 pic_param->pic_fields.bits.sign_bias_golden << 12 |
2807 pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2808 pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2809 pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2810 pic_param->pic_fields.bits.segmentation_enabled << 8 |
2811 0 << 7 | /* segmentation id streamin disabled */
2812 0 << 6 | /* segmentation id streamout disabled */
2813 pic_param->pic_fields.bits.key_frame << 5 |
2814 pic_param->pic_fields.bits.filter_type << 4 |
2815 (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2816 !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2818 OUT_BCS_BATCH(batch,
2819 pic_param->loop_filter_level[3] << 24 |
2820 pic_param->loop_filter_level[2] << 16 |
2821 pic_param->loop_filter_level[1] << 8 |
2822 pic_param->loop_filter_level[0] << 0);
2824 /* Quantizer Value for 4 segmetns, DW4-DW15 */
2825 for (i = 0; i < 4; i++) {
2826 OUT_BCS_BATCH(batch,
2827 iq_matrix->quantization_index[i][0] << 16 | /* Y1AC */
2828 iq_matrix->quantization_index[i][1] << 0); /* Y1DC */
2829 OUT_BCS_BATCH(batch,
2830 iq_matrix->quantization_index[i][5] << 16 | /* UVAC */
2831 iq_matrix->quantization_index[i][4] << 0); /* UVDC */
2832 OUT_BCS_BATCH(batch,
2833 iq_matrix->quantization_index[i][3] << 16 | /* Y2AC */
2834 iq_matrix->quantization_index[i][2] << 0); /* Y2DC */
2837 /* CoeffProbability table for non-key frame, DW16-DW18 */
2839 OUT_BCS_RELOC(batch, probs_bo,
2840 0, I915_GEM_DOMAIN_INSTRUCTION,
2842 OUT_BCS_BATCH(batch, 0);
2843 OUT_BCS_BATCH(batch, 0);
2845 OUT_BCS_BATCH(batch, 0);
2846 OUT_BCS_BATCH(batch, 0);
2847 OUT_BCS_BATCH(batch, 0);
2850 OUT_BCS_BATCH(batch,
2851 pic_param->mb_segment_tree_probs[2] << 16 |
2852 pic_param->mb_segment_tree_probs[1] << 8 |
2853 pic_param->mb_segment_tree_probs[0] << 0);
2855 OUT_BCS_BATCH(batch,
2856 pic_param->prob_skip_false << 24 |
2857 pic_param->prob_intra << 16 |
2858 pic_param->prob_last << 8 |
2859 pic_param->prob_gf << 0);
2861 OUT_BCS_BATCH(batch,
2862 pic_param->y_mode_probs[3] << 24 |
2863 pic_param->y_mode_probs[2] << 16 |
2864 pic_param->y_mode_probs[1] << 8 |
2865 pic_param->y_mode_probs[0] << 0);
2867 OUT_BCS_BATCH(batch,
2868 pic_param->uv_mode_probs[2] << 16 |
2869 pic_param->uv_mode_probs[1] << 8 |
2870 pic_param->uv_mode_probs[0] << 0);
2872 /* MV update value, DW23-DW32 */
2873 for (i = 0; i < 2; i++) {
2874 for (j = 0; j < 20; j += 4) {
2875 OUT_BCS_BATCH(batch,
2876 (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2877 pic_param->mv_probs[i][j + 2] << 16 |
2878 pic_param->mv_probs[i][j + 1] << 8 |
2879 pic_param->mv_probs[i][j + 0] << 0);
2883 OUT_BCS_BATCH(batch,
2884 pic_param->loop_filter_deltas_ref_frame[3] << 24 |
2885 pic_param->loop_filter_deltas_ref_frame[2] << 16 |
2886 pic_param->loop_filter_deltas_ref_frame[1] << 8 |
2887 pic_param->loop_filter_deltas_ref_frame[0] << 0);
2889 OUT_BCS_BATCH(batch,
2890 pic_param->loop_filter_deltas_mode[3] << 24 |
2891 pic_param->loop_filter_deltas_mode[2] << 16 |
2892 pic_param->loop_filter_deltas_mode[1] << 8 |
2893 pic_param->loop_filter_deltas_mode[0] << 0);
2895 /* segmentation id stream base address, DW35-DW37 */
2896 OUT_BCS_BATCH(batch, 0);
2897 OUT_BCS_BATCH(batch, 0);
2898 OUT_BCS_BATCH(batch, 0);
2899 ADVANCE_BCS_BATCH(batch);
/* Emit MFD_VP8_BSD_OBJECT: describes the VP8 frame's bitstream partition
 * layout (partition 0 boolean-coder state plus up to 8 token partitions)
 * to the bitstream decoder. */
2903 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2904 VAPictureParameterBufferVP8 *pic_param,
2905 VASliceParameterBufferVP8 *slice_param,
2906 dri_bo *slice_data_bo,
2907 struct gen7_mfd_context *gen7_mfd_context)
2909 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Running byte offset of the current partition inside the slice data. */
2911 unsigned int offset = slice_param->slice_data_offset;
/* A VP8 frame has one control partition plus 1..8 token partitions. */
2913 assert(slice_param->num_of_partitions >= 2);
2914 assert(slice_param->num_of_partitions <= 9);
/* log2 of the token-partition count (the count is a power of two per the
 * VP8 spec, so the truncation to int is exact). */
2916 log2num = (int)log2(slice_param->num_of_partitions - 1);
2918 BEGIN_BCS_BATCH(batch, 22);
2919 OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2920 OUT_BCS_BATCH(batch,
2921 pic_param->bool_coder_ctx.count << 16 | /* Partition 0 CPBAC Entropy Count */
2922 pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
/* Sub-byte bit position of the first macroblock within partition 0. */
2924 (slice_param->macroblock_offset & 0x7));
2925 OUT_BCS_BATCH(batch,
2926 pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
/* Fill all nine {size, offset} partition slots; slots beyond the actual
 * partition count are zeroed. */
2929 for (i = 0; i < 9; i++) {
2930 if (i < slice_param->num_of_partitions) {
2931 OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
2932 OUT_BCS_BATCH(batch, offset);
2934 OUT_BCS_BATCH(batch, 0);
2935 OUT_BCS_BATCH(batch, 0);
/* NOTE(review): offset is advanced even for unused slots, which assumes
 * partition_size[] entries past num_of_partitions are zero — confirm
 * the VA-API caller guarantees this. */
2938 offset += slice_param->partition_size[i];
2941 OUT_BCS_BATCH(batch,
2942 1 << 31 | /* concealment method */
2945 ADVANCE_BCS_BATCH(batch);
/* Decode one complete VP8 frame: validate the accumulated decode_state,
 * then emit the whole MFX pipeline setup and the BSD object in a single
 * atomic batch buffer and submit it. */
2949 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
2950 struct decode_state *decode_state,
2951 struct gen7_mfd_context *gen7_mfd_context)
2953 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2954 VAPictureParameterBufferVP8 *pic_param;
2955 VASliceParameterBufferVP8 *slice_param;
2956 dri_bo *slice_data_bo;
2958 assert(decode_state->pic_param && decode_state->pic_param->buffer);
2959 pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2961 /* one slice per frame */
2962 assert(decode_state->num_slice_params == 1);
2963 assert(decode_state->slice_params[0]->num_elements == 1);
2964 assert(decode_state->slice_params && decode_state->slice_params[0]->buffer);
2965 assert(decode_state->slice_datas[0]->bo);
/* VP8 decode requires a probability-table buffer from the app. */
2967 assert(decode_state->probability_data);
2969 slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
2970 slice_data_bo = decode_state->slice_datas[0]->bo;
/* Allocate/refresh per-frame resources before touching the batch. */
2972 gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
/* Emit the full pipeline state atomically so no other batch interleaves. */
2973 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2974 intel_batchbuffer_emit_mi_flush(batch);
2975 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
2976 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
2977 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
2978 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
2979 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
2980 gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
2981 gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
2982 intel_batchbuffer_end_atomic(batch);
/* Submit the batch to the kernel for execution. */
2983 intel_batchbuffer_flush(batch);
/* hw_context::run entry point: sanity-check the queued buffers, then
 * dispatch to the per-codec decode routine by profile.
 * NOTE(review): the `profile` parameter line and the switch header are on
 * lines elided from this chunk. */
2987 gen8_mfd_decode_picture(VADriverContextP ctx,
2989 union codec_state *codec_state,
2990 struct hw_context *hw_context)
2993 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2994 struct decode_state *decode_state = &codec_state->decode;
2997 assert(gen7_mfd_context);
/* Reject malformed input before programming any hardware state. */
2999 vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3001 if (vaStatus != VA_STATUS_SUCCESS)
/* Reset the MPEG-2 slice-vertical-position workaround tracker for every
 * new picture; -1 means "no slice seen yet". */
3004 gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3007 case VAProfileMPEG2Simple:
3008 case VAProfileMPEG2Main:
3009 gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3012 case VAProfileH264Baseline:
3013 case VAProfileH264Main:
3014 case VAProfileH264High:
3015 gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3018 case VAProfileVC1Simple:
3019 case VAProfileVC1Main:
3020 case VAProfileVC1Advanced:
3021 gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3024 case VAProfileJPEGBaseline:
3025 gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3028 case VAProfileVP8Version0_3:
3029 gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
/* All decode paths above report success through vaStatus. */
3037 vaStatus = VA_STATUS_SUCCESS;
3044 gen8_mfd_context_destroy(void *hw_context)
3046 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3048 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3049 gen7_mfd_context->post_deblocking_output.bo = NULL;
3051 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3052 gen7_mfd_context->pre_deblocking_output.bo = NULL;
3054 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3055 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3057 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3058 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3060 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3061 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3063 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3064 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3066 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3067 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3069 dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3071 intel_batchbuffer_free(gen7_mfd_context->base.batch);
3072 free(gen7_mfd_context);
3075 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3076 struct gen7_mfd_context *gen7_mfd_context)
3078 gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3079 gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3080 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3081 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
/* Allocate and initialise a gen8 decoder context for the given VA config;
 * returned as the generic hw_context whose destroy/run hooks are set here.
 * NOTE(review): the closing brace and any default switch handling lie past
 * the end of this chunk. */
3085 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3087 struct intel_driver_data *intel = intel_driver_data(ctx);
/* calloc zero-initialises every field, so only non-zero defaults need to
 * be set explicitly below.
 * NOTE(review): the calloc result is used without a NULL check — an OOM
 * here would crash; consider guarding. */
3088 struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
/* Wire up the generic hw_context vtable and create the BCS batch buffer. */
3091 gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3092 gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3093 gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
/* Mark every reference-surface slot as unused. */
3095 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3096 gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3097 gen7_mfd_context->reference_surface[i].frame_store_id = -1;
/* No JPEG workaround surface allocated yet. */
3100 gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
/* Codec-specific one-time initialisation. */
3102 switch (obj_config->profile) {
3103 case VAProfileMPEG2Simple:
3104 case VAProfileMPEG2Main:
3105 gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3108 case VAProfileH264Baseline:
3109 case VAProfileH264Main:
3110 case VAProfileH264High:
3111 gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3116 return (struct hw_context *)gen7_mfd_context;