2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/* Zig-zag scan order table: entry k is the raster position of the k-th
 * coefficient in zig-zag order for an 8x8 block. Used to reorder
 * quantization matrices into the layout the MFX hardware expects. */
51 static const uint32_t zigzag_direct[64] = {
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
54 12, 19, 26, 33, 40, 48, 41, 34,
55 27, 20, 13, 6, 7, 14, 21, 28,
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
/*
 * Lazily attach AVC-specific private data (GenAvcSurface) to a decode
 * surface and allocate its direct-MV (DMV) scratch buffers.
 *
 * A "top" DMV buffer is always allocated; a separate "bottom" buffer is
 * additionally allocated only for field pictures decoded without the
 * direct_8x8_inference shortcut (see dmv_bottom_flag below). Buffers are
 * sized at 128 bytes per macroblock of the frame.
 */
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64 VAPictureParameterBufferH264 *pic_param,
65 struct object_surface *obj_surface)
67 struct i965_driver_data *i965 = i965_driver_data(ctx);
68 GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69 int width_in_mbs, height_in_mbs;
/* Ensure the private data is released together with the surface. */
71 obj_surface->free_private_data = gen_free_avc_surface;
72 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
/* First use of this surface for AVC: create the private-data record. */
75 if (!gen7_avc_surface) {
76 gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77 assert((obj_surface->size & 0x3f) == 0);
78 obj_surface->private_data = gen7_avc_surface;
/* A bottom-field DMV buffer is needed only for field pictures that do
 * not use direct 8x8 inference. */
81 gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
82 !pic_param->seq_fields.bits.direct_8x8_inference_flag);
84 if (gen7_avc_surface->dmv_top == NULL) {
85 gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86 "direct mv w/r buffer",
87 width_in_mbs * height_in_mbs * 128,
89 assert(gen7_avc_surface->dmv_top);
92 if (gen7_avc_surface->dmv_bottom_flag &&
93 gen7_avc_surface->dmv_bottom == NULL) {
94 gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
95 "direct mv w/r buffer",
96 width_in_mbs * height_in_mbs * 128,
98 assert(gen7_avc_surface->dmv_bottom);
/*
 * Emit the MFX_PIPE_MODE_SELECT command (5 dwords) configuring the MFX
 * engine for VLD decode of the selected codec (MPEG-2, AVC, VC-1, JPEG
 * or VP8). Pre-/post-deblocking output enables are taken from the
 * context's buffer-valid flags set up by the per-codec decode_init.
 */
103 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
104 struct decode_state *decode_state,
106 struct gen7_mfd_context *gen7_mfd_context)
108 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
110 assert(standard_select == MFX_FORMAT_MPEG2 ||
111 standard_select == MFX_FORMAT_AVC ||
112 standard_select == MFX_FORMAT_VC1 ||
113 standard_select == MFX_FORMAT_JPEG ||
114 standard_select == MFX_FORMAT_VP8);
116 BEGIN_BCS_BATCH(batch, 5);
117 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
119 (MFX_LONG_MODE << 17) | /* Currently only support long format */
120 (MFD_MODE_VLD << 15) | /* VLD mode */
121 (0 << 10) | /* disable Stream-Out */
122 (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
123 (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
124 (0 << 5) | /* not in stitch mode */
125 (MFX_CODEC_DECODE << 4) | /* decoding mode */
126 (standard_select << 0));
128 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
129 (0 << 3) | /* terminate if AVC mbdata error occurs */
130 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
133 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
134 OUT_BCS_BATCH(batch, 0); /* reserved */
135 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE (6 dwords) describing the render target of the
 * current decode: a tiled NV12-style planar 4:2:0 surface. Cb/Cr plane
 * Y-offsets come from the object surface; chroma interleave is enabled
 * for every codec except JPEG.
 */
139 gen8_mfd_surface_state(VADriverContextP ctx,
140 struct decode_state *decode_state,
142 struct gen7_mfd_context *gen7_mfd_context)
144 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
145 struct object_surface *obj_surface = decode_state->render_object;
146 unsigned int y_cb_offset;
147 unsigned int y_cr_offset;
151 y_cb_offset = obj_surface->y_cb_offset;
152 y_cr_offset = obj_surface->y_cr_offset;
154 BEGIN_BCS_BATCH(batch, 6);
155 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
156 OUT_BCS_BATCH(batch, 0);
158 ((obj_surface->orig_height - 1) << 18) |
159 ((obj_surface->orig_width - 1) << 4));
161 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
162 ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
163 (0 << 22) | /* surface object control state, ignored */
164 ((obj_surface->width - 1) << 3) | /* pitch */
165 (0 << 2) | /* must be 0 */
166 (1 << 1) | /* must be tiled */
167 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
169 (0 << 16) | /* X offset for U(Cb), must be 0 */
170 (y_cb_offset << 0)); /* Y offset for U(Cb) */
172 (0 << 16) | /* X offset for V(Cr), must be 0 */
173 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
174 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): all pipeline buffer
 * addresses — pre/post-deblocking outputs, intra and deblocking-filter
 * row-store scratch buffers, and the reference picture list. Each
 * optional buffer that is not valid is written as a zero dword instead
 * of a relocation, keeping the fixed command length.
 */
178 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
179 struct decode_state *decode_state,
181 struct gen7_mfd_context *gen7_mfd_context)
183 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
186 BEGIN_BCS_BATCH(batch, 61);
187 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
188 /* Pre-deblock 1-3 */
189 if (gen7_mfd_context->pre_deblocking_output.valid)
190 OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
191 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
194 OUT_BCS_BATCH(batch, 0);
196 OUT_BCS_BATCH(batch, 0);
197 OUT_BCS_BATCH(batch, 0);
198 /* Post-debloing 4-6 */
199 if (gen7_mfd_context->post_deblocking_output.valid)
200 OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
201 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
204 OUT_BCS_BATCH(batch, 0);
206 OUT_BCS_BATCH(batch, 0);
207 OUT_BCS_BATCH(batch, 0);
209 /* uncompressed-video & stream out 7-12 */
210 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
211 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
212 OUT_BCS_BATCH(batch, 0);
213 OUT_BCS_BATCH(batch, 0);
214 OUT_BCS_BATCH(batch, 0);
215 OUT_BCS_BATCH(batch, 0);
217 /* intra row-store scratch 13-15 */
218 if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
219 OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
220 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
223 OUT_BCS_BATCH(batch, 0);
225 OUT_BCS_BATCH(batch, 0);
226 OUT_BCS_BATCH(batch, 0);
227 /* deblocking-filter-row-store 16-18 */
228 if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
229 OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
230 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
233 OUT_BCS_BATCH(batch, 0);
234 OUT_BCS_BATCH(batch, 0);
235 OUT_BCS_BATCH(batch, 0);
/* Reference pictures: one relocation (read-only domain) per valid
 * reference surface, zero dwords for unused slots. */
238 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
239 struct object_surface *obj_surface;
241 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
242 gen7_mfd_context->reference_surface[i].obj_surface &&
243 gen7_mfd_context->reference_surface[i].obj_surface->bo) {
244 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
246 OUT_BCS_RELOC(batch, obj_surface->bo,
247 I915_GEM_DOMAIN_INSTRUCTION, 0,
250 OUT_BCS_BATCH(batch, 0);
253 OUT_BCS_BATCH(batch, 0);
256 /* reference property 51 */
257 OUT_BCS_BATCH(batch, 0);
259 /* Macroblock status & ILDB 52-57 */
260 OUT_BCS_BATCH(batch, 0);
261 OUT_BCS_BATCH(batch, 0);
262 OUT_BCS_BATCH(batch, 0);
263 OUT_BCS_BATCH(batch, 0);
264 OUT_BCS_BATCH(batch, 0);
265 OUT_BCS_BATCH(batch, 0);
267 /* the second Macroblock status 58-60 */
268 OUT_BCS_BATCH(batch, 0);
269 OUT_BCS_BATCH(batch, 0);
270 OUT_BCS_BATCH(batch, 0);
272 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): points the bitstream
 * fetch unit at the slice-data buffer. Only the bitstream object base
 * is relocated; the MV/IT_COFF/IT_DBLK/PAK_BSE sections are unused for
 * VLD decode and written as zeros.
 */
276 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
277 dri_bo *slice_data_bo,
279 struct gen7_mfd_context *gen7_mfd_context)
281 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
283 BEGIN_BCS_BATCH(batch, 26);
284 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
286 OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
287 OUT_BCS_BATCH(batch, 0);
288 OUT_BCS_BATCH(batch, 0);
289 /* Upper bound 4-5 */
290 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
291 OUT_BCS_BATCH(batch, 0);
293 /* MFX indirect MV 6-10 */
294 OUT_BCS_BATCH(batch, 0);
295 OUT_BCS_BATCH(batch, 0);
296 OUT_BCS_BATCH(batch, 0);
297 OUT_BCS_BATCH(batch, 0);
298 OUT_BCS_BATCH(batch, 0);
300 /* MFX IT_COFF 11-15 */
301 OUT_BCS_BATCH(batch, 0);
302 OUT_BCS_BATCH(batch, 0);
303 OUT_BCS_BATCH(batch, 0);
304 OUT_BCS_BATCH(batch, 0);
305 OUT_BCS_BATCH(batch, 0);
307 /* MFX IT_DBLK 16-20 */
308 OUT_BCS_BATCH(batch, 0);
309 OUT_BCS_BATCH(batch, 0);
310 OUT_BCS_BATCH(batch, 0);
311 OUT_BCS_BATCH(batch, 0);
312 OUT_BCS_BATCH(batch, 0);
314 /* MFX PAK_BSE object for encoder 21-25 */
315 OUT_BCS_BATCH(batch, 0);
316 OUT_BCS_BATCH(batch, 0);
317 OUT_BCS_BATCH(batch, 0);
318 OUT_BCS_BATCH(batch, 0);
319 OUT_BCS_BATCH(batch, 0);
321 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): addresses of the
 * BSD/MPC row-store, MPR row-store, and bitplane-read buffers. Each
 * buffer that is not valid in the context is replaced by a zero dword.
 */
325 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
326 struct decode_state *decode_state,
328 struct gen7_mfd_context *gen7_mfd_context)
330 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
332 BEGIN_BCS_BATCH(batch, 10);
333 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
335 if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
336 OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
337 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
340 OUT_BCS_BATCH(batch, 0);
342 OUT_BCS_BATCH(batch, 0);
343 OUT_BCS_BATCH(batch, 0);
344 /* MPR Row Store Scratch buffer 4-6 */
345 if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
346 OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
347 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
350 OUT_BCS_BATCH(batch, 0);
352 OUT_BCS_BATCH(batch, 0);
353 OUT_BCS_BATCH(batch, 0);
/* Bitplane buffer is read-only by the hardware (write domain 0). */
356 if (gen7_mfd_context->bitplane_read_buffer.valid)
357 OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
358 I915_GEM_DOMAIN_INSTRUCTION, 0,
361 OUT_BCS_BATCH(batch, 0);
362 OUT_BCS_BATCH(batch, 0);
363 OUT_BCS_BATCH(batch, 0);
364 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one MFX_QM_STATE command (18 dwords) carrying a quantization
 * matrix of the given type. The source matrix (at most 64 bytes) is
 * copied into a zero-padded 16-dword staging buffer so the command
 * payload is always exactly 16 dwords.
 */
368 gen8_mfd_qm_state(VADriverContextP ctx,
372 struct gen7_mfd_context *gen7_mfd_context)
374 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
375 unsigned int qm_buffer[16];
377 assert(qm_length <= 16 * 4);
378 memcpy(qm_buffer, qm, qm_length);
380 BEGIN_BCS_BATCH(batch, 18);
381 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
382 OUT_BCS_BATCH(batch, qm_type << 0);
383 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
384 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_IMG_STATE (17 dwords) from the VA-API H.264 picture
 * parameters: frame dimensions in macroblocks, QP index offsets,
 * prediction/entropy flags and the MBAFF flag. Asserts enforce the
 * MFX restrictions (4:2:0 or monochrome only, field/frame coherence).
 */
388 gen8_mfd_avc_img_state(VADriverContextP ctx,
389 struct decode_state *decode_state,
390 struct gen7_mfd_context *gen7_mfd_context)
392 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
394 int mbaff_frame_flag;
395 unsigned int width_in_mbs, height_in_mbs;
396 VAPictureParameterBufferH264 *pic_param;
398 assert(decode_state->pic_param && decode_state->pic_param->buffer);
399 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
400 assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
/* Derive the image structure (frame / top field / bottom field) from
 * the current-picture flags and cross-check against field_pic_flag. */
402 if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
404 else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
409 if ((img_struct & 0x1) == 0x1) {
410 assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
412 assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
415 if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
416 assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
417 assert(pic_param->pic_fields.bits.field_pic_flag == 0);
419 assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
/* MBAFF: adaptive frame/field coding of a frame picture. */
422 mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
423 !pic_param->pic_fields.bits.field_pic_flag);
425 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
426 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
428 /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
429 assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
430 pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
431 assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
433 BEGIN_BCS_BATCH(batch, 17);
434 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
436 (width_in_mbs * height_in_mbs - 1));
438 ((height_in_mbs - 1) << 16) |
439 ((width_in_mbs - 1) << 0));
441 ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
442 ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
443 (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
444 (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
445 (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
446 (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
449 (pic_param->seq_fields.bits.chroma_format_idc << 10) |
450 (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
451 ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
452 (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
453 (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
454 (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
455 (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
456 (mbaff_frame_flag << 1) |
457 (pic_param->pic_fields.bits.field_pic_flag << 0));
458 OUT_BCS_BATCH(batch, 0);
459 OUT_BCS_BATCH(batch, 0);
460 OUT_BCS_BATCH(batch, 0);
461 OUT_BCS_BATCH(batch, 0);
462 OUT_BCS_BATCH(batch, 0);
463 OUT_BCS_BATCH(batch, 0);
464 OUT_BCS_BATCH(batch, 0);
465 OUT_BCS_BATCH(batch, 0);
466 OUT_BCS_BATCH(batch, 0);
467 OUT_BCS_BATCH(batch, 0);
468 OUT_BCS_BATCH(batch, 0);
469 OUT_BCS_BATCH(batch, 0);
470 ADVANCE_BCS_BATCH(batch);
/*
 * Program the AVC quantization matrices: 4x4 intra/inter scaling lists
 * always, plus the 8x8 intra/inter lists when transform_8x8_mode is on.
 * Falls back to the context's default (flat) IQ matrix when the app did
 * not supply one.
 */
474 gen8_mfd_avc_qm_state(VADriverContextP ctx,
475 struct decode_state *decode_state,
476 struct gen7_mfd_context *gen7_mfd_context)
478 VAIQMatrixBufferH264 *iq_matrix;
479 VAPictureParameterBufferH264 *pic_param;
481 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
482 iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
484 iq_matrix = &gen7_mfd_context->iq_matrix.h264;
486 assert(decode_state->pic_param && decode_state->pic_param->buffer);
487 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
/* Lists 0-2 are intra, 3-5 are inter (3 lists x 16 bytes each). */
489 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
490 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
492 if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
493 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
494 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
/*
 * Emit MFD_AVC_PICID_STATE (10 dwords) with picture-ID remapping
 * disabled (dword 1 == 1); the remaining mapping dwords are zero.
 */
499 gen8_mfd_avc_picid_state(VADriverContextP ctx,
500 struct decode_state *decode_state,
501 struct gen7_mfd_context *gen7_mfd_context)
503 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
505 BEGIN_BCS_BATCH(batch, 10);
506 OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
507 OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
508 OUT_BCS_BATCH(batch, 0);
509 OUT_BCS_BATCH(batch, 0);
510 OUT_BCS_BATCH(batch, 0);
511 OUT_BCS_BATCH(batch, 0);
512 OUT_BCS_BATCH(batch, 0);
513 OUT_BCS_BATCH(batch, 0);
514 OUT_BCS_BATCH(batch, 0);
515 OUT_BCS_BATCH(batch, 0);
516 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): direct-MV buffer addresses
 * for every reference surface and the current picture, followed by the
 * POC (top/bottom field order count) table used for temporal direct
 * mode. Unused reference slots are written as zeros.
 */
520 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
521 struct decode_state *decode_state,
522 VAPictureParameterBufferH264 *pic_param,
523 VASliceParameterBufferH264 *slice_param,
524 struct gen7_mfd_context *gen7_mfd_context)
526 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
527 struct object_surface *obj_surface;
528 GenAvcSurface *gen7_avc_surface;
529 VAPictureH264 *va_pic;
532 BEGIN_BCS_BATCH(batch, 71);
533 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
535 /* reference surfaces 0..15 */
536 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
537 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
538 gen7_mfd_context->reference_surface[i].obj_surface &&
539 gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
541 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
542 gen7_avc_surface = obj_surface->private_data;
/* Reference DMV buffers are read-only (write domain 0). */
544 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
545 I915_GEM_DOMAIN_INSTRUCTION, 0,
547 OUT_BCS_BATCH(batch, 0);
549 OUT_BCS_BATCH(batch, 0);
550 OUT_BCS_BATCH(batch, 0);
554 OUT_BCS_BATCH(batch, 0);
556 /* the current decoding frame/field */
557 va_pic = &pic_param->CurrPic;
558 obj_surface = decode_state->render_object;
559 assert(obj_surface->bo && obj_surface->private_data);
560 gen7_avc_surface = obj_surface->private_data;
/* The current picture's DMV buffer is written by the hardware. */
562 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
563 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
566 OUT_BCS_BATCH(batch, 0);
567 OUT_BCS_BATCH(batch, 0);
/* POC list: find each reference's VA picture to fetch its field order
 * counts; unused slots get zero POCs. */
570 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
571 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
574 assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
576 for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
577 va_pic = &pic_param->ReferenceFrames[j];
579 if (va_pic->flags & VA_PICTURE_H264_INVALID)
582 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
589 assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
591 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
592 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
594 OUT_BCS_BATCH(batch, 0);
595 OUT_BCS_BATCH(batch, 0);
/* Finally, the current picture's POCs. */
599 va_pic = &pic_param->CurrPic;
600 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
601 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
603 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice: normalized slice
 * type (SI->I, SP->P), active reference counts per list, deblocking and
 * QP parameters, and the macroblock positions of this slice and the
 * next one (or the picture end when this is the last slice).
 */
607 gen8_mfd_avc_slice_state(VADriverContextP ctx,
608 VAPictureParameterBufferH264 *pic_param,
609 VASliceParameterBufferH264 *slice_param,
610 VASliceParameterBufferH264 *next_slice_param,
611 struct gen7_mfd_context *gen7_mfd_context)
613 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
614 int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
615 int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
616 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
617 int num_ref_idx_l0, num_ref_idx_l1;
618 int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
619 pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
620 int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
/* Collapse switching slice types: SI behaves as I, SP as P. */
623 if (slice_param->slice_type == SLICE_TYPE_I ||
624 slice_param->slice_type == SLICE_TYPE_SI) {
625 slice_type = SLICE_TYPE_I;
626 } else if (slice_param->slice_type == SLICE_TYPE_P ||
627 slice_param->slice_type == SLICE_TYPE_SP) {
628 slice_type = SLICE_TYPE_P;
630 assert(slice_param->slice_type == SLICE_TYPE_B);
631 slice_type = SLICE_TYPE_B;
634 if (slice_type == SLICE_TYPE_I) {
635 assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
636 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
639 } else if (slice_type == SLICE_TYPE_P) {
640 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
641 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
644 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
645 num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
/* In MBAFF pictures MB addresses count pairs, hence the shift. */
648 first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
649 slice_hor_pos = first_mb_in_slice % width_in_mbs;
650 slice_ver_pos = first_mb_in_slice / width_in_mbs;
652 if (next_slice_param) {
653 first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
654 next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
655 next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
657 next_slice_hor_pos = 0;
658 next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
661 BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
662 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
663 OUT_BCS_BATCH(batch, slice_type);
665 (num_ref_idx_l1 << 24) |
666 (num_ref_idx_l0 << 16) |
667 (slice_param->chroma_log2_weight_denom << 8) |
668 (slice_param->luma_log2_weight_denom << 0));
670 (slice_param->direct_spatial_mv_pred_flag << 29) |
671 (slice_param->disable_deblocking_filter_idc << 27) |
672 (slice_param->cabac_init_idc << 24) |
673 ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
674 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
675 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
677 (slice_ver_pos << 24) |
678 (slice_hor_pos << 16) |
679 (first_mb_in_slice << 0));
681 (next_slice_ver_pos << 16) |
682 (next_slice_hor_pos << 0));
684 (next_slice_param == NULL) << 19); /* last slice flag */
685 OUT_BCS_BATCH(batch, 0);
686 OUT_BCS_BATCH(batch, 0);
687 OUT_BCS_BATCH(batch, 0);
688 OUT_BCS_BATCH(batch, 0);
689 ADVANCE_BCS_BATCH(batch);
/*
 * Thin wrapper: delegate reference-index list programming to the shared
 * Gen6+ helper, passing this context's batch and reference surfaces.
 */
693 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
694 VAPictureParameterBufferH264 *pic_param,
695 VASliceParameterBufferH264 *slice_param,
696 struct gen7_mfd_context *gen7_mfd_context)
698 gen6_send_avc_ref_idx_state(
699 gen7_mfd_context->base.batch,
701 gen7_mfd_context->reference_surface
/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE (98 dwords) commands carrying explicit
 * weighted-prediction tables: one table (L0) for weighted P/SP slices,
 * two tables (L0 then L1) for B slices with weighted_bipred_idc == 1.
 * Each table packs 32 entries of {luma weight/offset, Cb weight/offset,
 * Cr weight/offset} as 16-bit values.
 */
706 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
707 VAPictureParameterBufferH264 *pic_param,
708 VASliceParameterBufferH264 *slice_param,
709 struct gen7_mfd_context *gen7_mfd_context)
711 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
712 int i, j, num_weight_offset_table = 0;
713 short weightoffsets[32 * 6];
715 if ((slice_param->slice_type == SLICE_TYPE_P ||
716 slice_param->slice_type == SLICE_TYPE_SP) &&
717 (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
718 num_weight_offset_table = 1;
721 if ((slice_param->slice_type == SLICE_TYPE_B) &&
722 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
723 num_weight_offset_table = 2;
/* i == 0 emits the L0 table, i == 1 (B slices only) the L1 table. */
726 for (i = 0; i < num_weight_offset_table; i++) {
727 BEGIN_BCS_BATCH(batch, 98);
728 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
729 OUT_BCS_BATCH(batch, i);
732 for (j = 0; j < 32; j++) {
733 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
734 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
735 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
736 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
737 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
738 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
741 for (j = 0; j < 32; j++) {
742 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
743 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
744 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
745 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
746 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
747 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
751 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
752 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFD_AVC_BSD_OBJECT (6 dwords): kick decoding of one slice's
 * bitstream data. The bit offset of the first macroblock is computed
 * from the slice buffer (CABAC vs CAVLC aware) and split into a byte
 * offset plus a residual bit offset for the hardware.
 */
757 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
758 VAPictureParameterBufferH264 *pic_param,
759 VASliceParameterBufferH264 *slice_param,
760 dri_bo *slice_data_bo,
761 VASliceParameterBufferH264 *next_slice_param,
762 struct gen7_mfd_context *gen7_mfd_context)
764 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
765 int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
767 pic_param->pic_fields.bits.entropy_coding_mode_flag);
769 /* the input bitsteam format on GEN7 differs from GEN6 */
770 BEGIN_BCS_BATCH(batch, 6);
771 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
773 (slice_param->slice_data_size));
774 OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
782 ((slice_data_bit_offset >> 3) << 16) |
786 ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
787 (slice_data_bit_offset & 0x7));
788 OUT_BCS_BATCH(batch, 0);
789 ADVANCE_BCS_BATCH(batch);
/*
 * One-time AVC context setup: seed the context's fallback IQ matrix
 * with flat (default) scaling lists, used when the application supplies
 * no VAIQMatrixBufferH264.
 */
793 gen8_mfd_avc_context_init(
794 VADriverContextP ctx,
795 struct gen7_mfd_context *gen7_mfd_context
798 /* Initialize flat scaling lists */
799 avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/*
 * Per-picture AVC decode setup:
 *  - scan all slices to decide whether in-loop deblocking (ILDB) is
 *    enabled (any slice with disable_deblocking_filter_idc != 1);
 *  - refresh the reference frame-store index from the picture params;
 *  - bind the render surface as post- or pre-deblocking output
 *    depending on whether ILDB is active;
 *  - (re)allocate the intra, deblocking-filter, BSD/MPC and MPR
 *    row-store scratch buffers sized by the picture width in MBs.
 */
803 gen8_mfd_avc_decode_init(VADriverContextP ctx,
804 struct decode_state *decode_state,
805 struct gen7_mfd_context *gen7_mfd_context)
807 VAPictureParameterBufferH264 *pic_param;
808 VASliceParameterBufferH264 *slice_param;
809 struct i965_driver_data *i965 = i965_driver_data(ctx);
810 struct object_surface *obj_surface;
812 int i, j, enable_avc_ildb = 0;
813 unsigned int width_in_mbs, height_in_mbs;
/* Stop scanning as soon as one slice requires deblocking. */
815 for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
816 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
817 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
819 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
820 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
821 assert((slice_param->slice_type == SLICE_TYPE_I) ||
822 (slice_param->slice_type == SLICE_TYPE_SI) ||
823 (slice_param->slice_type == SLICE_TYPE_P) ||
824 (slice_param->slice_type == SLICE_TYPE_SP) ||
825 (slice_param->slice_type == SLICE_TYPE_B));
827 if (slice_param->disable_deblocking_filter_idc != 1) {
836 assert(decode_state->pic_param && decode_state->pic_param->buffer);
837 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
838 intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
839 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
840 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
841 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
842 assert(height_in_mbs > 0 && height_in_mbs <= 256);
844 /* Current decoded picture */
845 obj_surface = decode_state->render_object;
846 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
847 obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
849 avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
850 gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
/* Route hardware output: post-deblocking when ILDB runs, otherwise
 * pre-deblocking; only one of the two is marked valid. */
852 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
853 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
854 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
855 gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
857 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
858 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
859 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
860 gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
862 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
863 bo = dri_bo_alloc(i965->intel.bufmgr,
868 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
869 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
871 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
872 bo = dri_bo_alloc(i965->intel.bufmgr,
873 "deblocking filter row store",
874 width_in_mbs * 64 * 4,
877 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
878 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
880 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
881 bo = dri_bo_alloc(i965->intel.bufmgr,
883 width_in_mbs * 64 * 2,
886 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
887 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
889 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
890 bo = dri_bo_alloc(i965->intel.bufmgr,
892 width_in_mbs * 64 * 2,
895 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
896 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* AVC has no bitplane data (that is VC-1 only). */
898 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Top-level AVC picture decode: run decode_init, then build and flush a
 * single BCS batch containing the common pipeline state followed by,
 * for each slice, the per-slice direct-mode / ref-idx / weight-offset /
 * slice-state / BSD-object commands. next_slice_param chains slices so
 * each command knows where the following slice starts (NULL on the
 * last slice of the picture).
 */
902 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
903 struct decode_state *decode_state,
904 struct gen7_mfd_context *gen7_mfd_context)
906 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
907 VAPictureParameterBufferH264 *pic_param;
908 VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
909 dri_bo *slice_data_bo;
912 assert(decode_state->pic_param && decode_state->pic_param->buffer);
913 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
914 gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
916 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
917 intel_batchbuffer_emit_mi_flush(batch);
918 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
919 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
920 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
921 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
922 gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
923 gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
924 gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
/* Outer loop: one slice-parameter buffer (slice group) at a time. */
926 for (j = 0; j < decode_state->num_slice_params; j++) {
927 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
928 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
929 slice_data_bo = decode_state->slice_datas[j]->bo;
930 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
932 if (j == decode_state->num_slice_params - 1)
933 next_slice_group_param = NULL;
935 next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
/* Inner loop: each slice element within the buffer. */
937 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
938 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
939 assert((slice_param->slice_type == SLICE_TYPE_I) ||
940 (slice_param->slice_type == SLICE_TYPE_SI) ||
941 (slice_param->slice_type == SLICE_TYPE_P) ||
942 (slice_param->slice_type == SLICE_TYPE_SP) ||
943 (slice_param->slice_type == SLICE_TYPE_B));
945 if (i < decode_state->slice_params[j]->num_elements - 1)
946 next_slice_param = slice_param + 1;
948 next_slice_param = next_slice_group_param;
950 gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
951 gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
952 gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
953 gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
954 gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
959 intel_batchbuffer_end_atomic(batch);
960 intel_batchbuffer_flush(batch);
/*
 * Per-picture MPEG-2 decode setup: refresh the reference surface list,
 * ensure the render surface has an NV12 buffer, bind it as the
 * pre-deblocking output (MPEG-2 uses no in-loop deblocking on this
 * path), allocate the BSD/MPC row-store scratch buffer sized by picture
 * width in MBs, and mark all other optional buffers invalid.
 */
964 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
965 struct decode_state *decode_state,
966 struct gen7_mfd_context *gen7_mfd_context)
968 VAPictureParameterBufferMPEG2 *pic_param;
969 struct i965_driver_data *i965 = i965_driver_data(ctx);
970 struct object_surface *obj_surface;
972 unsigned int width_in_mbs;
974 assert(decode_state->pic_param && decode_state->pic_param->buffer);
975 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
976 width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
978 mpeg2_set_reference_surfaces(
980 gen7_mfd_context->reference_surface,
985 /* Current decoded picture */
986 obj_surface = decode_state->render_object;
987 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
989 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
990 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
991 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
992 gen7_mfd_context->pre_deblocking_output.valid = 1;
994 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
995 bo = dri_bo_alloc(i965->intel.bufmgr,
1000 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1001 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
/* MPEG-2 uses none of the remaining optional pipeline buffers. */
1003 gen7_mfd_context->post_deblocking_output.valid = 0;
1004 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1005 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1006 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1007 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Emits the MFX_MPEG2_PIC_STATE command (13 dwords) describing the current
 * picture: packed f_codes, picture_coding_extension flags, coding type and
 * frame dimensions in macroblocks.
 * NOTE(review): listing is lossy; the condition guarding the
 * slice_concealment_disable_bit assignment (if any) is not visible here.
 */
1011 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1012 struct decode_state *decode_state,
1013 struct gen7_mfd_context *gen7_mfd_context)
1015 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1016 VAPictureParameterBufferMPEG2 *pic_param;
1017 unsigned int slice_concealment_disable_bit = 0;
1019 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1020 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1022 slice_concealment_disable_bit = 1;
1024 BEGIN_BCS_BATCH(batch, 13);
1025 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
/* dword 1: four 4-bit f_codes plus picture_coding_extension bit-fields */
1026 OUT_BCS_BATCH(batch,
1027 (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1028 ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1029 ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1030 ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1031 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1032 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1033 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1034 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1035 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1036 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1037 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1038 pic_param->picture_coding_extension.bits.alternate_scan << 6);
1039 OUT_BCS_BATCH(batch,
1040 pic_param->picture_coding_type << 9);
/* dword 3: concealment control and frame size in MBs (minus one) */
1041 OUT_BCS_BATCH(batch,
1042 (slice_concealment_disable_bit << 31) |
1043 ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1044 ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
/* remaining dwords are reserved/unused for decode */
1045 OUT_BCS_BATCH(batch, 0);
1046 OUT_BCS_BATCH(batch, 0);
1047 OUT_BCS_BATCH(batch, 0);
1048 OUT_BCS_BATCH(batch, 0);
1049 OUT_BCS_BATCH(batch, 0);
1050 OUT_BCS_BATCH(batch, 0);
1051 OUT_BCS_BATCH(batch, 0);
1052 OUT_BCS_BATCH(batch, 0);
1053 OUT_BCS_BATCH(batch, 0);
1054 ADVANCE_BCS_BATCH(batch);
/*
 * Maintains the driver-side MPEG-2 quantiser matrices and commits them to
 * hardware. Incoming VA matrices are in zig-zag scan order; they are
 * de-zigzagged into raster order via zigzag_direct[] before caching.
 * A cached load_* value of -1 means "never loaded", forcing the first copy.
 */
1058 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1059 struct decode_state *decode_state,
1060 struct gen7_mfd_context *gen7_mfd_context)
1062 VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1065 /* Update internal QM state */
1066 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1067 VAIQMatrixBufferMPEG2 * const iq_matrix =
1068 (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1070 if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1071 iq_matrix->load_intra_quantiser_matrix) {
1072 gen_iq_matrix->load_intra_quantiser_matrix =
1073 iq_matrix->load_intra_quantiser_matrix;
1074 if (iq_matrix->load_intra_quantiser_matrix) {
/* zig-zag -> raster order while copying */
1075 for (j = 0; j < 64; j++)
1076 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1077 iq_matrix->intra_quantiser_matrix[j];
1081 if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1082 iq_matrix->load_non_intra_quantiser_matrix) {
1083 gen_iq_matrix->load_non_intra_quantiser_matrix =
1084 iq_matrix->load_non_intra_quantiser_matrix;
1085 if (iq_matrix->load_non_intra_quantiser_matrix) {
1086 for (j = 0; j < 64; j++)
1087 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1088 iq_matrix->non_intra_quantiser_matrix[j];
1093 /* Commit QM state to HW */
/* i == 0: intra matrix, i == 1: non-intra matrix (selection below) */
1094 for (i = 0; i < 2; i++) {
1095 unsigned char *qm = NULL;
1099 if (gen_iq_matrix->load_intra_quantiser_matrix) {
1100 qm = gen_iq_matrix->intra_quantiser_matrix;
1101 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1104 if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1105 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1106 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1113 gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
/*
 * Emits an MFD_MPEG2_BSD_OBJECT for one slice: bitstream extent (size and
 * offset past the whole-byte part of macroblock_offset) plus start/end
 * macroblock positions. The end position comes from the next slice, or
 * from the bottom of the picture for the last slice.
 * is_field_pic_wa halves vertical positions to work around streams that
 * report frame-based slice_vertical_position for field pictures (see
 * mpeg2_wa_slice_vertical_position in the decode loop).
 */
1118 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1119 VAPictureParameterBufferMPEG2 *pic_param,
1120 VASliceParameterBufferMPEG2 *slice_param,
1121 VASliceParameterBufferMPEG2 *next_slice_param,
1122 struct gen7_mfd_context *gen7_mfd_context)
1124 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1125 unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1126 int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1128 if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1129 pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1131 is_field_pic_wa = is_field_pic &&
1132 gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
/* start MB position of this slice */
1134 vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1135 hpos0 = slice_param->slice_horizontal_position;
1137 if (next_slice_param == NULL) {
/* last slice: extends to the bottom of the (field) picture */
1138 vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1141 vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1142 hpos1 = next_slice_param->slice_horizontal_position;
/* number of macroblocks covered by this slice, in raster order */
1145 mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1147 BEGIN_BCS_BATCH(batch, 5);
1148 OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
/* bytes remaining after skipping the whole bytes of macroblock_offset */
1149 OUT_BCS_BATCH(batch,
1150 slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1151 OUT_BCS_BATCH(batch,
1152 slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1153 OUT_BCS_BATCH(batch,
1157 (next_slice_param == NULL) << 5 |
1158 (next_slice_param == NULL) << 3 |
1159 (slice_param->macroblock_offset & 0x7));
1160 OUT_BCS_BATCH(batch,
1161 (slice_param->quantiser_scale_code << 24) |
1162 (vpos1 << 8 | hpos1));
1163 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level MPEG-2 picture decode: performs per-frame init, emits the
 * common MFX pipeline state, picture and QM state, then walks every slice
 * parameter buffer/element and emits one BSD object per slice.
 * The whole command sequence is built atomically on the BCS ring and
 * flushed at the end.
 */
1167 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1168 struct decode_state *decode_state,
1169 struct gen7_mfd_context *gen7_mfd_context)
1171 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1172 VAPictureParameterBufferMPEG2 *pic_param;
1173 VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1174 dri_bo *slice_data_bo;
1177 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1178 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1180 gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1181 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1182 intel_batchbuffer_emit_mi_flush(batch);
1183 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1184 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1185 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1186 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1187 gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1188 gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
/* lazily detect the field-picture slice_vertical_position workaround */
1190 if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1191 gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1192 mpeg2_wa_slice_vertical_position(decode_state, pic_param);
/* outer loop: slice parameter buffers; inner loop: elements per buffer */
1194 for (j = 0; j < decode_state->num_slice_params; j++) {
1195 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1196 slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1197 slice_data_bo = decode_state->slice_datas[j]->bo;
1198 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
/* look ahead to the first slice of the next group (NULL for the last) */
1200 if (j == decode_state->num_slice_params - 1)
1201 next_slice_group_param = NULL;
1203 next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1205 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1206 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1208 if (i < decode_state->slice_params[j]->num_elements - 1)
1209 next_slice_param = slice_param + 1;
1211 next_slice_param = next_slice_group_param;
1213 gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1218 intel_batchbuffer_end_atomic(batch);
1219 intel_batchbuffer_flush(batch);
/*
 * VA-API -> GEN7 MFX translation tables for VC-1 decode.
 * NOTE(review): listing is lossy — several table entries are not visible
 * here; only the visible entries are documented.
 */
/* VA picture_type index -> GEN7_VC1_*_PICTURE */
1222 static const int va_to_gen7_vc1_pic_type[5] = {
1226 GEN7_VC1_BI_PICTURE,
/* VA mv_mode -> hardware unified MV mode */
1230 static const int va_to_gen7_vc1_mv[4] = {
1232 2, /* 1-MV half-pel */
1233 3, /* 1-MV half-pel bilinear */
/* scale factor indexed by b_picture_fraction (BFRACTION), fixed-point /256 */
1237 static const int b_picture_scale_factor[21] = {
1238 128, 85, 170, 64, 192,
1239 51, 102, 153, 204, 43,
1240 215, 37, 74, 111, 148,
1241 185, 222, 32, 96, 160,
/* conditional_overlap_flag -> hardware CONDOVER encoding */
1245 static const int va_to_gen7_vc1_condover[3] = {
/* VA sequence profile -> GEN7 VC-1 profile encoding */
1251 static const int va_to_gen7_vc1_profile[4] = {
1252 GEN7_VC1_SIMPLE_PROFILE,
1253 GEN7_VC1_MAIN_PROFILE,
1254 GEN7_VC1_RESERVED_PROFILE,
1255 GEN7_VC1_ADVANCED_PROFILE
/*
 * Destructor for the per-surface VC-1 private data (installed as
 * obj_surface->free_private_data): releases the direct-MV bo and frees
 * the struct. Safe to call when *data is already NULL.
 */
1259 gen8_mfd_free_vc1_surface(void **data)
1261 struct gen7_vc1_surface *gen7_vc1_surface = *data;
1263 if (!gen7_vc1_surface)
1266 dri_bo_unreference(gen7_vc1_surface->dmv);
1267 free(gen7_vc1_surface);
/*
 * Lazily attaches VC-1 private data to the decode surface: records the
 * picture type and allocates the direct-MV read/write buffer (64 bytes
 * per macroblock) used by B-picture direct-mode prediction.
 */
1272 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1273 VAPictureParameterBufferVC1 *pic_param,
1274 struct object_surface *obj_surface)
1276 struct i965_driver_data *i965 = i965_driver_data(ctx);
1277 struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1278 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1279 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1281 obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1283 if (!gen7_vc1_surface) {
/* zero-initialized so dmv starts NULL and is allocated below */
1284 gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1285 assert((obj_surface->size & 0x3f) == 0);
1286 obj_surface->private_data = gen7_vc1_surface;
1289 gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1291 if (gen7_vc1_surface->dmv == NULL) {
1292 gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1293 "direct mv w/r buffer",
1294 width_in_mbs * height_in_mbs * 64,
/*
 * Per-frame setup for VC-1 decoding:
 *  - updates the reference frame-store indices and the surface private data;
 *  - routes the render target to the post- or pre-deblocking output
 *    depending on whether the in-loop filter is enabled;
 *  - (re)allocates the intra, deblocking-filter and BSD/MPC row-store
 *    scratch buffers sized from the picture width;
 *  - if the picture carries bitplane data, repacks the VA bitplane buffer
 *    (two 4-bit entries per byte, raster order) into the hardware layout,
 *    two macroblocks per byte per row of bitplane_width bytes.
 * NOTE(review): listing is lossy (alloc arguments, loop bodies and braces
 * partially stripped); comments describe only what is visible.
 */
1300 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1301 struct decode_state *decode_state,
1302 struct gen7_mfd_context *gen7_mfd_context)
1304 VAPictureParameterBufferVC1 *pic_param;
1305 struct i965_driver_data *i965 = i965_driver_data(ctx);
1306 struct object_surface *obj_surface;
1311 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1312 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1313 width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1314 picture_type = pic_param->picture_fields.bits.picture_type;
1316 intel_update_vc1_frame_store_index(ctx,
1319 gen7_mfd_context->reference_surface);
1321 /* Current decoded picture */
1322 obj_surface = decode_state->render_object;
1323 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1324 gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
/* with loop filtering on, HW writes the post-deblocking output ... */
1326 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1327 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1328 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1329 gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
/* ... otherwise the pre-deblocking output; exactly one is valid */
1331 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1332 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1333 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1334 gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1336 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1337 bo = dri_bo_alloc(i965->intel.bufmgr,
1342 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1343 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1345 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1346 bo = dri_bo_alloc(i965->intel.bufmgr,
1347 "deblocking filter row store",
1348 width_in_mbs * 7 * 64,
1351 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1352 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1354 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1355 bo = dri_bo_alloc(i965->intel.bufmgr,
1356 "bsd mpc row store",
1360 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1361 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1363 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1365 gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1366 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1368 if (gen7_mfd_context->bitplane_read_buffer.valid) {
1369 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1370 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
/* hardware packs two macroblocks' bitplane nibbles per byte */
1371 int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1373 uint8_t *src = NULL, *dst = NULL;
1375 assert(decode_state->bit_plane->buffer);
1376 src = decode_state->bit_plane->buffer;
1378 bo = dri_bo_alloc(i965->intel.bufmgr,
1380 bitplane_width * height_in_mbs,
1383 gen7_mfd_context->bitplane_read_buffer.bo = bo;
1385 dri_bo_map(bo, True);
1386 assert(bo->virtual);
1389 for (src_h = 0; src_h < height_in_mbs; src_h++) {
1390 for(src_w = 0; src_w < width_in_mbs; src_w++) {
1391 int src_index, dst_index;
/* VA buffer: two MBs per byte, even MB in the high nibble */
1395 src_index = (src_h * width_in_mbs + src_w) / 2;
1396 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1397 src_value = ((src[src_index] >> src_shift) & 0xf);
1399 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
/* shift previous nibble down, insert current MB in high nibble */
1403 dst_index = src_w / 2;
1404 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
/* odd-width rows: finalize the last, half-filled byte */
1408 dst[src_w / 2] >>= 4;
1410 dst += bitplane_width;
1415 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
/*
 * Emits MFD_VC1_LONG_PIC_STATE (6 dwords) from the VA picture parameters.
 * Derives, in order:
 *  - alt_pquant_config / alt_pquant_edge_mask from the VOPDQUANT fields
 *    (dquant, dq_frame, dq_profile, dq edges, dq_binary_level);
 *  - the unified MV mode (via mv_mode2 when intensity compensation is on);
 *  - the B-picture scale factor from BFRACTION;
 *  - the effective picture type (advanced-profile I is treated as BI);
 *  - the luma AC coding-set index and transform-type overrides;
 *  - dmv_surface_valid from the backward reference's picture type;
 *  - CONDOVER and the sub-pel interpolation mode.
 * NOTE(review): listing is lossy — switch cases, else branches and parts
 * of the dword packing are stripped; comments cover visible code only.
 */
1419 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1420 struct decode_state *decode_state,
1421 struct gen7_mfd_context *gen7_mfd_context)
1423 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1424 VAPictureParameterBufferVC1 *pic_param;
1425 struct object_surface *obj_surface;
1426 int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1427 int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1428 int unified_mv_mode;
1429 int ref_field_pic_polarity = 0;
1430 int scale_factor = 0;
1432 int dmv_surface_valid = 0;
1438 int interpolation_mode = 0;
1440 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1441 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1443 profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1444 dquant = pic_param->pic_quantizer_fields.bits.dquant;
1445 dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1446 dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1447 dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1448 dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1449 dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1450 alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
/* VOPDQUANT (VC-1 spec 7.1.1.31) -> hardware ALTPQUANT config/edge mask */
1453 alt_pquant_config = 0;
1454 alt_pquant_edge_mask = 0;
1455 } else if (dquant == 2) {
1456 alt_pquant_config = 1;
1457 alt_pquant_edge_mask = 0xf;
1459 assert(dquant == 1);
1460 if (dquantfrm == 0) {
1461 alt_pquant_config = 0;
1462 alt_pquant_edge_mask = 0;
1465 assert(dquantfrm == 1);
1466 alt_pquant_config = 1;
1468 switch (dqprofile) {
1470 if (dqbilevel == 0) {
1471 alt_pquant_config = 2;
1472 alt_pquant_edge_mask = 0;
1474 assert(dqbilevel == 1);
1475 alt_pquant_config = 3;
1476 alt_pquant_edge_mask = 0;
1481 alt_pquant_edge_mask = 0xf;
1486 alt_pquant_edge_mask = 0x9;
1488 alt_pquant_edge_mask = (0x3 << dqdbedge);
1493 alt_pquant_edge_mask = (0x1 << dqsbedge);
/* with intensity compensation, the real MV mode lives in mv_mode2 */
1502 if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1503 assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1504 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1506 assert(pic_param->mv_fields.bits.mv_mode < 4);
1507 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1510 if (pic_param->sequence_fields.bits.interlace == 1 &&
1511 pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1512 /* FIXME: calculate reference field picture polarity */
1514 ref_field_pic_polarity = 0;
/* BFRACTION -> fixed-point (/256) backward/forward scale factor */
1517 if (pic_param->b_picture_fraction < 21)
1518 scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1520 picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
/* advanced-profile I pictures are programmed as BI for this command */
1522 if (profile == GEN7_VC1_ADVANCED_PROFILE &&
1523 picture_type == GEN7_VC1_I_PICTURE)
1524 picture_type = GEN7_VC1_BI_PICTURE;
1526 if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1527 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1529 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1532 * 8.3.6.2.1 Transform Type Selection
1533 * If variable-sized transform coding is not enabled,
1534 * then the 8x8 transform shall be used for all blocks.
1535 * it is also MFX_VC1_PIC_STATE requirement.
1537 if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1538 pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
1539 pic_param->transform_fields.bits.frame_level_transform_type = 0;
/* direct-mode MVs are only valid when the backward ref is P/B coded */
1543 if (picture_type == GEN7_VC1_B_PICTURE) {
1544 struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1546 obj_surface = decode_state->reference_objects[1];
1549 gen7_vc1_surface = obj_surface->private_data;
1551 if (!gen7_vc1_surface ||
1552 (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1553 va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1554 dmv_surface_valid = 0;
1556 dmv_surface_valid = 1;
1559 assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1561 if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1562 fcm = pic_param->picture_fields.bits.frame_coding_mode;
1564 if (pic_param->picture_fields.bits.top_field_first)
/* backward reference distance for B pictures, scaled by BFRACTION */
1570 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1571 brfd = pic_param->reference_fields.bits.reference_distance;
1572 brfd = (scale_factor * brfd) >> 8;
1573 brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
/* overlap smoothing: simple/main profile keys off PQUANT and type */
1580 if (profile != GEN7_VC1_ADVANCED_PROFILE){
1581 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1582 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1586 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1587 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1590 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1591 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
1592 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1594 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1595 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1601 assert(pic_param->conditional_overlap_flag < 3);
1602 assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1604 if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1605 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1606 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1607 interpolation_mode = 9; /* Half-pel bilinear */
1608 else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1609 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1610 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1611 interpolation_mode = 1; /* Half-pel bicubic */
1613 interpolation_mode = 0; /* Quarter-pel bicubic */
1615 BEGIN_BCS_BATCH(batch, 6);
1616 OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
/* dword 1: frame size in macroblocks, minus one */
1617 OUT_BCS_BATCH(batch,
1618 (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1619 ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1620 OUT_BCS_BATCH(batch,
1621 ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1622 dmv_surface_valid << 15 |
1623 (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1624 pic_param->rounding_control << 13 |
1625 pic_param->sequence_fields.bits.syncmarker << 12 |
1626 interpolation_mode << 8 |
1627 0 << 7 | /* FIXME: scale up or down ??? */
1628 pic_param->range_reduction_frame << 6 |
1629 pic_param->entrypoint_fields.bits.loopfilter << 5 |
1631 !pic_param->picture_fields.bits.is_first_field << 3 |
1632 (pic_param->sequence_fields.bits.profile == 3) << 0);
1633 OUT_BCS_BATCH(batch,
1634 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1635 picture_type << 26 |
1638 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1640 OUT_BCS_BATCH(batch,
1641 unified_mv_mode << 28 |
1642 pic_param->mv_fields.bits.four_mv_switch << 27 |
1643 pic_param->fast_uvmc_flag << 26 |
1644 ref_field_pic_polarity << 25 |
1645 pic_param->reference_fields.bits.num_reference_pictures << 24 |
1646 pic_param->reference_fields.bits.reference_distance << 20 |
1647 pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1648 pic_param->mv_fields.bits.extended_dmv_range << 10 |
1649 pic_param->mv_fields.bits.extended_mv_range << 8 |
1650 alt_pquant_edge_mask << 4 |
1651 alt_pquant_config << 2 |
1652 pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1653 pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
/* dword 5: raw-mode flags (inverted bitplane bits) and VLC table selects */
1654 OUT_BCS_BATCH(batch,
1655 !!pic_param->bitplane_present.value << 31 |
1656 !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1657 !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1658 !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1659 !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1660 !pic_param->bitplane_present.flags.bp_overflags << 26 |
1661 !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1662 !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1663 pic_param->mv_fields.bits.mv_table << 20 |
1664 pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1665 pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1666 pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1667 pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1668 pic_param->mb_mode_table << 8 |
1670 pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1671 pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1672 pic_param->cbp_table << 0);
1673 ADVANCE_BCS_BATCH(batch);
/*
 * Emits MFX_VC1_PRED_PIPE_STATE (6 dwords): enables intensity compensation
 * for both reference fields when the stream signals it via the unified
 * mv_mode, and programs the luma scale/shift used by the compensation.
 * Fix: the assert + pic_param assignment pair was emitted twice verbatim;
 * the redundant second copy has been removed (no behavior change).
 */
1677 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1678 struct decode_state *decode_state,
1679 struct gen7_mfd_context *gen7_mfd_context)
1681 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1682 VAPictureParameterBufferVC1 *pic_param;
1683 int intensitycomp_single;
1685 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1686 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
/* intensity compensation is signalled through the unified MV mode */
1690 intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1692 BEGIN_BCS_BATCH(batch, 6);
1693 OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1694 OUT_BCS_BATCH(batch,
1695 0 << 14 | /* FIXME: double ??? */
1697 intensitycomp_single << 10 |
1698 intensitycomp_single << 8 |
1699 0 << 4 | /* FIXME: interlace mode */
1701 OUT_BCS_BATCH(batch,
1702 pic_param->luma_shift << 16 |
1703 pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1704 OUT_BCS_BATCH(batch, 0);
1705 OUT_BCS_BATCH(batch, 0);
1706 OUT_BCS_BATCH(batch, 0);
1707 ADVANCE_BCS_BATCH(batch);
/*
 * Emits MFX_VC1_DIRECTMODE_STATE (7 dwords): relocations for the
 * direct-MV write buffer of the current picture and the direct-MV read
 * buffer of the backward reference (reference_objects[1]). Either slot is
 * programmed as 0 when the corresponding surface has no dmv bo.
 */
1711 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1712 struct decode_state *decode_state,
1713 struct gen7_mfd_context *gen7_mfd_context)
1715 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1716 struct object_surface *obj_surface;
1717 dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
/* current picture supplies the write buffer */
1719 obj_surface = decode_state->render_object;
1721 if (obj_surface && obj_surface->private_data) {
1722 dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
/* backward reference supplies the read buffer */
1725 obj_surface = decode_state->reference_objects[1];
1727 if (obj_surface && obj_surface->private_data) {
1728 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1731 BEGIN_BCS_BATCH(batch, 7);
1732 OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1734 if (dmv_write_buffer)
1735 OUT_BCS_RELOC(batch, dmv_write_buffer,
1736 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1739 OUT_BCS_BATCH(batch, 0);
1741 OUT_BCS_BATCH(batch, 0);
1742 OUT_BCS_BATCH(batch, 0);
1744 if (dmv_read_buffer)
1745 OUT_BCS_RELOC(batch, dmv_read_buffer,
1746 I915_GEM_DOMAIN_INSTRUCTION, 0,
1749 OUT_BCS_BATCH(batch, 0);
1751 OUT_BCS_BATCH(batch, 0);
1752 OUT_BCS_BATCH(batch, 0);
1754 ADVANCE_BCS_BATCH(batch);
/*
 * Re-computes the first-macroblock bit offset for a VC-1 slice after
 * accounting for start-code emulation-prevention bytes (0x00 0x00 0x03)
 * present in the slice header bytes. Scans up to in_slice_data_bit_offset/8
 * bytes and shifts the offset forward for each escape byte found.
 * NOTE(review): listing is lossy — the per-match adjustment inside the
 * loop and the profile-specific early-out are not visible here.
 */
1758 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1760 int out_slice_data_bit_offset;
1761 int slice_header_size = in_slice_data_bit_offset / 8;
1765 out_slice_data_bit_offset = in_slice_data_bit_offset;
1767 for (i = 0, j = 0; i < slice_header_size; i++, j++) {
/* 0x000003 followed by a value < 4 marks an emulation-prevention byte */
1768 if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1773 out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1776 return out_slice_data_bit_offset;
/*
 * Emits an MFD_VC1_BSD_OBJECT for one slice. Maps the slice data to
 * correct the macroblock bit offset for emulation-prevention bytes, then
 * programs bitstream extent (size/offset past the whole-byte part of the
 * offset) and the vertical range [this slice, next slice or bottom).
 */
1780 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1781 VAPictureParameterBufferVC1 *pic_param,
1782 VASliceParameterBufferVC1 *slice_param,
1783 VASliceParameterBufferVC1 *next_slice_param,
1784 dri_bo *slice_data_bo,
1785 struct gen7_mfd_context *gen7_mfd_context)
1787 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1788 int next_slice_start_vert_pos;
1789 int macroblock_offset;
1790 uint8_t *slice_data = NULL;
/* CPU-map the bitstream to scan the slice header bytes */
1792 dri_bo_map(slice_data_bo, 0);
1793 slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1794 macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1795 slice_param->macroblock_offset,
1796 pic_param->sequence_fields.bits.profile);
1797 dri_bo_unmap(slice_data_bo);
/* last slice extends to the bottom of the picture */
1799 if (next_slice_param)
1800 next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1802 next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1804 BEGIN_BCS_BATCH(batch, 5);
1805 OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1806 OUT_BCS_BATCH(batch,
1807 slice_param->slice_data_size - (macroblock_offset >> 3));
1808 OUT_BCS_BATCH(batch,
1809 slice_param->slice_data_offset + (macroblock_offset >> 3));
1810 OUT_BCS_BATCH(batch,
1811 slice_param->slice_vertical_position << 16 |
1812 next_slice_start_vert_pos << 0);
/* remaining sub-byte bit offset of the first macroblock */
1813 OUT_BCS_BATCH(batch,
1814 (macroblock_offset & 0x7));
1815 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level VC-1 picture decode: per-frame init, common MFX pipeline
 * state, VC-1 picture/prediction/direct-mode state, then one BSD object
 * per slice across all slice parameter buffers. Built atomically on the
 * BCS ring and flushed at the end.
 */
1819 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1820 struct decode_state *decode_state,
1821 struct gen7_mfd_context *gen7_mfd_context)
1823 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1824 VAPictureParameterBufferVC1 *pic_param;
1825 VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1826 dri_bo *slice_data_bo;
1829 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1830 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1832 gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1833 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1834 intel_batchbuffer_emit_mi_flush(batch);
1835 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1836 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1837 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1838 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1839 gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1840 gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1841 gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
/* outer loop: slice parameter buffers; inner loop: elements per buffer */
1843 for (j = 0; j < decode_state->num_slice_params; j++) {
1844 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1845 slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1846 slice_data_bo = decode_state->slice_datas[j]->bo;
1847 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
/* look ahead to the first slice of the next group (NULL for the last) */
1849 if (j == decode_state->num_slice_params - 1)
1850 next_slice_group_param = NULL;
1852 next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1854 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1855 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1857 if (i < decode_state->slice_params[j]->num_elements - 1)
1858 next_slice_param = slice_param + 1;
1860 next_slice_param = next_slice_group_param;
1862 gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1867 intel_batchbuffer_end_atomic(batch);
1868 intel_batchbuffer_flush(batch);
/*
 * Per-frame setup for JPEG (baseline) decoding: derives the chroma
 * subsampling and target fourcc from the per-component h/v sampling
 * factors, allocates/binds the render target as the pre-deblocking
 * output, and marks every other HW buffer as unused.
 * Fix: the last sampling-factor case compared h2 twice ("h2 == 2 &&
 * h2 == 2") and never checked h1; the first operand must be h1 so that
 * 2x2:2x1:2x1-sampled streams are classified correctly.
 */
1872 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1873 struct decode_state *decode_state,
1874 struct gen7_mfd_context *gen7_mfd_context)
1876 struct object_surface *obj_surface;
1877 VAPictureParameterBufferJPEGBaseline *pic_param;
1878 int subsampling = SUBSAMPLE_YUV420;
1879 int fourcc = VA_FOURCC_IMC3;
1881 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
/* grayscale: single component */
1883 if (pic_param->num_components == 1)
1884 subsampling = SUBSAMPLE_YUV400;
1885 else if (pic_param->num_components == 3) {
1886 int h1 = pic_param->components[0].h_sampling_factor;
1887 int h2 = pic_param->components[1].h_sampling_factor;
1888 int h3 = pic_param->components[2].h_sampling_factor;
1889 int v1 = pic_param->components[0].v_sampling_factor;
1890 int v2 = pic_param->components[1].v_sampling_factor;
1891 int v3 = pic_param->components[2].v_sampling_factor;
/* map (h,v) sampling-factor triples onto subsampling mode + fourcc */
1893 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1894 v1 == 2 && v2 == 1 && v3 == 1) {
1895 subsampling = SUBSAMPLE_YUV420;
1896 fourcc = VA_FOURCC_IMC3;
1897 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1898 v1 == 1 && v2 == 1 && v3 == 1) {
1899 subsampling = SUBSAMPLE_YUV422H;
1900 fourcc = VA_FOURCC_422H;
1901 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1902 v1 == 1 && v2 == 1 && v3 == 1) {
1903 subsampling = SUBSAMPLE_YUV444;
1904 fourcc = VA_FOURCC_444P;
1905 } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1906 v1 == 1 && v2 == 1 && v3 == 1) {
1907 subsampling = SUBSAMPLE_YUV411;
1908 fourcc = VA_FOURCC_411P;
1909 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1910 v1 == 2 && v2 == 1 && v3 == 1) {
1911 subsampling = SUBSAMPLE_YUV422V;
1912 fourcc = VA_FOURCC_422V;
1913 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1914 v1 == 2 && v2 == 2 && v3 == 2) {
1915 subsampling = SUBSAMPLE_YUV422H;
1916 fourcc = VA_FOURCC_422H;
1917 } else if (h1 == 2 && h2 == 2 && h3 == 2 && /* fixed: was h2 == 2 && h2 == 2 */
1918 v1 == 2 && v2 == 1 && v3 == 1) {
1919 subsampling = SUBSAMPLE_YUV422V;
1920 fourcc = VA_FOURCC_422V;
1928 /* Current decoded picture */
1929 obj_surface = decode_state->render_object;
1930 i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
/* JPEG has no deblocking: only the pre-deblocking output is valid */
1932 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1933 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1934 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1935 gen7_mfd_context->pre_deblocking_output.valid = 1;
1937 gen7_mfd_context->post_deblocking_output.bo = NULL;
1938 gen7_mfd_context->post_deblocking_output.valid = 0;
1940 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1941 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1943 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1944 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1946 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1947 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1949 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1950 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1952 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1953 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Map VA rotation indices (0/90/180/270) to the rotation field of
 * MFX_JPEG_PIC_STATE; only entry [0] (no rotation) is emitted below. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
1964 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1965 struct decode_state *decode_state,
1966 struct gen7_mfd_context *gen7_mfd_context)
1968 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1969 VAPictureParameterBufferJPEGBaseline *pic_param;
1970 int chroma_type = GEN7_YUV420;
1971 int frame_width_in_blks;
1972 int frame_height_in_blks;
1974 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1975 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1977 if (pic_param->num_components == 1)
1978 chroma_type = GEN7_YUV400;
1979 else if (pic_param->num_components == 3) {
1980 int h1 = pic_param->components[0].h_sampling_factor;
1981 int h2 = pic_param->components[1].h_sampling_factor;
1982 int h3 = pic_param->components[2].h_sampling_factor;
1983 int v1 = pic_param->components[0].v_sampling_factor;
1984 int v2 = pic_param->components[1].v_sampling_factor;
1985 int v3 = pic_param->components[2].v_sampling_factor;
1987 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1988 v1 == 2 && v2 == 1 && v3 == 1)
1989 chroma_type = GEN7_YUV420;
1990 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1991 v1 == 1 && v2 == 1 && v3 == 1)
1992 chroma_type = GEN7_YUV422H_2Y;
1993 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1994 v1 == 1 && v2 == 1 && v3 == 1)
1995 chroma_type = GEN7_YUV444;
1996 else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1997 v1 == 1 && v2 == 1 && v3 == 1)
1998 chroma_type = GEN7_YUV411;
1999 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2000 v1 == 2 && v2 == 1 && v3 == 1)
2001 chroma_type = GEN7_YUV422V_2Y;
2002 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2003 v1 == 2 && v2 == 2 && v3 == 2)
2004 chroma_type = GEN7_YUV422H_4Y;
2005 else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2006 v1 == 2 && v2 == 1 && v3 == 1)
2007 chroma_type = GEN7_YUV422V_4Y;
2012 if (chroma_type == GEN7_YUV400 ||
2013 chroma_type == GEN7_YUV444 ||
2014 chroma_type == GEN7_YUV422V_2Y) {
2015 frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2016 frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2017 } else if (chroma_type == GEN7_YUV411) {
2018 frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2019 frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2021 frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2022 frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2025 BEGIN_BCS_BATCH(batch, 3);
2026 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2027 OUT_BCS_BATCH(batch,
2028 (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
2029 (chroma_type << 0));
2030 OUT_BCS_BATCH(batch,
2031 ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
2032 ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
2033 ADVANCE_BCS_BATCH(batch);
2036 static const int va_to_gen7_jpeg_hufftable[2] = {
/*
 * Emit one MFX_JPEG_HUFF_TABLE_STATE per loaded Huffman table.  Each
 * 53-dword command carries 12 bytes of DC code counts, 12 DC values,
 * 16 AC code counts and 164 bytes of AC values (162 values + padding).
 */
gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct gen7_mfd_context *gen7_mfd_context,
    VAHuffmanTableBufferJPEGBaseline *huffman_table;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* Nothing to emit when the application supplied no Huffman table buffer. */
    if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)

    huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;

    for (index = 0; index < num_tables; index++) {
        int id = va_to_gen7_jpeg_hufftable[index];
        /* Skip table slots the application did not mark as loaded. */
        if (!huffman_table->load_huffman_table[index])
        BEGIN_BCS_BATCH(batch, 53);
        OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
        OUT_BCS_BATCH(batch, id);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
        ADVANCE_BCS_BATCH(batch);
/* Map the 1-based normalized component id (Y=1, Cb=2, Cr=3, alpha=4) to the
 * MFX quantizer-matrix command id.
 * NOTE(review): the entry for index 0 lies outside this chunk. */
static const int va_to_gen7_jpeg_qm[5] = {
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
/*
 * Upload JPEG quantization matrices: for each picture component, select
 * its quantiser table, convert it from zig-zag scan order to the raster
 * order expected by the hardware, and emit it via gen8_mfd_qm_state().
 */
gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
    VAPictureParameterBufferJPEGBaseline *pic_param;
    VAIQMatrixBufferJPEGBaseline *iq_matrix;

    if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)

    iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    assert(pic_param->num_components <= 3);

    for (index = 0; index < pic_param->num_components; index++) {
        /* Normalize component ids so the first component maps to 1 (Y). */
        int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
        unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
        unsigned char raster_qm[64];

        /* Only ids 1..4 have a matching hardware matrix slot. */
        if (id > 4 || id < 1)

        if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])

        qm_type = va_to_gen7_jpeg_qm[id];

        /* De-zig-zag: VA supplies scan-order tables, hardware wants raster. */
        for (j = 0; j < 64; j++)
            raster_qm[zigzag_direct[j]] = qm[j];

        gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
/*
 * Emit one MFD_JPEG_BSD_OBJECT for a scan: bitstream size/offset, scan
 * position, interleave flag, a Y/Cb/Cr scan-component mask, MCU count
 * and the restart interval.
 */
gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
                         VAPictureParameterBufferJPEGBaseline *pic_param,
                         VASliceParameterBufferJPEGBaseline *slice_param,
                         VASliceParameterBufferJPEGBaseline *next_slice_param,
                         dri_bo *slice_data_bo,
                         struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int scan_component_mask = 0;

    assert(slice_param->num_components > 0);
    assert(slice_param->num_components < 4);
    assert(slice_param->num_components <= pic_param->num_components);

    /* Build the component bitmask from selectors normalized so the first
     * picture component is 1: bit0 = Y, bit1 = Cb, bit2 = Cr. */
    for (i = 0; i < slice_param->num_components; i++) {
        switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
            scan_component_mask |= (1 << 0);
            scan_component_mask |= (1 << 1);
            scan_component_mask |= (1 << 2);

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size);
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  slice_param->slice_horizontal_position << 16 |
                  slice_param->slice_vertical_position << 0);
    OUT_BCS_BATCH(batch,
                  ((slice_param->num_components != 1) << 30) |  /* interleaved */
                  (scan_component_mask << 27) |                 /* scan components */
                  (0 << 26) |   /* disable interrupt allowed */
                  (slice_param->num_mcus << 0));                /* MCU count */
    OUT_BCS_BATCH(batch,
                  (slice_param->restart_interval << 0));        /* RestartInterval */
    ADVANCE_BCS_BATCH(batch);
/* Workaround for JPEG decoding on Ivybridge */

/* Forward declaration of the surface allocator used by the workaround.
 * NOTE(review): the remaining parameters lie outside this chunk. */
i965_CreateSurfaces(VADriverContextP ctx,
                    VASurfaceID *surfaces);

/* A tiny pre-canned AVC bitstream; it is decoded through the AVC path
 * (see gen8_mfd_jpeg_wa below) before each real JPEG decode. */
    unsigned char data[32];

    int data_bit_offset;
} gen7_jpeg_wa_clip = {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
/*
 * Allocate (or re-create) the scratch surface and the bitstream buffer
 * used by the JPEG workaround clip, and upload the clip data once.
 */
gen8_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    struct object_surface *obj_surface;

    /* Drop any previously created workaround surface before re-creating it. */
    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,

    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);

    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen7_mfd_context->jpeg_wa_surface_object = obj_surface;

    /* The clip bitstream bo is allocated and filled only once per context. */
    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
/*
 * MFX_PIPE_MODE_SELECT for the workaround clip: AVC VLD decode, long
 * format, stream-out disabled, pre-deblocking output only.
 */
gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9) | /* Post Deblocking Output */
                  (1 << 8) | /* Pre Deblocking Output */
                  (0 << 5) | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4) | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4) | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3) | /* terminate if AVC mbdata error occurs */
                  (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
/*
 * MFX_SURFACE_STATE for the workaround surface: NV12 planar 4:2:0,
 * Y-major tiled, dimensions/pitch/Cb offset taken from the surface object.
 */
gen8_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2) | /* must be 0 */
                  (1 << 1) | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
/*
 * MFX_PIPE_BUF_ADDR_STATE for the workaround decode: only the
 * pre-deblocking output and a temporary intra row-store buffer are
 * programmed; every other address slot is zero.  The intra bo is
 * released right after emission — the kernel keeps it alive for the GPU
 * via the relocation.
 */
gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    intra_bo = dri_bo_alloc(i965->intel.bufmgr,

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    OUT_BCS_RELOC(batch,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0); /* post deblocking */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 13-15 is for intra row store scratch */
    OUT_BCS_RELOC(batch,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 16-18 is for deblocking filter */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* no reference pictures for the workaround clip */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW52-54 is for mb status address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* the DW56-60 is for ILDB & second ILDB address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* The relocation holds a GPU-side reference; drop ours now. */
    dri_bo_unreference(intra_bo);
/*
 * MFX_BSP_BUF_BASE_ADDR_STATE for the workaround decode: temporary
 * BSD/MPC and MPR row-store buffers are allocated, programmed and then
 * unreferenced (the relocations keep them alive for the GPU).
 */
gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          7680, /* 1.0 * 120 * 64 */

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    OUT_BCS_RELOC(batch,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_RELOC(batch,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* Relocations hold GPU-side references; drop ours. */
    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
/* Intentionally a no-op placeholder in the workaround sequence —
 * NOTE(review): the (empty) body lies outside this chunk. */
gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
/*
 * MFX_AVC_IMG_STATE for the workaround clip: a 1x1-macroblock, 4:2:0,
 * CABAC, non-MBAFF picture.
 */
gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    int mbaff_frame_flag = 0;
    unsigned int width_in_mbs = 1, height_in_mbs = 1;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  width_in_mbs * height_in_mbs);
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 12) | /* differ from GEN6 */
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (mbaff_frame_flag << 1) |
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * MFX_AVC_DIRECTMODE_STATE for the workaround clip: the intra-only clip
 * needs no direct-mode data, so every address/POC slot is zero.
 */
gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
                                  struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0); /* top */
        OUT_BCS_BATCH(batch, 0); /* bottom */

    OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field */
    OUT_BCS_BATCH(batch, 0); /* top */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC list — all zero for the workaround clip */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * MFX_IND_OBJ_BASE_ADDR_STATE pointing the bitstream base at the
 * workaround clip's slice-data bo.
 */
gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch,
                  gen7_mfd_context->jpeg_wa_slice_data_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * MFD_AVC_BSD_OBJECT for the workaround clip: decode the whole clip as a
 * single last slice, using the byte/bit offset stored in gen7_jpeg_wa_clip.
 */
gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitsteam format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * MFX_AVC_SLICE_STATE for the workaround clip: a single I slice starting
 * at macroblock 0 with deblocking disabled and the clip's QP.
 */
gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
    OUT_BCS_BATCH(batch,
                  (1 << 27) | /* disable Deblocking */
                  (gen7_jpeg_wa_clip.qp << 16) |
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * Run the full JPEG workaround sequence: decode the tiny canned AVC clip
 * (init, pipe/surface/buffer state, image/slice state, BSD object) before
 * programming the real JPEG decode.
 */
gen8_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen8_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
/*
 * Top-level JPEG decode: run the workaround clip, program the common MFX
 * state, then make two passes over the slice parameters — the first to
 * find the highest Huffman table selector (so the right number of tables
 * is uploaded), the second to emit a BSD object per scan.
 */
gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferJPEGBaseline *pic_param;
    VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j, max_selector = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    /* Currently only support Baseline DCT */
    gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Pass 1: scan every slice to find the highest DC/AC table selector. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
                next_slice_param = next_slice_group_param;

            for (component = 0; component < slice_param->num_components; component++) {
                if (max_selector < slice_param->components[component].dc_table_selector)
                    max_selector = slice_param->components[component].dc_table_selector;

                if (max_selector < slice_param->components[component].ac_table_selector)
                    max_selector = slice_param->components[component].ac_table_selector;

    assert(max_selector < 2);
    gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);

    /* Pass 2: emit one MFD_JPEG_BSD_OBJECT per scan. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
                next_slice_param = next_slice_group_param;

            gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
/* VP8 DC dequantization lookup, indexed by a quantization index that must
 * first be clamped to [0, 127] (see vp8_clip_quantization_index). */
static const int vp8_dc_qlookup[128] =
    4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
    18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
    29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
    44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
    59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
    75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
    91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
/* VP8 AC dequantization lookup, indexed by a quantization index that must
 * first be clamped to [0, 127] (see vp8_clip_quantization_index). */
static const int vp8_ac_qlookup[128] =
    4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
    20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
    36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
    52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
    78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2734 static inline unsigned int vp8_clip_quantization_index(int index)
/*
 * Prepare the MFD pipeline for VP8 decoding: update the reference frame
 * store, (re)allocate the render surface as NV12, route the output to the
 * pre- or post-deblocking path depending on loop_filter_disable, and
 * allocate the per-frame row-store scratch buffers.
 */
gen8_mfd_vp8_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
    struct object_surface *obj_surface;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    int width_in_mbs = (pic_param->frame_width + 15) / 16;
    int height_in_mbs = (pic_param->frame_height + 15) / 16;

    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    intel_update_vp8_frame_store_index(ctx,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* Exactly one of the two output paths is valid, selected by whether
     * the in-loop filter is enabled for this frame. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;

    intel_ensure_vp8_segmentation_buffer(ctx,
                                         &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);

    /* The same as AVC */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      width_in_mbs * 64 * 2,
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* VP8 has no bitplane data. */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
2822 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2823 struct decode_state *decode_state,
2824 struct gen7_mfd_context *gen7_mfd_context)
2826 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2827 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2828 VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2829 VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2830 dri_bo *probs_bo = decode_state->probability_data->bo;
2832 unsigned int quantization_value[4][6];
2834 /* There is no safe way to error out if the segmentation buffer
2835 could not be allocated. So, instead of aborting, simply decode
2836 something even if the result may look totally inacurate */
2837 const unsigned int enable_segmentation =
2838 pic_param->pic_fields.bits.segmentation_enabled &&
2839 gen7_mfd_context->segmentation_buffer.valid;
2841 log2num = (int)log2(slice_param->num_of_partitions - 1);
2843 BEGIN_BCS_BATCH(batch, 38);
2844 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2845 OUT_BCS_BATCH(batch,
2846 (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2847 (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2848 OUT_BCS_BATCH(batch,
2850 pic_param->pic_fields.bits.sharpness_level << 16 |
2851 pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2852 pic_param->pic_fields.bits.sign_bias_golden << 12 |
2853 pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2854 pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2855 pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2856 pic_param->pic_fields.bits.segmentation_enabled << 8 |
2857 (enable_segmentation &&
2858 !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2859 (enable_segmentation &&
2860 pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2861 (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2862 pic_param->pic_fields.bits.filter_type << 4 |
2863 (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2864 !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2866 OUT_BCS_BATCH(batch,
2867 pic_param->loop_filter_level[3] << 24 |
2868 pic_param->loop_filter_level[2] << 16 |
2869 pic_param->loop_filter_level[1] << 8 |
2870 pic_param->loop_filter_level[0] << 0);
2872 /* Quantizer Value for 4 segmetns, DW4-DW15 */
2873 for (i = 0; i < 4; i++) {
2874 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2875 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2876 quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
2877 /* 101581>>16 is equivalent to 155/100 */
2878 quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
2879 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2880 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2882 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2883 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2885 OUT_BCS_BATCH(batch,
2886 quantization_value[i][0] << 16 | /* Y1AC */
2887 quantization_value[i][1] << 0); /* Y1DC */
2888 OUT_BCS_BATCH(batch,
2889 quantization_value[i][5] << 16 | /* UVAC */
2890 quantization_value[i][4] << 0); /* UVDC */
2891 OUT_BCS_BATCH(batch,
2892 quantization_value[i][3] << 16 | /* Y2AC */
2893 quantization_value[i][2] << 0); /* Y2DC */
2896 /* CoeffProbability table for non-key frame, DW16-DW18 */
2898 OUT_BCS_RELOC(batch, probs_bo,
2899 0, I915_GEM_DOMAIN_INSTRUCTION,
2901 OUT_BCS_BATCH(batch, 0);
2902 OUT_BCS_BATCH(batch, 0);
2904 OUT_BCS_BATCH(batch, 0);
2905 OUT_BCS_BATCH(batch, 0);
2906 OUT_BCS_BATCH(batch, 0);
2909 OUT_BCS_BATCH(batch,
2910 pic_param->mb_segment_tree_probs[2] << 16 |
2911 pic_param->mb_segment_tree_probs[1] << 8 |
2912 pic_param->mb_segment_tree_probs[0] << 0);
2914 OUT_BCS_BATCH(batch,
2915 pic_param->prob_skip_false << 24 |
2916 pic_param->prob_intra << 16 |
2917 pic_param->prob_last << 8 |
2918 pic_param->prob_gf << 0);
2920 OUT_BCS_BATCH(batch,
2921 pic_param->y_mode_probs[3] << 24 |
2922 pic_param->y_mode_probs[2] << 16 |
2923 pic_param->y_mode_probs[1] << 8 |
2924 pic_param->y_mode_probs[0] << 0);
2926 OUT_BCS_BATCH(batch,
2927 pic_param->uv_mode_probs[2] << 16 |
2928 pic_param->uv_mode_probs[1] << 8 |
2929 pic_param->uv_mode_probs[0] << 0);
2931 /* MV update value, DW23-DW32 */
2932 for (i = 0; i < 2; i++) {
2933 for (j = 0; j < 20; j += 4) {
2934 OUT_BCS_BATCH(batch,
2935 (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2936 pic_param->mv_probs[i][j + 2] << 16 |
2937 pic_param->mv_probs[i][j + 1] << 8 |
2938 pic_param->mv_probs[i][j + 0] << 0);
2942 OUT_BCS_BATCH(batch,
2943 (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2944 (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2945 (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) << 8 |
2946 (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) << 0);
2948 OUT_BCS_BATCH(batch,
2949 (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2950 (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2951 (pic_param->loop_filter_deltas_mode[1] & 0x7f) << 8 |
2952 (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0);
2954 /* segmentation id stream base address, DW35-DW37 */
2955 if (enable_segmentation) {
2956 OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2957 0, I915_GEM_DOMAIN_INSTRUCTION,
2959 OUT_BCS_BATCH(batch, 0);
2960 OUT_BCS_BATCH(batch, 0);
2963 OUT_BCS_BATCH(batch, 0);
2964 OUT_BCS_BATCH(batch, 0);
2965 OUT_BCS_BATCH(batch, 0);
2967 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_vp8_bsd_object:
 * Emit the MFD_VP8_BSD_OBJECT command (22 DWORDs, per BEGIN_BCS_BATCH
 * below) that starts hardware bitstream decode of one VP8 frame.  The
 * command carries the partition-0 bool-coder state taken from
 * pic_param->bool_coder_ctx plus the byte size and offset of up to
 * 8 token partitions from slice_param.
 */
2971 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2972 VAPictureParameterBufferVP8 *pic_param,
2973 VASliceParameterBufferVP8 *slice_param,
2974 dri_bo *slice_data_bo,
2975 struct gen7_mfd_context *gen7_mfd_context)
2977 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Partition 0 (the compressed header) is macroblock_offset bits into the
 * slice data; round up to a whole byte to get the byte offset where the
 * token data begins. */
2979 unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
/* Bits already consumed from the current bool-decoder byte
 * (bool_coder_ctx.count holds the bits still available, 0..7 below). */
2980 unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
2981 unsigned int partition_size_0 = slice_param->partition_size[0];
2983 assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
/* A fully consumed byte is re-expressed as 0 bits used of the next byte. */
2984 if (used_bits == 8) {
2987 partition_size_0 -= 1;
/* VP8 has one first partition plus 1..8 token partitions, hence 2..9. */
2990 assert(slice_param->num_of_partitions >= 2);
2991 assert(slice_param->num_of_partitions <= 9);
/* log2 of the token-partition count, in the encoding the hardware uses. */
2993 log2num = (int)log2(slice_param->num_of_partitions - 1);
2995 BEGIN_BCS_BATCH(batch, 22);
2996 OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
/* DW1: partition-0 entropy state and the sub-byte start position. */
2997 OUT_BCS_BATCH(batch,
2998 used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2999 pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
3001 (slice_param->macroblock_offset & 0x7));
3002 OUT_BCS_BATCH(batch,
3003 pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
/* Size and byte offset of partition 0. */
3006 OUT_BCS_BATCH(batch, partition_size_0);
3007 OUT_BCS_BATCH(batch, offset);
3008 //partition sizes in bytes are present after the above first partition when there are more than one token partition
/* Skip past partition 0 and the 3-byte size fields of the remaining
 * token partitions to reach the first token partition's data. */
3009 offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
/* (size, offset) pairs for token partitions 1..8; slots beyond
 * num_of_partitions are zero-filled. */
3010 for (i = 1; i < 9; i++) {
3011 if (i < slice_param->num_of_partitions) {
3012 OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
3013 OUT_BCS_BATCH(batch, offset);
3015 OUT_BCS_BATCH(batch, 0);
3016 OUT_BCS_BATCH(batch, 0);
/* partition_size[] has 9 entries, so reading past num_of_partitions is
 * in-bounds; unused entries only pad "offset", which is no longer used. */
3019 offset += slice_param->partition_size[i];
/* Error-concealment control DWORD. */
3022 OUT_BCS_BATCH(batch,
3023 1 << 31 | /* concealment method */
3026 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_vp8_decode_picture:
 * Validate the VP8 decode inputs and emit the complete MFX command
 * sequence for one frame (mode select -> surface/buffer state ->
 * picture state -> BSD object) into the batch buffer, then flush it.
 */
3030 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3031 struct decode_state *decode_state,
3032 struct gen7_mfd_context *gen7_mfd_context)
3034 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3035 VAPictureParameterBufferVP8 *pic_param;
3036 VASliceParameterBufferVP8 *slice_param;
3037 dri_bo *slice_data_bo;
3039 assert(decode_state->pic_param && decode_state->pic_param->buffer);
3040 pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3042 /* one slice per frame */
/* Reject anything but exactly one slice parameter buffer holding one
 * element, one slice data bo, and a probability-data buffer. */
3043 if (decode_state->num_slice_params != 1 ||
3044 (!decode_state->slice_params ||
3045 !decode_state->slice_params[0] ||
3046 (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3047 (!decode_state->slice_datas ||
3048 !decode_state->slice_datas[0] ||
3049 !decode_state->slice_datas[0]->bo) ||
3050 !decode_state->probability_data) {
3051 WARN_ONCE("Wrong parameters for VP8 decoding\n");
3056 slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3057 slice_data_bo = decode_state->slice_datas[0]->bo;
/* Per-frame init (presumably allocates decode resources -- the helper's
 * body is elsewhere in this file), then emit the pipeline atomically so
 * the whole frame's commands land in one contiguous batch section. */
3059 gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3060 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3061 intel_batchbuffer_emit_mi_flush(batch);
3062 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3063 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3064 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3065 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3066 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3067 gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3068 gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3069 intel_batchbuffer_end_atomic(batch);
3070 intel_batchbuffer_flush(batch);
/*
 * gen8_mfd_decode_picture:
 * hw_context::run entry point for the Gen8 decoder -- sanity-check the
 * input state, then dispatch to the per-codec decode routine selected
 * by the VA profile.
 */
3074 gen8_mfd_decode_picture(VADriverContextP ctx,
3076 union codec_state *codec_state,
3077 struct hw_context *hw_context)
3080 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3081 struct decode_state *decode_state = &codec_state->decode;
3084 assert(gen7_mfd_context);
3086 vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
/* Propagate validation failures without touching the hardware. */
3088 if (vaStatus != VA_STATUS_SUCCESS)
/* Reset the MPEG-2 slice vertical-position workaround state before each
 * picture (-1 appears to mean "no slice seen yet" -- confirm against the
 * MPEG-2 slice path). */
3091 gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
/* Dispatch on profile. */
3094 case VAProfileMPEG2Simple:
3095 case VAProfileMPEG2Main:
3096 gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3099 case VAProfileH264ConstrainedBaseline:
3100 case VAProfileH264Main:
3101 case VAProfileH264High:
3102 gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3105 case VAProfileVC1Simple:
3106 case VAProfileVC1Main:
3107 case VAProfileVC1Advanced:
3108 gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3111 case VAProfileJPEGBaseline:
3112 gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3115 case VAProfileVP8Version0_3:
3116 gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
/* Every dispatched path reports success from here. */
3124 vaStatus = VA_STATUS_SUCCESS;
/*
 * gen8_mfd_context_destroy:
 * hw_context::destroy callback -- drop every buffer object owned by the
 * decoder context (clearing the stored pointers), free the batch buffer,
 * then free the context itself.
 */
3131 gen8_mfd_context_destroy(void *hw_context)
3133 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3135 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3136 gen7_mfd_context->post_deblocking_output.bo = NULL;
3138 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3139 gen7_mfd_context->pre_deblocking_output.bo = NULL;
3141 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3142 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3144 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3145 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3147 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3148 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3150 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3151 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3153 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3154 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3156 dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3157 gen7_mfd_context->segmentation_buffer.bo = NULL;
/* Not NULLed like the others: harmless, the context is freed just below. */
3159 dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3161 intel_batchbuffer_free(gen7_mfd_context->base.batch);
3162 free(gen7_mfd_context);
/*
 * gen8_mfd_mpeg2_context_init:
 * Invalidate all four cached MPEG-2 quantiser-matrix "load" flags so the
 * first IQ-matrix buffer of a new stream forces a full reload.
 * NOTE(review): -1 is presumably the "not loaded yet" sentinel -- confirm
 * against the consumers of iq_matrix.mpeg2 elsewhere in this file.
 */
3165 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3166 struct gen7_mfd_context *gen7_mfd_context)
3168 gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3169 gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3170 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3171 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
/*
 * gen8_dec_hw_context_init:
 * Allocate and initialize the Gen8 decoder hw_context: hook up the
 * destroy/run callbacks, create the batch buffer, invalidate the
 * reference-surface table, and run per-codec one-time init for the
 * configured profile.  Returns the new context as a struct hw_context *.
 */
3175 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3177 struct intel_driver_data *intel = intel_driver_data(ctx);
/* NOTE(review): the calloc result is dereferenced below without a visible
 * NULL check -- confirm the allocation-failure policy. */
3178 struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3181 gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3182 gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3183 gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
/* No valid references yet: mark every frame-store slot empty. */
3185 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3186 gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3187 gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3190 gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3191 gen7_mfd_context->segmentation_buffer.valid = 0;
/* Per-codec one-time init; profiles not listed need no extra state. */
3193 switch (obj_config->profile) {
3194 case VAProfileMPEG2Simple:
3195 case VAProfileMPEG2Main:
3196 gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3199 case VAProfileH264ConstrainedBaseline:
3200 case VAProfileH264Main:
3201 case VAProfileH264High:
3202 gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3207 return (struct hw_context *)gen7_mfd_context;