2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
/* True when the GPU silicon revision is B-stepping or newer. */
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/* Zig-zag scan order for an 8x8 coefficient block: entry i is the
 * raster-order position of the i-th coefficient in scan order. Used to
 * reorder quantization matrices supplied in zig-zag order. */
51 static const uint32_t zigzag_direct[64] = {
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
54 12, 19, 26, 33, 40, 48, 41, 34,
55 27, 20, 13, 6, 7, 14, 21, 28,
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
/* Lazily attach AVC per-surface private data (GenAvcSurface) to
 * obj_surface and ensure its direct-MV scratch buffer exists.
 * Buffer sizing is derived from the picture size in macroblocks taken
 * from pic_param. Also installs gen_free_avc_surface as the destructor
 * for the private data. */
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64 VAPictureParameterBufferH264 *pic_param,
65 struct object_surface *obj_surface)
67 struct i965_driver_data *i965 = i965_driver_data(ctx);
68 GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69 int width_in_mbs, height_in_mbs;
71 obj_surface->free_private_data = gen_free_avc_surface;
72 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
/* First use of this surface: allocate and initialize the private data. */
75 if (!gen7_avc_surface) {
76 gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
78 if (!gen7_avc_surface)
/* -1 marks "no frame store slot assigned yet". */
81 gen7_avc_surface->base.frame_store_id = -1;
82 assert((obj_surface->size & 0x3f) == 0);
83 obj_surface->private_data = gen7_avc_surface;
86 /* DMV buffers now relate to the whole frame, irrespective of
88 if (gen7_avc_surface->dmv_top == NULL) {
/* 128 bytes of direct-MV data per macroblock for the whole frame
 * — assumed from the size formula below; TODO confirm vs. PRM. */
89 gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
90 "direct mv w/r buffer",
91 width_in_mbs * height_in_mbs * 128,
93 assert(gen7_avc_surface->dmv_top);
/* Emit MFX_PIPE_MODE_SELECT (5 dwords): put the MFX engine in VLD
 * decode mode for the chosen codec (standard_select) and route output
 * through the pre-/post-deblocking surfaces flagged valid in the
 * context. */
98 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
99 struct decode_state *decode_state,
101 struct gen7_mfd_context *gen7_mfd_context)
103 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Only these codecs are supported by this decode path. */
105 assert(standard_select == MFX_FORMAT_MPEG2 ||
106 standard_select == MFX_FORMAT_AVC ||
107 standard_select == MFX_FORMAT_VC1 ||
108 standard_select == MFX_FORMAT_JPEG ||
109 standard_select == MFX_FORMAT_VP8);
111 BEGIN_BCS_BATCH(batch, 5);
112 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
114 (MFX_LONG_MODE << 17) | /* Currently only support long format */
115 (MFD_MODE_VLD << 15) | /* VLD mode */
116 (0 << 10) | /* disable Stream-Out */
117 (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
118 (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
119 (0 << 5) | /* not in stitch mode */
120 (MFX_CODEC_DECODE << 4) | /* decoding mode */
121 (standard_select << 0));
123 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
124 (0 << 3) | /* terminate if AVC mbdata error occurs */
125 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
128 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
129 OUT_BCS_BATCH(batch, 0); /* reserved */
130 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_SURFACE_STATE (6 dwords) describing the render target:
 * dimensions, pitch, tiling (Y-major) and the Cb/Cr plane offsets of
 * decode_state->render_object. Y800 surfaces are programmed as
 * monochrome, everything else as planar 4:2:0 8-bit. */
134 gen8_mfd_surface_state(VADriverContextP ctx,
135 struct decode_state *decode_state,
137 struct gen7_mfd_context *gen7_mfd_context)
139 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
140 struct object_surface *obj_surface = decode_state->render_object;
141 unsigned int y_cb_offset;
142 unsigned int y_cr_offset;
143 unsigned int surface_format;
147 y_cb_offset = obj_surface->y_cb_offset;
148 y_cr_offset = obj_surface->y_cr_offset;
150 surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
151 MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
153 BEGIN_BCS_BATCH(batch, 6);
154 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
155 OUT_BCS_BATCH(batch, 0);
/* Width/height are programmed as value-minus-one per the command spec. */
157 ((obj_surface->orig_height - 1) << 18) |
158 ((obj_surface->orig_width - 1) << 4));
160 (surface_format << 28) | /* 420 planar YUV surface */
161 ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
162 (0 << 22) | /* surface object control state, ignored */
163 ((obj_surface->width - 1) << 3) | /* pitch */
164 (0 << 2) | /* must be 0 */
165 (1 << 1) | /* must be tiled */
166 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
168 (0 << 16) | /* X offset for U(Cb), must be 0 */
169 (y_cb_offset << 0)); /* Y offset for U(Cb) */
171 (0 << 16) | /* X offset for V(Cr), must be 0 */
172 ((standard_select == MFX_FORMAT_JPEG ? y_cr_offset : 0) << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
173 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): programs every surface and
 * scratch buffer address the MFX pipe needs — pre/post deblocking
 * outputs, intra and deblocking-filter row stores, the 16 reference
 * picture surfaces, and the MOCS (cacheability) words. Buffers whose
 * .valid flag is clear are programmed as 0 (the else branches are among
 * the source lines not visible in this chunk). */
177 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
178 struct decode_state *decode_state,
180 struct gen7_mfd_context *gen7_mfd_context)
182 struct i965_driver_data *i965 = i965_driver_data(ctx);
183 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
186 BEGIN_BCS_BATCH(batch, 61);
187 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
188 /* Pre-deblock 1-3 */
189 if (gen7_mfd_context->pre_deblocking_output.valid)
190 OUT_BCS_RELOC64(batch, gen7_mfd_context->pre_deblocking_output.bo,
191 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
194 OUT_BCS_BATCH(batch, 0);
196 OUT_BCS_BATCH(batch, 0);
198 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
200 /* Post-debloing 4-6 */
201 if (gen7_mfd_context->post_deblocking_output.valid)
202 OUT_BCS_RELOC64(batch, gen7_mfd_context->post_deblocking_output.bo,
203 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
206 OUT_BCS_BATCH(batch, 0);
208 OUT_BCS_BATCH(batch, 0);
210 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
212 /* uncompressed-video & stream out 7-12 */
213 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
214 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
215 OUT_BCS_BATCH(batch, 0);
216 OUT_BCS_BATCH(batch, 0);
217 OUT_BCS_BATCH(batch, 0);
218 OUT_BCS_BATCH(batch, 0);
220 /* intra row-store scratch 13-15 */
221 if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
222 OUT_BCS_RELOC64(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
223 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
226 OUT_BCS_BATCH(batch, 0);
228 OUT_BCS_BATCH(batch, 0);
230 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
232 /* deblocking-filter-row-store 16-18 */
233 if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
234 OUT_BCS_RELOC64(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
235 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
238 OUT_BCS_BATCH(batch, 0);
239 OUT_BCS_BATCH(batch, 0);
242 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/* Reference picture addresses: one 64-bit slot per frame store entry;
 * empty slots are written as 0. */
245 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
246 struct object_surface *obj_surface;
248 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
249 gen7_mfd_context->reference_surface[i].obj_surface &&
250 gen7_mfd_context->reference_surface[i].obj_surface->bo) {
251 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
253 OUT_BCS_RELOC64(batch, obj_surface->bo,
254 I915_GEM_DOMAIN_INSTRUCTION, 0,
257 OUT_BCS_BATCH(batch, 0);
258 OUT_BCS_BATCH(batch, 0);
263 /* reference property 51 */
264 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
266 /* Macroblock status & ILDB 52-57 */
267 OUT_BCS_BATCH(batch, 0);
268 OUT_BCS_BATCH(batch, 0);
269 OUT_BCS_BATCH(batch, 0);
270 OUT_BCS_BATCH(batch, 0);
271 OUT_BCS_BATCH(batch, 0);
272 OUT_BCS_BATCH(batch, 0);
274 /* the second Macroblock status 58-60 */
275 OUT_BCS_BATCH(batch, 0);
276 OUT_BCS_BATCH(batch, 0);
277 OUT_BCS_BATCH(batch, 0);
279 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): points the bitstream
 * decoder at slice_data_bo. All other indirect-object sections (MV,
 * IT-COFF, IT-DBLK, PAK-BSE) are unused for VLD decode and written 0. */
283 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
284 dri_bo *slice_data_bo,
286 struct gen7_mfd_context *gen7_mfd_context)
288 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
289 struct i965_driver_data *i965 = i965_driver_data(ctx);
291 BEGIN_BCS_BATCH(batch, 26);
292 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
294 OUT_BCS_RELOC64(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
295 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
296 /* Upper bound 4-5 */
297 OUT_BCS_BATCH(batch, 0);
298 OUT_BCS_BATCH(batch, 0);
300 /* MFX indirect MV 6-10 */
301 OUT_BCS_BATCH(batch, 0);
302 OUT_BCS_BATCH(batch, 0);
303 OUT_BCS_BATCH(batch, 0);
304 OUT_BCS_BATCH(batch, 0);
305 OUT_BCS_BATCH(batch, 0);
307 /* MFX IT_COFF 11-15 */
308 OUT_BCS_BATCH(batch, 0);
309 OUT_BCS_BATCH(batch, 0);
310 OUT_BCS_BATCH(batch, 0);
311 OUT_BCS_BATCH(batch, 0);
312 OUT_BCS_BATCH(batch, 0);
314 /* MFX IT_DBLK 16-20 */
315 OUT_BCS_BATCH(batch, 0);
316 OUT_BCS_BATCH(batch, 0);
317 OUT_BCS_BATCH(batch, 0);
318 OUT_BCS_BATCH(batch, 0);
319 OUT_BCS_BATCH(batch, 0);
321 /* MFX PAK_BSE object for encoder 21-25 */
322 OUT_BCS_BATCH(batch, 0);
323 OUT_BCS_BATCH(batch, 0);
324 OUT_BCS_BATCH(batch, 0);
325 OUT_BCS_BATCH(batch, 0);
326 OUT_BCS_BATCH(batch, 0);
328 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): BSD/MPC row store,
 * MPR row store and (VC-1) bitplane read buffer addresses. Buffers
 * whose .valid flag is clear are programmed as 0 (else branches are in
 * source lines not visible in this chunk). */
332 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
333 struct decode_state *decode_state,
335 struct gen7_mfd_context *gen7_mfd_context)
337 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
338 struct i965_driver_data *i965 = i965_driver_data(ctx);
340 BEGIN_BCS_BATCH(batch, 10);
341 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
343 if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
344 OUT_BCS_RELOC64(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
345 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
348 OUT_BCS_BATCH(batch, 0);
349 OUT_BCS_BATCH(batch, 0);
352 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
353 /* MPR Row Store Scratch buffer 4-6 */
354 if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
355 OUT_BCS_RELOC64(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
356 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
359 OUT_BCS_BATCH(batch, 0);
360 OUT_BCS_BATCH(batch, 0);
363 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
366 if (gen7_mfd_context->bitplane_read_buffer.valid)
367 OUT_BCS_RELOC64(batch, gen7_mfd_context->bitplane_read_buffer.bo,
368 I915_GEM_DOMAIN_INSTRUCTION, 0,
371 OUT_BCS_BATCH(batch, 0);
372 OUT_BCS_BATCH(batch, 0);
374 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
375 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_QM_STATE (18 dwords): upload one quantization matrix of the
 * given type. The matrix bytes (qm, qm_length <= 64) are staged through
 * a local 64-byte buffer and always emitted as a full 16 dwords.
 * NOTE(review): qm_buffer is not zero-filled before the memcpy here; if
 * qm_length < 64 and no memset exists in the lines missing from this
 * chunk, trailing stack garbage would be emitted — verify against the
 * full source. */
379 gen8_mfd_qm_state(VADriverContextP ctx,
383 struct gen7_mfd_context *gen7_mfd_context)
385 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
386 unsigned int qm_buffer[16];
388 assert(qm_length <= 16 * 4);
389 memcpy(qm_buffer, qm, qm_length);
391 BEGIN_BCS_BATCH(batch, 18);
392 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
393 OUT_BCS_BATCH(batch, qm_type << 0);
394 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
395 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_IMG_STATE (17 dwords) from the H.264 picture parameter
 * buffer: frame size in macroblocks, QP offsets, prediction/entropy
 * flags and field/MBAFF structure. Validates via asserts that the
 * stream is a profile this hardware path supports (4:2:0 or
 * monochrome, no residual colour transform). */
399 gen8_mfd_avc_img_state(VADriverContextP ctx,
400 struct decode_state *decode_state,
401 struct gen7_mfd_context *gen7_mfd_context)
403 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
405 int mbaff_frame_flag;
406 unsigned int width_in_mbs, height_in_mbs;
407 VAPictureParameterBufferH264 *pic_param;
409 assert(decode_state->pic_param && decode_state->pic_param->buffer);
410 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
411 assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
/* Derive the picture structure (frame/top/bottom) from CurrPic flags;
 * the img_struct assignments are in lines not visible in this chunk. */
413 if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
415 else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
420 if ((img_struct & 0x1) == 0x1) {
421 assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
423 assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
426 if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
427 assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
428 assert(pic_param->pic_fields.bits.field_pic_flag == 0);
430 assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
/* MBAFF: sequence allows adaptive frame/field and this picture is
 * coded as a frame. */
433 mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
434 !pic_param->pic_fields.bits.field_pic_flag);
436 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
437 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
439 /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
440 assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
441 pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
442 assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
444 BEGIN_BCS_BATCH(batch, 17);
445 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
447 (width_in_mbs * height_in_mbs - 1));
449 ((height_in_mbs - 1) << 16) |
450 ((width_in_mbs - 1) << 0));
452 ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
453 ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
454 (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
455 (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
456 (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
457 (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
460 (pic_param->seq_fields.bits.chroma_format_idc << 10) |
461 (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
462 ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
463 (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
464 (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
465 (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
466 (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
467 (mbaff_frame_flag << 1) |
468 (pic_param->pic_fields.bits.field_pic_flag << 0));
/* Remaining dwords are unused for this decode mode. */
469 OUT_BCS_BATCH(batch, 0);
470 OUT_BCS_BATCH(batch, 0);
471 OUT_BCS_BATCH(batch, 0);
472 OUT_BCS_BATCH(batch, 0);
473 OUT_BCS_BATCH(batch, 0);
474 OUT_BCS_BATCH(batch, 0);
475 OUT_BCS_BATCH(batch, 0);
476 OUT_BCS_BATCH(batch, 0);
477 OUT_BCS_BATCH(batch, 0);
478 OUT_BCS_BATCH(batch, 0);
479 OUT_BCS_BATCH(batch, 0);
480 OUT_BCS_BATCH(batch, 0);
481 ADVANCE_BCS_BATCH(batch);
/* Upload the H.264 scaling lists: use the app-supplied IQ matrix when
 * present, otherwise fall back to the context's default (flat) matrix.
 * 4x4 intra/inter lists are always sent; 8x8 lists only when the
 * picture enables transform_8x8_mode. */
485 gen8_mfd_avc_qm_state(VADriverContextP ctx,
486 struct decode_state *decode_state,
487 struct gen7_mfd_context *gen7_mfd_context)
489 VAIQMatrixBufferH264 *iq_matrix;
490 VAPictureParameterBufferH264 *pic_param;
492 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
493 iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
495 iq_matrix = &gen7_mfd_context->iq_matrix.h264;
497 assert(decode_state->pic_param && decode_state->pic_param->buffer);
498 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
/* Lists 0-2 are intra, 3-5 inter (3 * 16 bytes each). */
500 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
501 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
503 if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
504 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
505 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
/* Thin wrapper: emit the AVC PICID state for the current reference
 * surfaces via the shared Gen7.5 helper. */
510 gen8_mfd_avc_picid_state(VADriverContextP ctx,
511 struct decode_state *decode_state,
512 struct gen7_mfd_context *gen7_mfd_context)
514 gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
515 gen7_mfd_context->reference_surface);
/* Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): direct-MV buffer
 * addresses for the 16 reference surfaces and the current picture,
 * followed by the top/bottom POC values for each reference and for
 * CurrPic. Empty reference slots are programmed as 0. */
519 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
520 struct decode_state *decode_state,
521 VAPictureParameterBufferH264 *pic_param,
522 VASliceParameterBufferH264 *slice_param,
523 struct gen7_mfd_context *gen7_mfd_context)
525 struct i965_driver_data *i965 = i965_driver_data(ctx);
526 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
527 struct object_surface *obj_surface;
528 GenAvcSurface *gen7_avc_surface;
529 VAPictureH264 *va_pic;
532 BEGIN_BCS_BATCH(batch, 71);
533 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
535 /* reference surfaces 0..15 */
536 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
537 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
538 gen7_mfd_context->reference_surface[i].obj_surface &&
539 gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
541 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
542 gen7_avc_surface = obj_surface->private_data;
544 OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
545 I915_GEM_DOMAIN_INSTRUCTION, 0,
548 OUT_BCS_BATCH(batch, 0);
549 OUT_BCS_BATCH(batch, 0);
553 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
555 /* the current decoding frame/field */
556 va_pic = &pic_param->CurrPic;
557 obj_surface = decode_state->render_object;
558 assert(obj_surface->bo && obj_surface->private_data);
559 gen7_avc_surface = obj_surface->private_data;
/* The current picture's DMV buffer is written by the decoder, hence
 * the write domain on this relocation. */
561 OUT_BCS_RELOC64(batch, gen7_avc_surface->dmv_top,
562 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
565 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/* POC list: top/bottom field order counts per reference slot. */
568 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
569 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
572 const VAPictureH264 * const va_pic = avc_find_picture(
573 obj_surface->base.id, pic_param->ReferenceFrames,
574 ARRAY_ELEMS(pic_param->ReferenceFrames));
576 assert(va_pic != NULL);
577 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
578 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
580 OUT_BCS_BATCH(batch, 0);
581 OUT_BCS_BATCH(batch, 0);
585 va_pic = &pic_param->CurrPic;
586 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
587 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
589 ADVANCE_BCS_BATCH(batch);
/* Thin wrapper: emit a phantom slice covering macroblocks before the
 * first real slice (used when first_mb_in_slice != 0), delegating to
 * the shared Gen6 helper. */
593 gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
594 VAPictureParameterBufferH264 *pic_param,
595 VASliceParameterBufferH264 *next_slice_param,
596 struct gen7_mfd_context *gen7_mfd_context)
598 gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
/* Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice: normalized slice
 * type, active reference counts, QP/deblocking parameters, and the
 * start position of this slice plus the start of the next one (or the
 * end of the picture when this is the last slice). */
602 gen8_mfd_avc_slice_state(VADriverContextP ctx,
603 VAPictureParameterBufferH264 *pic_param,
604 VASliceParameterBufferH264 *slice_param,
605 VASliceParameterBufferH264 *next_slice_param,
606 struct gen7_mfd_context *gen7_mfd_context)
608 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
609 int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
610 int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
611 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
612 int num_ref_idx_l0, num_ref_idx_l1;
613 int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
614 pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
615 int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
/* Collapse SI->I and SP->P so the hardware sees only I/P/B. */
618 if (slice_param->slice_type == SLICE_TYPE_I ||
619 slice_param->slice_type == SLICE_TYPE_SI) {
620 slice_type = SLICE_TYPE_I;
621 } else if (slice_param->slice_type == SLICE_TYPE_P ||
622 slice_param->slice_type == SLICE_TYPE_SP) {
623 slice_type = SLICE_TYPE_P;
625 assert(slice_param->slice_type == SLICE_TYPE_B);
626 slice_type = SLICE_TYPE_B;
/* Reference counts: I uses none, P uses list 0 only, B uses both. */
629 if (slice_type == SLICE_TYPE_I) {
630 assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
631 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
634 } else if (slice_type == SLICE_TYPE_P) {
635 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
636 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
639 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
640 num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
643 first_mb_in_slice = slice_param->first_mb_in_slice;
644 slice_hor_pos = first_mb_in_slice % width_in_mbs;
645 slice_ver_pos = first_mb_in_slice / width_in_mbs;
/* MBAFF pictures address MB pairs, so double the vertical position. */
648 slice_ver_pos = slice_ver_pos << 1;
649 if (next_slice_param) {
650 first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
651 next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
652 next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
655 next_slice_ver_pos = next_slice_ver_pos << 1;
/* Last slice: "next" position is the bottom of the picture (halved
 * for field pictures). */
657 next_slice_hor_pos = 0;
658 next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
661 BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
662 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
663 OUT_BCS_BATCH(batch, slice_type);
665 (num_ref_idx_l1 << 24) |
666 (num_ref_idx_l0 << 16) |
667 (slice_param->chroma_log2_weight_denom << 8) |
668 (slice_param->luma_log2_weight_denom << 0));
670 (slice_param->direct_spatial_mv_pred_flag << 29) |
671 (slice_param->disable_deblocking_filter_idc << 27) |
672 (slice_param->cabac_init_idc << 24) |
673 ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
674 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
675 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
677 (slice_ver_pos << 24) |
678 (slice_hor_pos << 16) |
679 (first_mb_in_slice << 0));
681 (next_slice_ver_pos << 16) |
682 (next_slice_hor_pos << 0));
684 (next_slice_param == NULL) << 19); /* last slice flag */
685 OUT_BCS_BATCH(batch, 0);
686 OUT_BCS_BATCH(batch, 0);
687 OUT_BCS_BATCH(batch, 0);
688 OUT_BCS_BATCH(batch, 0);
689 ADVANCE_BCS_BATCH(batch);
/* Thin wrapper: emit the AVC reference index lists for this slice via
 * the shared Gen6 helper, using the context's frame store table. */
693 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
694 VAPictureParameterBufferH264 *pic_param,
695 VASliceParameterBufferH264 *slice_param,
696 struct gen7_mfd_context *gen7_mfd_context)
698 gen6_send_avc_ref_idx_state(
699 gen7_mfd_context->base.batch,
701 gen7_mfd_context->reference_surface
/* Emit MFX_AVC_WEIGHTOFFSET_STATE (98 dwords) per prediction list when
 * explicit weighted prediction is in use: one table for P slices with
 * weighted_pred_flag, two (L0 then L1) for B slices with
 * weighted_bipred_idc == 1. Each table packs 32 entries of
 * luma weight/offset and the two chroma weight/offset pairs. */
706 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
707 VAPictureParameterBufferH264 *pic_param,
708 VASliceParameterBufferH264 *slice_param,
709 struct gen7_mfd_context *gen7_mfd_context)
711 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
712 int i, j, num_weight_offset_table = 0;
713 short weightoffsets[32 * 6];
715 if ((slice_param->slice_type == SLICE_TYPE_P ||
716 slice_param->slice_type == SLICE_TYPE_SP) &&
717 (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
718 num_weight_offset_table = 1;
721 if ((slice_param->slice_type == SLICE_TYPE_B) &&
722 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
723 num_weight_offset_table = 2;
/* i == 0 emits the L0 table, i == 1 the L1 table. */
726 for (i = 0; i < num_weight_offset_table; i++) {
727 BEGIN_BCS_BATCH(batch, 98);
728 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
729 OUT_BCS_BATCH(batch, i);
732 for (j = 0; j < 32; j++) {
733 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
734 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
735 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
736 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
737 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
738 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
741 for (j = 0; j < 32; j++) {
742 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
743 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
744 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
745 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
746 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
747 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
751 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
752 ADVANCE_BCS_BATCH(batch);
/* Emit MFD_AVC_BSD_OBJECT (6 dwords): kick decoding of one slice.
 * Programs the slice data size/offset within slice_data_bo and the bit
 * offset of the first macroblock (byte part in dword bits 16+, residual
 * bits in the low 3 bits), plus the last-slice flag. */
757 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
758 VAPictureParameterBufferH264 *pic_param,
759 VASliceParameterBufferH264 *slice_param,
760 dri_bo *slice_data_bo,
761 VASliceParameterBufferH264 *next_slice_param,
762 struct gen7_mfd_context *gen7_mfd_context)
764 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Locate the first MB's bit position past the slice header; CABAC and
 * CAVLC streams are handled differently by the helper. */
765 int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
767 pic_param->pic_fields.bits.entropy_coding_mode_flag);
769 /* the input bitsteam format on GEN7 differs from GEN6 */
770 BEGIN_BCS_BATCH(batch, 6);
771 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
773 (slice_param->slice_data_size));
774 OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
782 ((slice_data_bit_offset >> 3) << 16) |
786 ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
787 (slice_data_bit_offset & 0x7));
788 OUT_BCS_BATCH(batch, 0);
789 ADVANCE_BCS_BATCH(batch);
/* One-time AVC setup for this decode context: seed the fallback IQ
 * matrix with the default (flat) scaling lists so decoding works when
 * the app supplies no VAIQMatrixBufferH264. */
793 gen8_mfd_avc_context_init(
794 VADriverContextP ctx,
795 struct gen7_mfd_context *gen7_mfd_context
798 /* Initialize flat scaling lists */
799 avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/* Per-picture AVC setup: scan the slices to decide whether in-loop
 * deblocking (ILDB) is active, refresh the reference frame store,
 * ensure the render surface and its AVC private data exist, and
 * (re)allocate all row-store scratch buffers sized from the frame
 * width in macroblocks. */
803 gen8_mfd_avc_decode_init(VADriverContextP ctx,
804 struct decode_state *decode_state,
805 struct gen7_mfd_context *gen7_mfd_context)
807 VAPictureParameterBufferH264 *pic_param;
808 VASliceParameterBufferH264 *slice_param;
809 struct i965_driver_data *i965 = i965_driver_data(ctx);
810 struct object_surface *obj_surface;
812 int i, j, enable_avc_ildb = 0;
813 unsigned int width_in_mbs, height_in_mbs;
/* ILDB is needed as soon as any slice has deblocking enabled
 * (disable_deblocking_filter_idc != 1); stop scanning once found. */
815 for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
816 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
817 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
819 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
820 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
821 assert((slice_param->slice_type == SLICE_TYPE_I) ||
822 (slice_param->slice_type == SLICE_TYPE_SI) ||
823 (slice_param->slice_type == SLICE_TYPE_P) ||
824 (slice_param->slice_type == SLICE_TYPE_SP) ||
825 (slice_param->slice_type == SLICE_TYPE_B));
827 if (slice_param->disable_deblocking_filter_idc != 1) {
836 assert(decode_state->pic_param && decode_state->pic_param->buffer);
837 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
838 gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
839 gen7_mfd_context->reference_surface);
840 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
841 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
842 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
843 assert(height_in_mbs > 0 && height_in_mbs <= 256);
845 /* Current decoded picture */
846 obj_surface = decode_state->render_object;
847 if (pic_param->pic_fields.bits.reference_pic_flag)
848 obj_surface->flags |= SURFACE_REFERENCED;
850 obj_surface->flags &= ~SURFACE_REFERENCED;
852 avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
853 gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
/* With ILDB on, the picture is written post-deblocking; otherwise
 * pre-deblocking. Exactly one of the two outputs is marked valid. */
855 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
856 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
857 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
858 gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
860 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
861 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
862 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
863 gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
865 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
866 bo = dri_bo_alloc(i965->intel.bufmgr,
871 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
872 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
874 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
875 bo = dri_bo_alloc(i965->intel.bufmgr,
876 "deblocking filter row store",
877 width_in_mbs * 64 * 4,
880 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
881 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
883 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
884 bo = dri_bo_alloc(i965->intel.bufmgr,
886 width_in_mbs * 64 * 2,
889 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
890 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
892 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
893 bo = dri_bo_alloc(i965->intel.bufmgr,
895 width_in_mbs * 64 * 2,
898 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
899 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* Bitplane buffer is a VC-1-only feature; unused for AVC. */
901 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Top-level AVC decode entry point for one picture: run per-picture
 * init, then emit the common pipe/surface/QM/IMG state once, followed
 * by per-slice state (directmode, ref-idx, weights, slice state, BSD
 * object) for every slice in every slice-parameter buffer, and finally
 * flush the batch. */
905 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
906 struct decode_state *decode_state,
907 struct gen7_mfd_context *gen7_mfd_context)
909 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
910 VAPictureParameterBufferH264 *pic_param;
911 VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
912 dri_bo *slice_data_bo;
915 assert(decode_state->pic_param && decode_state->pic_param->buffer);
916 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
917 gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
919 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
920 intel_batchbuffer_emit_mi_flush(batch);
/* Picture-level state, emitted once per picture. */
921 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
922 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
923 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
924 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
925 gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
926 gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
927 gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
/* Outer loop: one slice-parameter buffer (slice group) at a time. */
929 for (j = 0; j < decode_state->num_slice_params; j++) {
930 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
931 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
932 slice_data_bo = decode_state->slice_datas[j]->bo;
933 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
/* Look ahead to the first slice of the next group (NULL at end of
 * picture) so the last slice in this group can be linked to it. */
935 if (j == decode_state->num_slice_params - 1)
936 next_slice_group_param = NULL;
938 next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
/* If the stream doesn't start at MB 0, emit a phantom slice to
 * cover the leading gap. */
940 if (j == 0 && slice_param->first_mb_in_slice)
941 gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
943 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
944 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
945 assert((slice_param->slice_type == SLICE_TYPE_I) ||
946 (slice_param->slice_type == SLICE_TYPE_SI) ||
947 (slice_param->slice_type == SLICE_TYPE_P) ||
948 (slice_param->slice_type == SLICE_TYPE_SP) ||
949 (slice_param->slice_type == SLICE_TYPE_B));
951 if (i < decode_state->slice_params[j]->num_elements - 1)
952 next_slice_param = slice_param + 1;
954 next_slice_param = next_slice_group_param;
956 gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
957 gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
958 gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
959 gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
960 gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
965 intel_batchbuffer_end_atomic(batch);
966 intel_batchbuffer_flush(batch);
970 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
971 struct decode_state *decode_state,
972 struct gen7_mfd_context *gen7_mfd_context)
974 VAPictureParameterBufferMPEG2 *pic_param;
975 struct i965_driver_data *i965 = i965_driver_data(ctx);
976 struct object_surface *obj_surface;
978 unsigned int width_in_mbs;
980 assert(decode_state->pic_param && decode_state->pic_param->buffer);
981 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
982 width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
984 mpeg2_set_reference_surfaces(
986 gen7_mfd_context->reference_surface,
991 /* Current decoded picture */
992 obj_surface = decode_state->render_object;
993 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
995 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
996 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
997 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
998 gen7_mfd_context->pre_deblocking_output.valid = 1;
1000 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1001 bo = dri_bo_alloc(i965->intel.bufmgr,
1002 "bsd mpc row store",
1006 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1007 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1009 gen7_mfd_context->post_deblocking_output.valid = 0;
1010 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1011 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1012 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1013 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Emit the 13-dword MFX_MPEG2_PIC_STATE command describing the current
 * MPEG-2 picture: f_code fields, picture-coding-extension flags, picture
 * coding type, and frame dimensions in macroblocks (minus one, as the
 * hardware expects).
 *
 * NOTE(review): the condition that sets slice_concealment_disable_bit is
 * elided in this extraction (internal line 1027) — confirm against the
 * upstream file before relying on its semantics.
 */
1017 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1018 struct decode_state *decode_state,
1019 struct gen7_mfd_context *gen7_mfd_context)
1021 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1022 VAPictureParameterBufferMPEG2 *pic_param;
1023 unsigned int slice_concealment_disable_bit = 0;
1025 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1026 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1028 slice_concealment_disable_bit = 1;
1030 BEGIN_BCS_BATCH(batch, 13);
1031 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
/*
 * DW1: the four 4-bit f_code values are packed high-to-low, followed by
 * the picture-coding-extension flags at their MFX bit positions.
 */
1032 OUT_BCS_BATCH(batch,
1033 (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1034 ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1035 ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1036 ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1037 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1038 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1039 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1040 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1041 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1042 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1043 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1044 pic_param->picture_coding_extension.bits.alternate_scan << 6);
/* DW2: picture coding type (I/P/B). */
1045 OUT_BCS_BATCH(batch,
1046 pic_param->picture_coding_type << 9);
/* DW3: concealment control plus frame size in MBs, minus one each. */
1047 OUT_BCS_BATCH(batch,
1048 (slice_concealment_disable_bit << 31) |
1049 ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1050 ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
/* DW4-DW12: unused for this configuration, zero-filled. */
1051 OUT_BCS_BATCH(batch, 0);
1052 OUT_BCS_BATCH(batch, 0);
1053 OUT_BCS_BATCH(batch, 0);
1054 OUT_BCS_BATCH(batch, 0);
1055 OUT_BCS_BATCH(batch, 0);
1056 OUT_BCS_BATCH(batch, 0);
1057 OUT_BCS_BATCH(batch, 0);
1058 OUT_BCS_BATCH(batch, 0);
1059 OUT_BCS_BATCH(batch, 0);
1060 ADVANCE_BCS_BATCH(batch);
/*
 * Update the cached MPEG-2 inverse-quantization matrices from the VA-API
 * IQ-matrix buffer and commit them to hardware.
 *
 * The VA buffer supplies matrices in zigzag scan order; the cache stores
 * them in raster order (de-zigzagged through zigzag_direct[]) because
 * that is what gen8_mfd_qm_state() expects to upload.  A cached "load"
 * flag of -1 means "never initialized", forcing the first update.
 */
1064 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1065 struct decode_state *decode_state,
1066 struct gen7_mfd_context *gen7_mfd_context)
1068 VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1071 /* Update internal QM state */
1072 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1073 VAIQMatrixBufferMPEG2 * const iq_matrix =
1074 (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
/* Refresh intra matrix when uninitialized or newly supplied. */
1076 if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1077 iq_matrix->load_intra_quantiser_matrix) {
1078 gen_iq_matrix->load_intra_quantiser_matrix =
1079 iq_matrix->load_intra_quantiser_matrix;
1080 if (iq_matrix->load_intra_quantiser_matrix) {
/* De-zigzag: zigzag_direct[j] maps scan position j to raster index. */
1081 for (j = 0; j < 64; j++)
1082 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1083 iq_matrix->intra_quantiser_matrix[j];
/* Same refresh logic for the non-intra matrix. */
1087 if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1088 iq_matrix->load_non_intra_quantiser_matrix) {
1089 gen_iq_matrix->load_non_intra_quantiser_matrix =
1090 iq_matrix->load_non_intra_quantiser_matrix;
1091 if (iq_matrix->load_non_intra_quantiser_matrix) {
1092 for (j = 0; j < 64; j++)
1093 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1094 iq_matrix->non_intra_quantiser_matrix[j];
1099 /* Commit QM state to HW */
/* Iteration 0 selects the intra matrix, iteration 1 the non-intra one. */
1100 for (i = 0; i < 2; i++) {
1101 unsigned char *qm = NULL;
1105 if (gen_iq_matrix->load_intra_quantiser_matrix) {
1106 qm = gen_iq_matrix->intra_quantiser_matrix;
1107 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1110 if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1111 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1112 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
/* Upload one 64-byte quantizer matrix via MFX_QM_STATE. */
1119 gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
/*
 * Emit one MFD_MPEG2_BSD_OBJECT (5 dwords) that decodes a single slice.
 *
 * The macroblock span of the slice is computed from this slice's start
 * position and either the next slice's start or, for the last slice, the
 * bottom of the picture.  For field pictures the slice vertical position
 * may need to be halved: "is_field_pic_wa" applies a per-stream workaround
 * (wa_mpeg2_slice_vertical_position) where the app encodes frame-relative
 * positions.  slice_data_size/offset are adjusted by macroblock_offset so
 * the hardware starts bit-parsing at the right byte and bit.
 *
 * NOTE(review): some lines are elided in this extraction (e.g. the
 * statement setting is_field_pic, and hpos1 in the last-slice branch).
 */
1124 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1125 VAPictureParameterBufferMPEG2 *pic_param,
1126 VASliceParameterBufferMPEG2 *slice_param,
1127 VASliceParameterBufferMPEG2 *next_slice_param,
1128 struct gen7_mfd_context *gen7_mfd_context)
1130 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1131 unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1132 int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
/* Field picture when the picture structure is a single (top/bottom) field. */
1134 if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1135 pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1137 is_field_pic_wa = is_field_pic &&
1138 gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
/* Start position of this slice, halved when the workaround applies. */
1140 vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1141 hpos0 = slice_param->slice_horizontal_position;
1143 if (next_slice_param == NULL) {
/* Last slice: extend to the bottom of the (field) picture. */
1144 vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1147 vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1148 hpos1 = next_slice_param->slice_horizontal_position;
/* Number of macroblocks covered by this slice (row-major distance). */
1151 mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1153 BEGIN_BCS_BATCH(batch, 5);
1154 OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
/* DW1: remaining slice data size after skipping whole header bytes. */
1155 OUT_BCS_BATCH(batch,
1156 slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
/* DW2: byte offset where macroblock data starts. */
1157 OUT_BCS_BATCH(batch,
1158 slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
/* DW3: last-slice flags plus the sub-byte bit offset into the first byte. */
1159 OUT_BCS_BATCH(batch,
1163 (next_slice_param == NULL) << 5 |
1164 (next_slice_param == NULL) << 3 |
1165 (slice_param->macroblock_offset & 0x7));
/* DW4: quantiser scale code and the end position of the slice. */
1166 OUT_BCS_BATCH(batch,
1167 (slice_param->quantiser_scale_code << 24) |
1168 (vpos1 << 8 | hpos1));
1169 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level MPEG-2 decode entry: initializes per-frame state, then emits
 * the full MFX command sequence for the picture into one atomic BCS batch
 * (pipe mode, surface, buffer addresses, picture state, QM state), and
 * finally one BSD object per slice.  next_slice_param lets each BSD object
 * know where the following slice starts; NULL marks the picture's last
 * slice.  The field-picture slice-position workaround is probed lazily
 * (wa value < 0 means "not yet determined").
 */
1173 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1174 struct decode_state *decode_state,
1175 struct gen7_mfd_context *gen7_mfd_context)
1177 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1178 VAPictureParameterBufferMPEG2 *pic_param;
1179 VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1180 dri_bo *slice_data_bo;
1183 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1184 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1186 gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
/* All commands for the picture go into a single atomic batch. */
1187 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1188 intel_batchbuffer_emit_mi_flush(batch);
1189 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1190 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1191 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1192 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1193 gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1194 gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
/* Probe the slice-vertical-position workaround once per context. */
1196 if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1197 gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1198 mpeg2_wa_slice_vertical_position(decode_state, pic_param);
/* Outer loop: slice parameter buffers; inner loop: slices per buffer. */
1200 for (j = 0; j < decode_state->num_slice_params; j++) {
1201 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1202 slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1203 slice_data_bo = decode_state->slice_datas[j]->bo;
1204 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
/* First slice of the next buffer, or NULL if this is the last buffer. */
1206 if (j == decode_state->num_slice_params - 1)
1207 next_slice_group_param = NULL;
1209 next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1211 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1212 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1214 if (i < decode_state->slice_params[j]->num_elements - 1)
1215 next_slice_param = slice_param + 1;
1217 next_slice_param = next_slice_group_param;
1219 gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1224 intel_batchbuffer_end_atomic(batch);
1225 intel_batchbuffer_flush(batch);
/*
 * Maps the VA-API VC-1 MV mode (VAMvMode*) to the Gen7/Gen8 MFX unified
 * motion-vector mode encoding.  NOTE(review): the first entry/entries of
 * the initializer are elided in this extraction.
 */
1228 static const int va_to_gen7_vc1_mv[4] = {
1230 2, /* 1-MV half-pel */
1231 3, /* 1-MV half-pel bilinear */
/*
 * Fixed-point (x/256) scale factors for VC-1 B pictures, indexed by the
 * b_picture_fraction value from the picture parameter buffer (see the
 * bounds check against 21 before use).  NOTE(review): the final entry of
 * the initializer is elided in this extraction.
 */
1235 static const int b_picture_scale_factor[21] = {
1236 128, 85, 170, 64, 192,
1237 51, 102, 153, 204, 43,
1238 215, 37, 74, 111, 148,
1239 185, 222, 32, 96, 160,
/*
 * Maps the VA-API VC-1 conditional-overlap (CONDOVER) value to the MFX
 * encoding.  NOTE(review): the initializer body is elided in this
 * extraction.
 */
1243 static const int va_to_gen7_vc1_condover[3] = {
/*
 * For VC-1 field-interlaced coding, the 3-bit FPTYPE syntax element
 * describes both fields of a frame at once.  Row index is FPTYPE
 * (picture_fields.bits.picture_type); column 0 is the first decoded
 * field's picture type, column 1 the second field's.
 */
1249 static const int fptype_to_picture_type[8][2] = {
1250 {GEN7_VC1_I_PICTURE, GEN7_VC1_I_PICTURE},
1251 {GEN7_VC1_I_PICTURE, GEN7_VC1_P_PICTURE},
1252 {GEN7_VC1_P_PICTURE, GEN7_VC1_I_PICTURE},
1253 {GEN7_VC1_P_PICTURE, GEN7_VC1_P_PICTURE},
1254 {GEN7_VC1_B_PICTURE, GEN7_VC1_B_PICTURE},
1255 {GEN7_VC1_B_PICTURE, GEN7_VC1_BI_PICTURE},
1256 {GEN7_VC1_BI_PICTURE, GEN7_VC1_B_PICTURE},
1257 {GEN7_VC1_BI_PICTURE, GEN7_VC1_BI_PICTURE}
/*
 * Destructor for the per-surface VC-1 private data (installed via
 * obj_surface->free_private_data).  Releases the top/bottom direct-MV
 * buffer objects and frees the struct; a NULL private pointer is a no-op.
 * NOTE(review): the line resetting *data after free is elided in this
 * extraction — confirm against the upstream file.
 */
1261 gen8_mfd_free_vc1_surface(void **data)
1263 struct gen7_vc1_surface *gen7_vc1_surface = *data;
1265 if (!gen7_vc1_surface)
1268 dri_bo_unreference(gen7_vc1_surface->dmv_top);
1269 dri_bo_unreference(gen7_vc1_surface->dmv_bottom);
1270 free(gen7_vc1_surface);
/*
 * Lazily create and refresh the per-surface VC-1 private state attached
 * to the render target: allocates the gen7_vc1_surface struct on first
 * use, resets or records the per-field picture types and intensity-
 * compensation state, and allocates the direct-MV buffers the hardware
 * reads/writes (full-frame buffer, plus a per-field buffer for interlace).
 */
1275 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1276 VAPictureParameterBufferVC1 *pic_param,
1277 struct object_surface *obj_surface)
1279 struct i965_driver_data *i965 = i965_driver_data(ctx);
1280 struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1283 int is_first_field = 1;
/* Field-interlace resolves the picture type via the FPTYPE table. */
1285 if (!pic_param->sequence_fields.bits.interlace ||
1286 (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
1287 picture_type = pic_param->picture_fields.bits.picture_type;
1288 } else {/* Field-Interlace */
1289 is_first_field = pic_param->picture_fields.bits.is_first_field;
1290 picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
1293 obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
/* First use of this surface: allocate the private struct. */
1295 if (!gen7_vc1_surface) {
1296 gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1298 if (!gen7_vc1_surface)
1301 assert((obj_surface->size & 0x3f) == 0);
1302 obj_surface->private_data = gen7_vc1_surface;
/*
 * Reset per-field state; the elided third condition (internal line 1307)
 * presumably covers the first field of a field-interlaced frame — verify
 * against the upstream file.
 */
1305 if (!pic_param->sequence_fields.bits.interlace ||
1306 pic_param->picture_fields.bits.frame_coding_mode < 2 || /* Progressive or Frame-Interlace */
1308 gen7_vc1_surface->picture_type_top = 0;
1309 gen7_vc1_surface->picture_type_bottom = 0;
1310 gen7_vc1_surface->intensity_compensation_top = 0;
1311 gen7_vc1_surface->intensity_compensation_bottom = 0;
1312 gen7_vc1_surface->luma_scale_top[0] = 0;
1313 gen7_vc1_surface->luma_scale_top[1] = 0;
1314 gen7_vc1_surface->luma_scale_bottom[0] = 0;
1315 gen7_vc1_surface->luma_scale_bottom[1] = 0;
1316 gen7_vc1_surface->luma_shift_top[0] = 0;
1317 gen7_vc1_surface->luma_shift_top[1] = 0;
1318 gen7_vc1_surface->luma_shift_bottom[0] = 0;
1319 gen7_vc1_surface->luma_shift_bottom[1] = 0;
/* Record the picture type; frames set both fields, fields set one. */
1322 if (!pic_param->sequence_fields.bits.interlace ||
1323 pic_param->picture_fields.bits.frame_coding_mode < 2) { /* Progressive or Frame-Interlace */
1324 gen7_vc1_surface->picture_type_top = picture_type;
1325 gen7_vc1_surface->picture_type_bottom = picture_type;
1326 } else if (pic_param->picture_fields.bits.top_field_first ^ is_first_field)
1327 gen7_vc1_surface->picture_type_bottom = picture_type;
1329 gen7_vc1_surface->picture_type_top = picture_type;
1332 * The Direct MV buffer is scalable with frame height, but
1333 * does not scale with frame width as the hardware assumes
1334 * that frame width is fixed at 128 MBs.
/* Full-frame direct-MV buffer (height in 16-line MBs). */
1337 if (gen7_vc1_surface->dmv_top == NULL) {
1338 height_in_mbs = ALIGN(obj_surface->orig_height, 16) / 16;
1339 gen7_vc1_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
1340 "direct mv w/r buffer",
1341 128 * height_in_mbs * 64,
/* Per-field direct-MV buffer (half height) for interlaced streams. */
1345 if (pic_param->sequence_fields.bits.interlace &&
1346 gen7_vc1_surface->dmv_bottom == NULL) {
1347 height_in_mbs = ALIGN(obj_surface->orig_height, 32) / 32;
1348 gen7_vc1_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
1349 "direct mv w/r buffer",
1350 128 * height_in_mbs * 64,
/*
 * Per-frame setup for VC-1 decode on the Gen8 MFX pipe.
 *
 * Responsibilities visible in this block:
 *  - derive the effective picture type (field-interlace goes through the
 *    FPTYPE table) and whether this is the first decoded field;
 *  - bind the render target BO as both pre- and post-deblocking output,
 *    choosing which one is "valid" from the loop-filter flag (skipped
 *    pictures always use the pre-deblocking path);
 *  - for P pictures, propagate intensity-compensation state (luma
 *    scale/shift per field) onto the forward reference surface's private
 *    data, with separate handling for progressive, frame-interlace and
 *    field-interlace coding — in the field case the target surface and
 *    field depend on is_first_field, top_field_first, and the reference-
 *    field selection bits;
 *  - (re)allocate the intra, deblocking-filter and BSD/MPC row-store
 *    scratch buffers;
 *  - build the bitplane read buffer: two macroblocks packed per byte,
 *    column-major nibble packing; skipped pictures synthesize a raw value
 *    of 0x2 per macroblock instead of reading decode_state->bit_plane.
 *
 * NOTE(review): many structural lines (else branches, closing braces,
 * odd-width edge handling around internal lines 1602-1605/1625-1628) are
 * elided in this extraction; comments describe only the visible code.
 */
1356 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1357 struct decode_state *decode_state,
1358 struct gen7_mfd_context *gen7_mfd_context)
1360 VAPictureParameterBufferVC1 *pic_param;
1361 struct i965_driver_data *i965 = i965_driver_data(ctx);
1362 struct object_surface *obj_surface;
1363 struct gen7_vc1_surface *gen7_vc1_current_surface;
1364 struct gen7_vc1_surface *gen7_vc1_forward_surface;
1368 int is_first_field = 1;
1371 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1372 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1373 width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
/* Resolve the effective picture type for this field/frame. */
1375 if (!pic_param->sequence_fields.bits.interlace ||
1376 (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
1377 picture_type = pic_param->picture_fields.bits.picture_type;
1378 } else {/* Field-Interlace */
1379 is_first_field = pic_param->picture_fields.bits.is_first_field;
1380 picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
1383 /* Current decoded picture */
1384 obj_surface = decode_state->render_object;
1385 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1386 gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
/* Same BO feeds both outputs; validity below picks the active one. */
1388 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1389 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1390 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1392 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1393 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1394 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
/* Loop filter selects post-deblocking output; skipped pics bypass it. */
1396 if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1397 gen7_mfd_context->post_deblocking_output.valid = 0;
1398 gen7_mfd_context->pre_deblocking_output.valid = 1;
1400 gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1401 gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1404 intel_update_vc1_frame_store_index(ctx,
1407 gen7_mfd_context->reference_surface);
/*
 * P pictures: record intensity-compensation parameters on the surface
 * that will serve as reference, so later pictures can compensate.
 */
1409 if (picture_type == GEN7_VC1_P_PICTURE) {
1410 obj_surface = decode_state->reference_objects[0];
1411 gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);
1412 if (pic_param->forward_reference_picture != VA_INVALID_ID &&
1414 gen7_vc1_forward_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
1416 gen7_vc1_forward_surface = NULL;
/* Progressive: one scale/shift pair applies to both fields. */
1418 if (!pic_param->sequence_fields.bits.interlace ||
1419 pic_param->picture_fields.bits.frame_coding_mode == 0) { /* Progressive */
1420 if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1421 if (gen7_vc1_forward_surface) {
1422 gen7_vc1_forward_surface->intensity_compensation_top = 1;
1423 gen7_vc1_forward_surface->intensity_compensation_bottom = 1;
1424 gen7_vc1_forward_surface->luma_scale_top[0] = pic_param->luma_scale;
1425 gen7_vc1_forward_surface->luma_scale_bottom[0] = pic_param->luma_scale;
1426 gen7_vc1_forward_surface->luma_shift_top[0] = pic_param->luma_shift;
1427 gen7_vc1_forward_surface->luma_shift_bottom[0] = pic_param->luma_shift;
/* Frame-interlace: IC is signalled by a dedicated picture flag. */
1430 } else if (pic_param->sequence_fields.bits.interlace &&
1431 pic_param->picture_fields.bits.frame_coding_mode == 1) { /* Frame-Interlace */
1432 if (pic_param->picture_fields.bits.intensity_compensation) {
1433 if (gen7_vc1_forward_surface) {
1434 gen7_vc1_forward_surface->intensity_compensation_top = 1;
1435 gen7_vc1_forward_surface->intensity_compensation_bottom = 1;
1436 gen7_vc1_forward_surface->luma_scale_top[0] = pic_param->luma_scale;
1437 gen7_vc1_forward_surface->luma_scale_bottom[0] = pic_param->luma_scale;
1438 gen7_vc1_forward_surface->luma_shift_top[0] = pic_param->luma_shift;
1439 gen7_vc1_forward_surface->luma_shift_bottom[0] = pic_param->luma_shift;
/*
 * Field-interlace: intensity_compensation_field selects top (1),
 * bottom (2) or both (0) fields.  Which surface (forward reference or
 * the current frame's already-decoded first field) receives the IC
 * parameters depends on field order and the reference-field bits.
 */
1442 } else if (pic_param->sequence_fields.bits.interlace &&
1443 pic_param->picture_fields.bits.frame_coding_mode == 2) { /* Field-Interlace */
1444 if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1445 if (pic_param->intensity_compensation_field == 1 || /* Top field */
1446 pic_param->intensity_compensation_field == 0) { /* Both fields */
1447 if (is_first_field) {
1448 if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1449 (pic_param->reference_fields.bits.reference_field_pic_indicator ==
1450 pic_param->picture_fields.bits.top_field_first)) ||
1451 pic_param->reference_fields.bits.num_reference_pictures) {
1452 if (gen7_vc1_forward_surface) {
/* The post-increment index allows up to two IC entries per field. */
1453 i = gen7_vc1_forward_surface->intensity_compensation_top++;
1454 gen7_vc1_forward_surface->luma_scale_top[i] = pic_param->luma_scale;
1455 gen7_vc1_forward_surface->luma_shift_top[i] = pic_param->luma_shift;
1458 } else { /* Second field */
1459 if (pic_param->picture_fields.bits.top_field_first) {
/* Top field of the current frame is the first (already decoded) field. */
1460 if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1461 !pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1462 pic_param->reference_fields.bits.num_reference_pictures) {
1463 i = gen7_vc1_current_surface->intensity_compensation_top++;
1464 gen7_vc1_current_surface->luma_scale_top[i] = pic_param->luma_scale;
1465 gen7_vc1_current_surface->luma_shift_top[i] = pic_param->luma_shift;
1468 if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1469 pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1470 pic_param->reference_fields.bits.num_reference_pictures) {
1471 if (gen7_vc1_forward_surface) {
1472 i = gen7_vc1_forward_surface->intensity_compensation_top++;
1473 gen7_vc1_forward_surface->luma_scale_top[i] = pic_param->luma_scale;
1474 gen7_vc1_forward_surface->luma_shift_top[i] = pic_param->luma_shift;
/* Bottom-field branch; "both fields" uses luma_scale2/luma_shift2. */
1480 if (pic_param->intensity_compensation_field == 2 || /* Bottom field */
1481 pic_param->intensity_compensation_field == 0) { /* Both fields */
1482 if (is_first_field) {
1483 if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1484 (pic_param->reference_fields.bits.reference_field_pic_indicator ^
1485 pic_param->picture_fields.bits.top_field_first)) ||
1486 pic_param->reference_fields.bits.num_reference_pictures) {
1487 if (gen7_vc1_forward_surface) {
1488 i = gen7_vc1_forward_surface->intensity_compensation_bottom++;
1489 if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
1490 gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale;
1491 gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift;
1492 } else { /* Both fields */
1493 gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
1494 gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
1498 } else { /* Second field */
1499 if (pic_param->picture_fields.bits.top_field_first) {
1500 if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1501 pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1502 pic_param->reference_fields.bits.num_reference_pictures) {
1503 if (gen7_vc1_forward_surface) {
1504 i = gen7_vc1_forward_surface->intensity_compensation_bottom++;
1505 if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
1506 gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale;
1507 gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift;
1508 } else { /* Both fields */
1509 gen7_vc1_forward_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
1510 gen7_vc1_forward_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
1515 if ((!pic_param->reference_fields.bits.num_reference_pictures &&
1516 !pic_param->reference_fields.bits.reference_field_pic_indicator) ||
1517 pic_param->reference_fields.bits.num_reference_pictures) {
1518 i = gen7_vc1_current_surface->intensity_compensation_bottom++;
1519 if (pic_param->intensity_compensation_field == 2) { /* Bottom field */
1520 gen7_vc1_current_surface->luma_scale_bottom[i] = pic_param->luma_scale;
1521 gen7_vc1_current_surface->luma_shift_bottom[i] = pic_param->luma_shift;
1522 } else { /* Both fields */
1523 gen7_vc1_current_surface->luma_scale_bottom[i] = pic_param->luma_scale2;
1524 gen7_vc1_current_surface->luma_shift_bottom[i] = pic_param->luma_shift2;
/* Row-store scratch buffers, sized from the frame width in MBs. */
1534 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1535 bo = dri_bo_alloc(i965->intel.bufmgr,
1540 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1541 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1543 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1544 bo = dri_bo_alloc(i965->intel.bufmgr,
1545 "deblocking filter row store",
1546 width_in_mbs * 7 * 64,
1549 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1550 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1552 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1553 bo = dri_bo_alloc(i965->intel.bufmgr,
1554 "bsd mpc row store",
1558 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1559 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1561 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
/*
 * Bitplane buffer: needed whenever any of the seven raw-mode bitplanes
 * is present; skipped pictures always need one (synthesized below).
 */
1563 if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
1564 gen7_mfd_context->bitplane_read_buffer.valid = 1;
1566 gen7_mfd_context->bitplane_read_buffer.valid = !!(pic_param->bitplane_present.value & 0x7f);
1567 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1569 if (gen7_mfd_context->bitplane_read_buffer.valid) {
1570 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
/* Two macroblocks are packed per output byte. */
1572 int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1574 uint8_t *src = NULL, *dst = NULL;
/* Field pictures cover half the frame height. */
1576 if (!pic_param->sequence_fields.bits.interlace ||
1577 (pic_param->picture_fields.bits.frame_coding_mode < 2)) /* Progressive or Frame-Interlace */
1578 height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1579 else /* Field-Interlace */
1580 height_in_mbs = ALIGN(pic_param->coded_height, 32) / 32;
1582 bo = dri_bo_alloc(i965->intel.bufmgr,
1584 bitplane_width * height_in_mbs,
1587 gen7_mfd_context->bitplane_read_buffer.bo = bo;
/* CPU-fill the buffer; mapped writable. */
1589 dri_bo_map(bo, True);
1590 assert(bo->virtual);
1593 if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
/* Skipped picture: every MB gets the fixed raw value 0x2. */
1594 for (src_h = 0; src_h < height_in_mbs; src_h++) {
1595 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1597 uint8_t src_value = 0x2;
/* Shift the previous nibble down and insert the new one on top. */
1599 dst_index = src_w / 2;
1600 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
/* Odd row width: normalize the last half-filled byte. */
1604 dst[src_w / 2] >>= 4;
1606 dst += bitplane_width;
/* Normal case: repack nibbles from the app-provided bitplane buffer. */
1609 assert(decode_state->bit_plane->buffer);
1610 src = decode_state->bit_plane->buffer;
1612 for (src_h = 0; src_h < height_in_mbs; src_h++) {
1613 for (src_w = 0; src_w < width_in_mbs; src_w++) {
1614 int src_index, dst_index;
/* Even MB index lives in the high nibble of the source byte. */
1618 src_index = (src_h * width_in_mbs + src_w) / 2;
1619 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1620 src_value = ((src[src_index] >> src_shift) & 0xf);
1622 dst_index = src_w / 2;
1623 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1627 dst[src_w / 2] >>= 4;
1629 dst += bitplane_width;
/* No bitplane needed for this picture. */
1635 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1639 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1640 struct decode_state *decode_state,
1641 struct gen7_mfd_context *gen7_mfd_context)
1643 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1644 VAPictureParameterBufferVC1 *pic_param;
1645 struct object_surface *obj_surface;
1646 struct gen7_vc1_surface *gen7_vc1_current_surface;
1647 struct gen7_vc1_surface *gen7_vc1_reference_surface;
1648 int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1649 int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1650 int unified_mv_mode = 0;
1651 int ref_field_pic_polarity = 0;
1652 int scale_factor = 0;
1654 int dmv_surface_valid = 0;
1661 int interpolation_mode = 0;
1663 int is_first_field = 1;
1665 int bitplane_present;
1666 int range_reduction = 0;
1667 int range_reduction_scale = 0;
1668 int forward_mb = 0, mv_type_mb = 0, skip_mb = 0, direct_mb = 0;
1669 int overflags = 0, ac_pred = 0, field_tx = 0;
1671 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1672 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1674 if (!pic_param->sequence_fields.bits.interlace ||
1675 (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
1676 picture_type = pic_param->picture_fields.bits.picture_type;
1677 height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1678 } else {/* Field-Interlace */
1679 is_first_field = pic_param->picture_fields.bits.is_first_field;
1680 picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
1681 height_in_mbs = ALIGN(pic_param->coded_height, 32) / 32;
1684 dquant = pic_param->pic_quantizer_fields.bits.dquant;
1685 dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1686 dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1687 dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1688 dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1689 dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1690 alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1693 alt_pquant_config = 0;
1694 alt_pquant_edge_mask = 0;
1695 } else if (dquant == 2) {
1696 alt_pquant_config = 1;
1697 alt_pquant_edge_mask = 0xf;
1699 assert(dquant == 1);
1700 if (dquantfrm == 0) {
1701 alt_pquant_config = 0;
1702 alt_pquant_edge_mask = 0;
1705 assert(dquantfrm == 1);
1706 alt_pquant_config = 1;
1708 switch (dqprofile) {
1710 if (dqbilevel == 0) {
1711 alt_pquant_config = 2;
1712 alt_pquant_edge_mask = 0;
1714 assert(dqbilevel == 1);
1715 alt_pquant_config = 3;
1716 alt_pquant_edge_mask = 0;
1721 alt_pquant_edge_mask = 0xf;
1726 alt_pquant_edge_mask = 0x9;
1728 alt_pquant_edge_mask = (0x3 << dqdbedge);
1733 alt_pquant_edge_mask = (0x1 << dqsbedge);
1742 if (pic_param->sequence_fields.bits.profile == 1 && /* Main Profile */
1743 pic_param->sequence_fields.bits.rangered) {
1744 obj_surface = decode_state->reference_objects[0];
1746 gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);
1748 if (pic_param->forward_reference_picture != VA_INVALID_ID &&
1750 gen7_vc1_reference_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
1752 gen7_vc1_reference_surface = NULL;
1754 if (picture_type == GEN7_VC1_SKIPPED_PICTURE)
1755 if (gen7_vc1_reference_surface)
1756 gen7_vc1_current_surface->range_reduction_frame = gen7_vc1_reference_surface->range_reduction_frame;
1758 gen7_vc1_current_surface->range_reduction_frame = 0;
1760 gen7_vc1_current_surface->range_reduction_frame = pic_param->range_reduction_frame;
1762 if (gen7_vc1_reference_surface) {
1763 if (gen7_vc1_current_surface->range_reduction_frame &&
1764 !gen7_vc1_reference_surface->range_reduction_frame) {
1765 range_reduction = 1;
1766 range_reduction_scale = 0;
1767 } else if (!gen7_vc1_current_surface->range_reduction_frame &&
1768 gen7_vc1_reference_surface->range_reduction_frame) {
1769 range_reduction = 1;
1770 range_reduction_scale = 1;
1775 if ((!pic_param->sequence_fields.bits.interlace ||
1776 pic_param->picture_fields.bits.frame_coding_mode != 1) && /* Progressive or Field-Interlace */
1777 (picture_type == GEN7_VC1_P_PICTURE ||
1778 picture_type == GEN7_VC1_B_PICTURE)) {
1779 if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1780 assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1781 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1783 assert(pic_param->mv_fields.bits.mv_mode < 4);
1784 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1788 if (pic_param->sequence_fields.bits.interlace &&
1789 pic_param->picture_fields.bits.frame_coding_mode == 2 && /* Field-Interlace */
1790 picture_type == GEN7_VC1_P_PICTURE &&
1791 !pic_param->reference_fields.bits.num_reference_pictures) {
1792 if (pic_param->reference_fields.bits.reference_field_pic_indicator == 0) {
1793 ref_field_pic_polarity = is_first_field ?
1794 pic_param->picture_fields.bits.top_field_first :
1795 !pic_param->picture_fields.bits.top_field_first;
1797 ref_field_pic_polarity = is_first_field ?
1798 !pic_param->picture_fields.bits.top_field_first :
1799 pic_param->picture_fields.bits.top_field_first;
1803 if (pic_param->b_picture_fraction < 21)
1804 scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1806 if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1807 ptype = GEN7_VC1_P_PICTURE;
1808 bitplane_present = 1;
1810 ptype = pic_param->picture_fields.bits.picture_type;
1811 bitplane_present = !!(pic_param->bitplane_present.value & 0x7f);
1812 forward_mb = pic_param->raw_coding.flags.forward_mb;
1813 mv_type_mb = pic_param->raw_coding.flags.mv_type_mb;
1814 skip_mb = pic_param->raw_coding.flags.skip_mb;
1815 direct_mb = pic_param->raw_coding.flags.direct_mb;
1816 overflags = pic_param->raw_coding.flags.overflags;
1817 ac_pred = pic_param->raw_coding.flags.ac_pred;
1818 field_tx = pic_param->raw_coding.flags.field_tx;
1819 loopfilter = pic_param->entrypoint_fields.bits.loopfilter;
1822 if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1823 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1825 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1828 * 8.3.6.2.1 Transform Type Selection
1829 * If variable-sized transform coding is not enabled,
1830 * then the 8x8 transform shall be used for all blocks.
1831 * it is also MFX_VC1_PIC_STATE requirement.
1833 if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1834 pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
1835 pic_param->transform_fields.bits.frame_level_transform_type = 0;
1839 if (picture_type == GEN7_VC1_B_PICTURE) {
1840 obj_surface = decode_state->reference_objects[1];
1842 if (pic_param->backward_reference_picture != VA_INVALID_ID &&
1844 gen7_vc1_reference_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
1846 gen7_vc1_reference_surface = NULL;
1848 if (gen7_vc1_reference_surface) {
1849 if (pic_param->sequence_fields.bits.interlace &&
1850 pic_param->picture_fields.bits.frame_coding_mode == 2 && /* Field-Interlace */
1851 pic_param->picture_fields.bits.top_field_first ^ is_first_field) {
1852 if (gen7_vc1_reference_surface->picture_type_bottom == GEN7_VC1_P_PICTURE)
1853 dmv_surface_valid = 1;
1854 } else if (gen7_vc1_reference_surface->picture_type_top == GEN7_VC1_P_PICTURE)
1855 dmv_surface_valid = 1;
1859 assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1861 if (pic_param->sequence_fields.bits.interlace) {
1862 if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1863 fcm = pic_param->picture_fields.bits.frame_coding_mode;
1864 else if (!pic_param->picture_fields.bits.top_field_first)
1865 fcm = 3; /* Field with bottom field first */
1867 fcm = 2; /* Field with top field first */
1870 if (pic_param->sequence_fields.bits.interlace &&
1871 pic_param->picture_fields.bits.frame_coding_mode == 2) { /* Field-Interlace */
1872 if (picture_type == GEN7_VC1_I_PICTURE ||
1873 picture_type == GEN7_VC1_P_PICTURE) {
1874 gen7_vc1_current_surface = (struct gen7_vc1_surface *)(decode_state->render_object->private_data);
1877 gen7_vc1_current_surface->reference_distance = pic_param->reference_fields.bits.reference_distance;
1879 frfd = gen7_vc1_current_surface->reference_distance;
1880 } else if (picture_type == GEN7_VC1_B_PICTURE) {
1881 obj_surface = decode_state->reference_objects[1];
1883 if (pic_param->backward_reference_picture != VA_INVALID_ID &&
1885 gen7_vc1_reference_surface = (struct gen7_vc1_surface *)(obj_surface->private_data);
1887 gen7_vc1_reference_surface = NULL;
1889 if (gen7_vc1_reference_surface) {
1890 frfd = (scale_factor * gen7_vc1_reference_surface->reference_distance) >> 8;
1892 brfd = gen7_vc1_reference_surface->reference_distance - frfd - 1;
1899 if (pic_param->sequence_fields.bits.overlap) {
1900 if (pic_param->sequence_fields.bits.profile == 3) { /* Advanced Profile */
1901 if (picture_type == GEN7_VC1_P_PICTURE &&
1902 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1905 if (picture_type == GEN7_VC1_I_PICTURE ||
1906 picture_type == GEN7_VC1_BI_PICTURE) {
1907 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1909 } else if (pic_param->conditional_overlap_flag == 1 || /* all block boundaries */
1910 pic_param->conditional_overlap_flag == 2) { /* coded by OVERFLAGSMB bitplane */
1915 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1916 picture_type != GEN7_VC1_B_PICTURE) {
1922 if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1923 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1924 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1925 interpolation_mode = 8 | pic_param->fast_uvmc_flag;
1927 interpolation_mode = 0 | pic_param->fast_uvmc_flag;
1929 BEGIN_BCS_BATCH(batch, 6);
1930 OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1931 OUT_BCS_BATCH(batch,
1932 ((height_in_mbs - 1) << 16) |
1933 ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1934 OUT_BCS_BATCH(batch,
1935 ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1936 dmv_surface_valid << 15 |
1937 (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1938 pic_param->rounding_control << 13 |
1939 pic_param->sequence_fields.bits.syncmarker << 12 |
1940 interpolation_mode << 8 |
1941 range_reduction_scale << 7 |
1942 range_reduction << 6 |
1945 !is_first_field << 3 |
1946 (pic_param->sequence_fields.bits.profile == 3) << 0); /* Advanced Profile */
1947 OUT_BCS_BATCH(batch,
1948 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1952 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1954 OUT_BCS_BATCH(batch,
1955 unified_mv_mode << 28 |
1956 pic_param->mv_fields.bits.four_mv_switch << 27 |
1957 pic_param->fast_uvmc_flag << 26 |
1958 ref_field_pic_polarity << 25 |
1959 pic_param->reference_fields.bits.num_reference_pictures << 24 |
1962 pic_param->mv_fields.bits.extended_dmv_range << 10 |
1963 pic_param->mv_fields.bits.extended_mv_range << 8 |
1964 alt_pquant_edge_mask << 4 |
1965 alt_pquant_config << 2 |
1966 pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1967 pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1968 OUT_BCS_BATCH(batch,
1969 bitplane_present << 31 |
1977 pic_param->mv_fields.bits.mv_table << 20 |
1978 pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1979 pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1980 pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1981 pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1982 pic_param->mb_mode_table << 8 |
1984 pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1985 pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1986 pic_param->cbp_table << 0);
1987 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_vc1_pred_pipe_state:
 * Emits the MFX_VC1_PRED_PIPE_STATE command (6 dwords) carrying the VC-1
 * intensity-compensation parameters: per-field luma scale/shift values for
 * the forward reference (single and double IC) and the backward reference
 * (single IC), plus the reference-field replication mode for interlaced
 * content.
 * NOTE(review): this excerpt has elided lines (else branches, braces);
 * comments describe only what the visible statements establish.
 */
1991 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1992 struct decode_state *decode_state,
1993 struct gen7_mfd_context *gen7_mfd_context)
1995 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1996 VAPictureParameterBufferVC1 *pic_param;
1997 struct gen7_vc1_surface *gen7_vc1_top_surface;
1998 struct gen7_vc1_surface *gen7_vc1_bottom_surface;
2000 int is_first_field = 1;
/* All IC parameters default to 0 (no intensity compensation). */
2001 int intensitycomp_single_fwd = 0;
2002 int intensitycomp_single_bwd = 0;
2003 int intensitycomp_double_fwd = 0;
2004 int lumscale1_single_fwd = 0;
2005 int lumscale2_single_fwd = 0;
2006 int lumshift1_single_fwd = 0;
2007 int lumshift2_single_fwd = 0;
2008 int lumscale1_single_bwd = 0;
2009 int lumscale2_single_bwd = 0;
2010 int lumshift1_single_bwd = 0;
2011 int lumshift2_single_bwd = 0;
2012 int lumscale1_double_fwd = 0;
2013 int lumscale2_double_fwd = 0;
2014 int lumshift1_double_fwd = 0;
2015 int lumshift2_double_fwd = 0;
2016 int replication_mode = 0;
2018 assert(decode_state->pic_param && decode_state->pic_param->buffer);
2019 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
/*
 * Resolve the effective picture type: progressive / frame-interlace use
 * picture_type directly; field-interlace derives the per-field type from
 * the field-pair type (FPTYPE) and which field is being decoded.
 */
2021 if (!pic_param->sequence_fields.bits.interlace ||
2022 (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
2023 picture_type = pic_param->picture_fields.bits.picture_type;
2024 } else {/* Field-Interlace */
2025 is_first_field = pic_param->picture_fields.bits.is_first_field;
2026 picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
/*
 * Forward reference IC parameters; reference_surface[0] holds the top
 * field reference, reference_surface[2] the bottom field reference
 * (presumably — confirm against the surface-state setup elsewhere).
 */
2029 if (picture_type == GEN7_VC1_P_PICTURE ||
2030 picture_type == GEN7_VC1_B_PICTURE) {
2031 if (gen7_mfd_context->reference_surface[0].surface_id != VA_INVALID_ID)
2032 gen7_vc1_top_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[0].obj_surface->private_data);
2034 gen7_vc1_top_surface = NULL;
2036 if (gen7_vc1_top_surface) {
2037 intensitycomp_single_fwd = !!gen7_vc1_top_surface->intensity_compensation_top;
2038 lumscale1_single_fwd = gen7_vc1_top_surface->luma_scale_top[0];
2039 lumshift1_single_fwd = gen7_vc1_top_surface->luma_shift_top[0];
/* A value of 2 indicates double intensity compensation (second set). */
2040 if (gen7_vc1_top_surface->intensity_compensation_top == 2) {
2041 intensitycomp_double_fwd = 1;
2042 lumscale1_double_fwd = gen7_vc1_top_surface->luma_scale_top[1];
2043 lumshift1_double_fwd = gen7_vc1_top_surface->luma_shift_top[1];
/* Bottom-field forward reference IC, only for interlaced coding modes. */
2047 if (pic_param->sequence_fields.bits.interlace &&
2048 pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
2049 if (gen7_mfd_context->reference_surface[2].surface_id != VA_INVALID_ID)
2050 gen7_vc1_bottom_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[2].obj_surface->private_data);
2052 gen7_vc1_bottom_surface = NULL;
2054 if (gen7_vc1_bottom_surface) {
2055 intensitycomp_single_fwd |= !!gen7_vc1_bottom_surface->intensity_compensation_bottom << 1;
2056 lumscale2_single_fwd = gen7_vc1_bottom_surface->luma_scale_bottom[0];
2057 lumshift2_single_fwd = gen7_vc1_bottom_surface->luma_shift_bottom[0];
2058 if (gen7_vc1_bottom_surface->intensity_compensation_bottom == 2) {
2059 intensitycomp_double_fwd |= 2;
2060 lumscale2_double_fwd = gen7_vc1_bottom_surface->luma_scale_bottom[1];
2061 lumshift2_double_fwd = gen7_vc1_bottom_surface->luma_shift_bottom[1];
/*
 * Backward reference IC parameters for B pictures:
 * reference_surface[1] = top field, reference_surface[3] = bottom field.
 */
2067 if (picture_type == GEN7_VC1_B_PICTURE) {
2068 if (gen7_mfd_context->reference_surface[1].surface_id != VA_INVALID_ID)
2069 gen7_vc1_top_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[1].obj_surface->private_data);
2071 gen7_vc1_top_surface = NULL;
2073 if (gen7_vc1_top_surface) {
2074 intensitycomp_single_bwd = !!gen7_vc1_top_surface->intensity_compensation_top;
2075 lumscale1_single_bwd = gen7_vc1_top_surface->luma_scale_top[0];
2076 lumshift1_single_bwd = gen7_vc1_top_surface->luma_shift_top[0];
2079 if (pic_param->sequence_fields.bits.interlace &&
2080 pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
2081 if (gen7_mfd_context->reference_surface[3].surface_id != VA_INVALID_ID)
2082 gen7_vc1_bottom_surface = (struct gen7_vc1_surface *)(gen7_mfd_context->reference_surface[3].obj_surface->private_data);
2084 gen7_vc1_bottom_surface = NULL;
2086 if (gen7_vc1_bottom_surface) {
2087 intensitycomp_single_bwd |= !!gen7_vc1_bottom_surface->intensity_compensation_bottom << 1;
2088 lumscale2_single_bwd = gen7_vc1_bottom_surface->luma_scale_bottom[0];
2089 lumshift2_single_bwd = gen7_vc1_bottom_surface->luma_shift_bottom[0];
/*
 * Reference-frame replication mode (hardware bitfield): 0x5 for P,
 * 0xf for B, interlaced coding only; otherwise left at 0.
 */
2094 if (pic_param->sequence_fields.bits.interlace &&
2095 pic_param->picture_fields.bits.frame_coding_mode > 0) { /* Frame-Interlace or Field-Interlace */
2096 if (picture_type == GEN7_VC1_P_PICTURE)
2097 replication_mode = 0x5;
2098 else if (picture_type == GEN7_VC1_B_PICTURE)
2099 replication_mode = 0xf;
/* Emit the 6-dword MFX_VC1_PRED_PIPE_STATE command. */
2102 BEGIN_BCS_BATCH(batch, 6);
2103 OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
2104 OUT_BCS_BATCH(batch,
2105 intensitycomp_double_fwd << 14 |
2107 intensitycomp_single_fwd << 10 |
2108 intensitycomp_single_bwd << 8 |
2109 replication_mode << 4 |
2111 OUT_BCS_BATCH(batch,
/* DW2: forward single-IC luma scale/shift (field 1 and field 2). */
2112 lumshift2_single_fwd << 24 |
2113 lumshift1_single_fwd << 16 |
2114 lumscale2_single_fwd << 8 |
2115 lumscale1_single_fwd << 0);
2116 OUT_BCS_BATCH(batch,
/* DW3: forward double-IC luma scale/shift. */
2117 lumshift2_double_fwd << 24 |
2118 lumshift1_double_fwd << 16 |
2119 lumscale2_double_fwd << 8 |
2120 lumscale1_double_fwd << 0);
2121 OUT_BCS_BATCH(batch,
/* DW4: backward single-IC luma scale/shift. */
2122 lumshift2_single_bwd << 24 |
2123 lumshift1_single_bwd << 16 |
2124 lumscale2_single_bwd << 8 |
2125 lumscale1_single_bwd << 0);
2126 OUT_BCS_BATCH(batch,
2131 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_vc1_directmode_state:
 * Emits MFX_VC1_DIRECTMODE_STATE (7 dwords) with the direct-mode motion
 * vector (DMV) buffers: a write buffer for the current P/skipped picture
 * and a read buffer taken from the backward reference for B pictures.
 * For field-interlaced content the top/bottom DMV buffer is selected by
 * top_field_first XOR is_first_field.
 */
2135 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
2136 struct decode_state *decode_state,
2137 struct gen7_mfd_context *gen7_mfd_context)
2139 struct i965_driver_data *i965 = i965_driver_data(ctx);
2140 VAPictureParameterBufferVC1 *pic_param;
2141 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2142 struct object_surface *obj_surface;
2143 dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
2145 int is_first_field = 1;
2147 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
/* Same picture-type resolution as the other VC-1 state emitters. */
2149 if (!pic_param->sequence_fields.bits.interlace ||
2150 (pic_param->picture_fields.bits.frame_coding_mode < 2)) { /* Progressive or Frame-Interlace */
2151 picture_type = pic_param->picture_fields.bits.picture_type;
2152 } else {/* Field-Interlace */
2153 is_first_field = pic_param->picture_fields.bits.is_first_field;
2154 picture_type = fptype_to_picture_type[pic_param->picture_fields.bits.picture_type][!is_first_field];
/* P and skipped pictures write their MVs into the current surface's DMV bo. */
2157 if (picture_type == GEN7_VC1_P_PICTURE ||
2158 picture_type == GEN7_VC1_SKIPPED_PICTURE) {
2159 obj_surface = decode_state->render_object;
2161 if (pic_param->sequence_fields.bits.interlace &&
2162 (pic_param->picture_fields.bits.frame_coding_mode == 2) && /* Field-Interlace */
2163 (pic_param->picture_fields.bits.top_field_first ^ is_first_field))
2164 dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_bottom;
2166 dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_top;
/* B pictures read the DMV buffer of the backward reference (objects[1]). */
2169 if (picture_type == GEN7_VC1_B_PICTURE) {
2170 obj_surface = decode_state->reference_objects[1];
2171 if (pic_param->backward_reference_picture != VA_INVALID_ID &&
2173 obj_surface->private_data) {
2175 if (pic_param->sequence_fields.bits.interlace &&
2176 (pic_param->picture_fields.bits.frame_coding_mode == 2) && /* Field-Interlace */
2177 (pic_param->picture_fields.bits.top_field_first ^ is_first_field))
2178 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_bottom;
2180 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv_top;
2184 BEGIN_BCS_BATCH(batch, 7);
2185 OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
/* DMV write address (64-bit reloc) or two zero dwords if absent. */
2187 if (dmv_write_buffer)
2188 OUT_BCS_RELOC64(batch, dmv_write_buffer,
2189 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2192 OUT_BCS_BATCH(batch, 0);
2193 OUT_BCS_BATCH(batch, 0);
2196 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/* DMV read address (64-bit reloc, read-only domain) or zeros. */
2198 if (dmv_read_buffer)
2199 OUT_BCS_RELOC64(batch, dmv_read_buffer,
2200 I915_GEM_DOMAIN_INSTRUCTION, 0,
2203 OUT_BCS_BATCH(batch, 0);
2204 OUT_BCS_BATCH(batch, 0);
2207 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2209 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_vc1_get_macroblock_bit_offset:
 * Adjusts the macroblock-data bit offset for the hardware BSD unit.
 * For Advanced profile (profile == 3) the slice header in the bitstream
 * may contain 0x00 0x00 0x03 emulation-prevention sequences; this scans
 * the header bytes preceding the MB data and, when such a sequence is
 * found, rebases the offset (8 * j + remainder bits) so it refers to the
 * escaped stream position. Simple/Main profiles return the offset as-is.
 */
2213 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
2215 int out_slice_data_bit_offset;
2216 int slice_header_size = in_slice_data_bit_offset / 8;
2219 if (profile == 3 && slice_header_size) { /* Advanced Profile */
/* Scan for a 00 00 03 0x (x < 4) emulation-prevention pattern. */
2220 for (i = 0, j = 0; i < slice_header_size - 1; i++, j++)
2221 if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4)
/* NOTE(review): buf[j + 3] may read one byte past the header span —
 * presumably safe because MB data follows in the same bo; confirm. */
2224 if (i == slice_header_size - 1) {
2225 if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
2233 out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
2234 } else /* Simple or Main Profile */
2235 out_slice_data_bit_offset = in_slice_data_bit_offset;
2237 return out_slice_data_bit_offset;
/*
 * gen8_mfd_vc1_bsd_object:
 * Emits one MFD_VC1_BSD_OBJECT command (5 dwords) describing a single
 * VC-1 slice: data size/offset (rebased past the slice header to the
 * first macroblock), the slice's vertical extent, and the residual bit
 * offset within the first byte of MB data.
 */
2241 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
2242 VAPictureParameterBufferVC1 *pic_param,
2243 VASliceParameterBufferVC1 *slice_param,
2244 VASliceParameterBufferVC1 *next_slice_param,
2245 dri_bo *slice_data_bo,
2246 struct gen7_mfd_context *gen7_mfd_context)
2248 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2249 int next_slice_start_vert_pos;
2250 int macroblock_offset;
2251 uint8_t *slice_data = NULL;
/* Map the slice bo on the CPU to locate the true MB-data bit offset
 * (Advanced profile may contain emulation-prevention bytes). */
2253 dri_bo_map(slice_data_bo, True);
2254 slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
2255 macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
2256 slice_param->macroblock_offset,
2257 pic_param->sequence_fields.bits.profile);
2258 dri_bo_unmap(slice_data_bo);
/* The last slice ends at the picture's MB-row count: /16 for frame
 * pictures, /32 for field pictures (half-height fields). */
2260 if (next_slice_param)
2261 next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
2262 else if (!pic_param->sequence_fields.bits.interlace ||
2263 pic_param->picture_fields.bits.frame_coding_mode < 2) /* Progressive or Frame-Interlace */
2264 next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
2265 else /* Field-Interlace */
2266 next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 32) / 32;
2268 BEGIN_BCS_BATCH(batch, 5);
2269 OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
2270 OUT_BCS_BATCH(batch,
/* Size/offset skip the whole bytes of the slice header. */
2271 slice_param->slice_data_size - (macroblock_offset >> 3));
2272 OUT_BCS_BATCH(batch,
2273 slice_param->slice_data_offset + (macroblock_offset >> 3));
2274 OUT_BCS_BATCH(batch,
2275 slice_param->slice_vertical_position << 16 |
2276 next_slice_start_vert_pos << 0);
2277 OUT_BCS_BATCH(batch,
/* Remaining bit offset (0-7) within the first MB-data byte. */
2278 (macroblock_offset & 0x7));
2279 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_vc1_decode_picture:
 * Top-level VC-1 picture decode: initializes decoder state, then emits
 * the full MFX command sequence (pipe mode select, surface, buffer
 * addresses, VC-1 pic/pred-pipe/directmode state) followed by one
 * MFD_VC1_BSD_OBJECT per slice, and flushes the batch.
 */
2283 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
2284 struct decode_state *decode_state,
2285 struct gen7_mfd_context *gen7_mfd_context)
2287 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2288 VAPictureParameterBufferVC1 *pic_param;
2289 VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
2290 dri_bo *slice_data_bo;
2293 assert(decode_state->pic_param && decode_state->pic_param->buffer);
2294 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2296 gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
/* All state below is emitted atomically into one BCS batch. */
2297 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2298 intel_batchbuffer_emit_mi_flush(batch);
2299 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2300 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2301 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2302 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2303 gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
2304 gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
2305 gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
/* Outer loop: slice parameter buffers; inner loop: slices per buffer. */
2307 for (j = 0; j < decode_state->num_slice_params; j++) {
2308 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2309 slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
2310 slice_data_bo = decode_state->slice_datas[j]->bo;
2311 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
/* First slice of the next group lets the BSD object compute the
 * current slice's vertical extent. */
2313 if (j == decode_state->num_slice_params - 1)
2314 next_slice_group_param = NULL;
2316 next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
2318 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
/* Only complete slice data is supported (no partial slices). */
2319 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2321 if (i < decode_state->slice_params[j]->num_elements - 1)
2322 next_slice_param = slice_param + 1;
2324 next_slice_param = next_slice_group_param;
2326 gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2331 intel_batchbuffer_end_atomic(batch);
2332 intel_batchbuffer_flush(batch);
/*
 * gen8_mfd_jpeg_decode_init:
 * Prepares the decode context for a JPEG baseline picture: derives the
 * chroma subsampling / fourcc of the output surface from the component
 * sampling factors, (re)allocates the render surface bo accordingly,
 * and wires the pre-deblocking output while clearing all scratch
 * buffers that JPEG decoding does not use.
 */
2336 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
2337 struct decode_state *decode_state,
2338 struct gen7_mfd_context *gen7_mfd_context)
2340 struct object_surface *obj_surface;
2341 VAPictureParameterBufferJPEGBaseline *pic_param;
/* Defaults (also the fallback if no sampling pattern below matches). */
2342 int subsampling = SUBSAMPLE_YUV420;
2343 int fourcc = VA_FOURCC_IMC3;
2345 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2347 if (pic_param->num_components == 1) {
/* Grayscale JPEG. */
2348 subsampling = SUBSAMPLE_YUV400;
2349 fourcc = VA_FOURCC_Y800;
2350 } else if (pic_param->num_components == 3) {
/* Classify Y/Cb/Cr horizontal and vertical sampling factors. */
2351 int h1 = pic_param->components[0].h_sampling_factor;
2352 int h2 = pic_param->components[1].h_sampling_factor;
2353 int h3 = pic_param->components[2].h_sampling_factor;
2354 int v1 = pic_param->components[0].v_sampling_factor;
2355 int v2 = pic_param->components[1].v_sampling_factor;
2356 int v3 = pic_param->components[2].v_sampling_factor;
2358 if (h1 == 2 * h2 && h2 == h3 &&
2359 v1 == 2 * v2 && v2 == v3) {
2360 subsampling = SUBSAMPLE_YUV420;
2361 fourcc = VA_FOURCC_IMC3;
2362 } else if (h1 == 2 * h2 && h2 == h3 &&
2363 v1 == v2 && v2 == v3) {
2364 subsampling = SUBSAMPLE_YUV422H;
2365 fourcc = VA_FOURCC_422H;
2366 } else if (h1 == h2 && h2 == h3 &&
2367 v1 == v2 && v2 == v3) {
2368 subsampling = SUBSAMPLE_YUV444;
2369 fourcc = VA_FOURCC_444P;
2370 } else if (h1 == 4 * h2 && h2 == h3 &&
2371 v1 == v2 && v2 == v3) {
2372 subsampling = SUBSAMPLE_YUV411;
2373 fourcc = VA_FOURCC_411P;
2374 } else if (h1 == h2 && h2 == h3 &&
2375 v1 == 2 * v2 && v2 == v3) {
2376 subsampling = SUBSAMPLE_YUV422V;
2377 fourcc = VA_FOURCC_422V;
2384 /* Current decoded picture */
2385 obj_surface = decode_state->render_object;
2386 i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
/* JPEG output bypasses deblocking: route the surface bo to the
 * pre-deblocking output and disable everything else. */
2388 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2389 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2390 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2391 gen7_mfd_context->pre_deblocking_output.valid = 1;
2393 gen7_mfd_context->post_deblocking_output.bo = NULL;
2394 gen7_mfd_context->post_deblocking_output.valid = 0;
2396 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2397 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
2399 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2400 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
2402 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2403 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
2405 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2406 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
2408 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2409 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Maps a VA rotation index (0/90/180/270) to the GEN7 hardware
 * JPEG rotation encoding used in MFX_JPEG_PIC_STATE. */
2412 static const int va_to_gen7_jpeg_rotation[4] = {
2413 GEN7_JPEG_ROTATION_0,
2414 GEN7_JPEG_ROTATION_90,
2415 GEN7_JPEG_ROTATION_180,
2416 GEN7_JPEG_ROTATION_270
/*
 * gen8_mfd_jpeg_pic_state:
 * Emits MFX_JPEG_PIC_STATE (3 dwords): the hardware chroma type derived
 * from the component sampling factors, the (fixed, no-rotation) output
 * rotation, and the frame dimensions in 8x8 block units scaled for the
 * chosen chroma layout.
 */
2420 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
2421 struct decode_state *decode_state,
2422 struct gen7_mfd_context *gen7_mfd_context)
2424 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2425 VAPictureParameterBufferJPEGBaseline *pic_param;
2426 int chroma_type = GEN7_YUV420;
2427 int frame_width_in_blks;
2428 int frame_height_in_blks;
2430 assert(decode_state->pic_param && decode_state->pic_param->buffer);
2431 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2433 if (pic_param->num_components == 1)
2434 chroma_type = GEN7_YUV400;
2435 else if (pic_param->num_components == 3) {
2436 int h1 = pic_param->components[0].h_sampling_factor;
2437 int h2 = pic_param->components[1].h_sampling_factor;
2438 int h3 = pic_param->components[2].h_sampling_factor;
2439 int v1 = pic_param->components[0].v_sampling_factor;
2440 int v2 = pic_param->components[1].v_sampling_factor;
2441 int v3 = pic_param->components[2].v_sampling_factor;
2443 if (h1 == 2 * h2 && h2 == h3 &&
2444 v1 == 2 * v2 && v2 == v3)
2445 chroma_type = GEN7_YUV420;
2446 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2447 v1 == 1 && v2 == 1 && v3 == 1)
2448 chroma_type = GEN7_YUV422H_2Y;
2449 else if (h1 == h2 && h2 == h3 &&
2450 v1 == v2 && v2 == v3)
2451 chroma_type = GEN7_YUV444;
2452 else if (h1 == 4 * h2 && h2 == h3 &&
2453 v1 == v2 && v2 == v3)
2454 chroma_type = GEN7_YUV411;
2455 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2456 v1 == 2 && v2 == 1 && v3 == 1)
2457 chroma_type = GEN7_YUV422V_2Y;
2458 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2459 v1 == 2 && v2 == 2 && v3 == 2)
2460 chroma_type = GEN7_YUV422H_4Y;
/* NOTE(review): "h2 == 2 && h2 == 2" below looks like a typo for
 * "h1 == 2 && h2 == 2" — as written, h1 is never checked for the
 * 422V_4Y case. Verify against the upstream driver before changing. */
2461 else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2462 v1 == 2 && v2 == 1 && v3 == 1)
2463 chroma_type = GEN7_YUV422V_4Y;
/* Convert pixel dimensions to hardware block counts; the divisor and
 * multiplier depend on the MCU size of the chroma layout. */
2468 if (chroma_type == GEN7_YUV400 ||
2469 chroma_type == GEN7_YUV444 ||
2470 chroma_type == GEN7_YUV422V_2Y) {
2471 frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2472 frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2473 } else if (chroma_type == GEN7_YUV411) {
2474 frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2475 frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2477 frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2478 frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2481 BEGIN_BCS_BATCH(batch, 3);
2482 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2483 OUT_BCS_BATCH(batch,
2484 (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
2485 (chroma_type << 0));
2486 OUT_BCS_BATCH(batch,
2487 ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
2488 ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
2489 ADVANCE_BCS_BATCH(batch);
/* Maps a VA Huffman-table index to the hardware table id
 * (entries elided in this excerpt). */
2492 static const int va_to_gen7_jpeg_hufftable[2] = {
/*
 * gen8_mfd_jpeg_huff_table_state:
 * Uploads the JPEG Huffman tables to the hardware via
 * MFX_JPEG_HUFF_TABLE_STATE (53 dwords per table): 12 bytes of DC code
 * counts, 12 DC values, 16 AC code counts and 164 bytes of AC values.
 * Tables whose load flag is unset are skipped.
 */
2498 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2499 struct decode_state *decode_state,
2500 struct gen7_mfd_context *gen7_mfd_context,
2503 VAHuffmanTableBufferJPEGBaseline *huffman_table;
2504 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Nothing to do if the app supplied no Huffman table buffer. */
2507 if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2510 huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2512 for (index = 0; index < num_tables; index++) {
2513 int id = va_to_gen7_jpeg_hufftable[index];
2514 if (!huffman_table->load_huffman_table[index])
2516 BEGIN_BCS_BATCH(batch, 53);
2517 OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2518 OUT_BCS_BATCH(batch, id);
2519 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2520 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2521 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2522 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2523 ADVANCE_BCS_BATCH(batch);
/* Maps a 1-based JPEG component id to the MFX quantizer-matrix type;
 * index 0 entry is elided in this excerpt. */
2527 static const int va_to_gen7_jpeg_qm[5] = {
2529 MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2530 MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2531 MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2532 MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
/*
 * gen8_mfd_jpeg_qm_state:
 * Uploads each component's JPEG quantization matrix. The VA-supplied
 * tables are in zig-zag scan order; the hardware expects raster order,
 * so each table is de-zigzagged before being sent via gen8_mfd_qm_state.
 */
2536 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2537 struct decode_state *decode_state,
2538 struct gen7_mfd_context *gen7_mfd_context)
2540 VAPictureParameterBufferJPEGBaseline *pic_param;
2541 VAIQMatrixBufferJPEGBaseline *iq_matrix;
2544 if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2547 iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2548 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2550 assert(pic_param->num_components <= 3);
2552 for (index = 0; index < pic_param->num_components; index++) {
/* Normalize the component id to 1..N relative to the first component. */
2553 int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2555 unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2556 unsigned char raster_qm[64];
/* Skip ids outside the qm-type table range. */
2559 if (id > 4 || id < 1)
2562 if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2565 qm_type = va_to_gen7_jpeg_qm[id];
/* Zig-zag -> raster reorder. */
2567 for (j = 0; j < 64; j++)
2568 raster_qm[zigzag_direct[j]] = qm[j];
2570 gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
/*
 * gen8_mfd_jpeg_bsd_object:
 * Emits one MFD_JPEG_BSD_OBJECT command (6 dwords) describing a JPEG
 * scan: data size/offset, scan position, the set of components covered
 * by the scan (as a bitmask), interleave flag, MCU count and the
 * restart interval.
 */
2575 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2576 VAPictureParameterBufferJPEGBaseline *pic_param,
2577 VASliceParameterBufferJPEGBaseline *slice_param,
2578 VASliceParameterBufferJPEGBaseline *next_slice_param,
2579 dri_bo *slice_data_bo,
2580 struct gen7_mfd_context *gen7_mfd_context)
2582 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2583 int scan_component_mask = 0;
2586 assert(slice_param->num_components > 0);
2587 assert(slice_param->num_components < 4);
2588 assert(slice_param->num_components <= pic_param->num_components);
/* Build the per-scan component mask: bit 0 = Y, bit 1 = Cb, bit 2 = Cr
 * (component ids normalized against the first frame component). */
2590 for (i = 0; i < slice_param->num_components; i++) {
2591 switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2593 scan_component_mask |= (1 << 0);
2596 scan_component_mask |= (1 << 1);
2599 scan_component_mask |= (1 << 2);
2607 BEGIN_BCS_BATCH(batch, 6);
2608 OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2609 OUT_BCS_BATCH(batch,
2610 slice_param->slice_data_size);
2611 OUT_BCS_BATCH(batch,
2612 slice_param->slice_data_offset);
2613 OUT_BCS_BATCH(batch,
2614 slice_param->slice_horizontal_position << 16 |
2615 slice_param->slice_vertical_position << 0);
2616 OUT_BCS_BATCH(batch,
2617 ((slice_param->num_components != 1) << 30) | /* interleaved */
2618 (scan_component_mask << 27) | /* scan components */
2619 (0 << 26) | /* disable interrupt allowed */
2620 (slice_param->num_mcus << 0)); /* MCU count */
2621 OUT_BCS_BATCH(batch,
2622 (slice_param->restart_interval << 0)); /* RestartInterval */
2623 ADVANCE_BCS_BATCH(batch);
2626 /* Workaround for JPEG decoding on Ivybridge */
/*
 * gen7_jpeg_wa_clip: a tiny embedded bitstream clip decoded before real
 * JPEG work as a hardware workaround. Struct head, dimension fields and
 * part of the payload are elided in this excerpt — presumably it also
 * carries width/height/data_size; confirm against the full source.
 */
2632 unsigned char data[32];
/* Bit offset of the first macroblock within 'data'. */
2634 int data_bit_offset;
2636 } gen7_jpeg_wa_clip = {
/* Pre-encoded slice payload bytes (remainder elided). */
2640 0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2641 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
/*
 * gen8_jpeg_wa_init:
 * Sets up the resources for the JPEG hardware workaround: (re)creates a
 * small NV12 scratch surface sized for the workaround clip, and uploads
 * the clip's slice data into a dedicated bo on first use.
 */
2649 gen8_jpeg_wa_init(VADriverContextP ctx,
2650 struct gen7_mfd_context *gen7_mfd_context)
2652 struct i965_driver_data *i965 = i965_driver_data(ctx);
2654 struct object_surface *obj_surface;
/* Drop any previously created workaround surface before recreating. */
2656 if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2657 i965_DestroySurfaces(ctx,
2658 &gen7_mfd_context->jpeg_wa_surface_id,
2661 status = i965_CreateSurfaces(ctx,
2662 gen7_jpeg_wa_clip.width,
2663 gen7_jpeg_wa_clip.height,
2664 VA_RT_FORMAT_YUV420,
2666 &gen7_mfd_context->jpeg_wa_surface_id);
2667 assert(status == VA_STATUS_SUCCESS);
2669 obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2670 assert(obj_surface);
2671 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2672 gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
/* Upload the embedded clip bitstream once; reused across decodes. */
2674 if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2675 gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2679 dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2681 gen7_jpeg_wa_clip.data_size,
2682 gen7_jpeg_wa_clip.data);
/*
 * gen8_jpeg_wa_pipe_mode_select:
 * Emits MFX_PIPE_MODE_SELECT for the workaround pass. Note the format
 * is deliberately MFX_FORMAT_AVC (the workaround clip is decoded as an
 * AVC stream), VLD long-format decode, pre-deblocking output enabled.
 */
2687 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2688 struct gen7_mfd_context *gen7_mfd_context)
2690 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2692 BEGIN_BCS_BATCH(batch, 5);
2693 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2694 OUT_BCS_BATCH(batch,
2695 (MFX_LONG_MODE << 17) | /* Currently only support long format */
2696 (MFD_MODE_VLD << 15) | /* VLD mode */
2697 (0 << 10) | /* disable Stream-Out */
2698 (0 << 9) | /* Post Deblocking Output */
2699 (1 << 8) | /* Pre Deblocking Output */
2700 (0 << 5) | /* not in stitch mode */
2701 (MFX_CODEC_DECODE << 4) | /* decoding mode */
2702 (MFX_FORMAT_AVC << 0));
2703 OUT_BCS_BATCH(batch,
2704 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
2705 (0 << 3) | /* terminate if AVC mbdata error occurs */
2706 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
2709 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2710 OUT_BCS_BATCH(batch, 0); /* reserved */
2711 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_jpeg_wa_surface_state:
 * Emits MFX_SURFACE_STATE (6 dwords) for the workaround scratch
 * surface: NV12 planar 4:2:0, interleaved chroma, Y-major tiling,
 * dimensions/pitch taken from the scratch object_surface.
 */
2715 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2716 struct gen7_mfd_context *gen7_mfd_context)
2718 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2719 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2721 BEGIN_BCS_BATCH(batch, 6);
2722 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2723 OUT_BCS_BATCH(batch, 0);
2724 OUT_BCS_BATCH(batch,
2725 ((obj_surface->orig_width - 1) << 18) |
2726 ((obj_surface->orig_height - 1) << 4));
2727 OUT_BCS_BATCH(batch,
2728 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2729 (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2730 (0 << 22) | /* surface object control state, ignored */
2731 ((obj_surface->width - 1) << 3) | /* pitch */
2732 (0 << 2) | /* must be 0 */
2733 (1 << 1) | /* must be tiled */
2734 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
2735 OUT_BCS_BATCH(batch,
2736 (0 << 16) | /* X offset for U(Cb), must be 0 */
2737 (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2738 OUT_BCS_BATCH(batch,
2739 (0 << 16) | /* X offset for V(Cr), must be 0 */
2740 (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
2741 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_jpeg_wa_pipe_buf_addr_state:
 * Emits MFX_PIPE_BUF_ADDR_STATE (61 dwords) for the workaround pass:
 * the scratch surface as pre-deblocking output, a temporary intra
 * row-store bo, and zeros for every buffer the workaround decode does
 * not need (stream-out, deblocking, references, mb status, ILDB).
 */
2745 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2746 struct gen7_mfd_context *gen7_mfd_context)
2748 struct i965_driver_data *i965 = i965_driver_data(ctx);
2749 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2750 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Short-lived intra row-store scratch; released at the end. */
2754 intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2759 BEGIN_BCS_BATCH(batch, 61);
2760 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
/* DW1-3: pre-deblocking output = workaround surface bo. */
2761 OUT_BCS_RELOC64(batch,
2763 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2765 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2768 OUT_BCS_BATCH(batch, 0); /* post deblocking */
2769 OUT_BCS_BATCH(batch, 0);
2770 OUT_BCS_BATCH(batch, 0);
2772 /* uncompressed-video & stream out 7-12 */
2773 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2774 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2775 OUT_BCS_BATCH(batch, 0);
2776 OUT_BCS_BATCH(batch, 0);
2777 OUT_BCS_BATCH(batch, 0);
2778 OUT_BCS_BATCH(batch, 0);
2780 /* the DW 13-15 is for intra row store scratch */
2781 OUT_BCS_RELOC64(batch,
2783 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2786 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2788 /* the DW 16-18 is for deblocking filter */
2789 OUT_BCS_BATCH(batch, 0);
2790 OUT_BCS_BATCH(batch, 0);
2791 OUT_BCS_BATCH(batch, 0);
/* Reference picture addresses: all unused, zero-filled. */
2794 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2795 OUT_BCS_BATCH(batch, 0);
2796 OUT_BCS_BATCH(batch, 0);
2798 OUT_BCS_BATCH(batch, 0);
2800 /* the DW52-54 is for mb status address */
2801 OUT_BCS_BATCH(batch, 0);
2802 OUT_BCS_BATCH(batch, 0);
2803 OUT_BCS_BATCH(batch, 0);
2804 /* the DW56-60 is for ILDB & second ILDB address */
2805 OUT_BCS_BATCH(batch, 0);
2806 OUT_BCS_BATCH(batch, 0);
2807 OUT_BCS_BATCH(batch, 0);
2808 OUT_BCS_BATCH(batch, 0);
2809 OUT_BCS_BATCH(batch, 0);
2810 OUT_BCS_BATCH(batch, 0);
2812 ADVANCE_BCS_BATCH(batch);
/* The batch holds its own reference via the reloc; drop ours now. */
2814 dri_bo_unreference(intra_bo);
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 DWs) for the JPEG decode
 * workaround: allocates temporary BSD/MPC and MPR row store BOs of
 * fixed size, points the command at them, and drops the local
 * references once the relocations are recorded.
 * NOTE(review): some lines are missing from this chunk (storage class,
 * braces, one dri_bo_alloc() name argument and the reloc target
 * operands) -- confirm against the complete file.
 */
2818 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2819                                      struct gen7_mfd_context *gen7_mfd_context)
2821     struct i965_driver_data *i965 = i965_driver_data(ctx);
2822     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2823     dri_bo *bsd_mpc_bo, *mpr_bo;
2825     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2826                               "bsd mpc row store",
2827                               11520, /* 1.5 * 120 * 64 */
2830     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2832                           7680, /* 1.0 * 120 * 64 */
2835     BEGIN_BCS_BATCH(batch, 10);
2836     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2838     OUT_BCS_RELOC64(batch,
2840                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2843     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2845     OUT_BCS_RELOC64(batch,
2847                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2849     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2851     OUT_BCS_BATCH(batch, 0);
2852     OUT_BCS_BATCH(batch, 0);
2853     OUT_BCS_BATCH(batch, 0);
2855     ADVANCE_BCS_BATCH(batch);
/* Relocations keep the BOs alive; release the local references. */
2857     dri_bo_unreference(bsd_mpc_bo);
2858     dri_bo_unreference(mpr_bo);
/*
 * AVC QM state for the JPEG workaround pass.
 * NOTE(review): the function body was dropped by the extraction -- from
 * this chunk it cannot be told whether it emits anything or is a no-op;
 * confirm against the complete file.
 */
2862 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2863                           struct gen7_mfd_context *gen7_mfd_context)
/*
 * Emit MFX_AVC_IMG_STATE (16 DWs) describing the minimal 1x1-macroblock
 * dummy AVC frame used by the JPEG decode workaround (non-MBAFF, 4:2:0,
 * CABAC per the flags visible below).
 * NOTE(review): several DW fields between the visible lines were
 * dropped by the extraction -- confirm against the complete file.
 */
2869 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2870                            struct gen7_mfd_context *gen7_mfd_context)
2872     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2874     int mbaff_frame_flag = 0;
/* Dummy frame is a single macroblock. */
2875     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2877     BEGIN_BCS_BATCH(batch, 16);
2878     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2879     OUT_BCS_BATCH(batch,
2880                   width_in_mbs * height_in_mbs);
2881     OUT_BCS_BATCH(batch,
2882                   ((height_in_mbs - 1) << 16) |
2883                   ((width_in_mbs - 1) << 0));
2884     OUT_BCS_BATCH(batch,
2889                   (0 << 12) | /* differ from GEN6 */
2892     OUT_BCS_BATCH(batch,
2893                   (1 << 10) | /* 4:2:0 */
2894                   (1 << 7) |  /* CABAC */
2900                   (mbaff_frame_flag << 1) |
2902     OUT_BCS_BATCH(batch, 0);
2903     OUT_BCS_BATCH(batch, 0);
2904     OUT_BCS_BATCH(batch, 0);
2905     OUT_BCS_BATCH(batch, 0);
2906     OUT_BCS_BATCH(batch, 0);
2907     OUT_BCS_BATCH(batch, 0);
2908     OUT_BCS_BATCH(batch, 0);
2909     OUT_BCS_BATCH(batch, 0);
2910     OUT_BCS_BATCH(batch, 0);
2911     OUT_BCS_BATCH(batch, 0);
2912     OUT_BCS_BATCH(batch, 0);
2913     ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 DWs) for the JPEG workaround frame.
 * The dummy decode uses no direct-mode references, so every reference
 * surface slot, the current frame/field slots and the POC lists are
 * programmed as zero.
 * NOTE(review): loop-closing braces and a few lines were dropped by the
 * extraction -- confirm against the complete file.
 */
2917 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2918                                   struct gen7_mfd_context *gen7_mfd_context)
2920     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2923     BEGIN_BCS_BATCH(batch, 71);
2924     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2926     /* reference surfaces 0..15 */
2927     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2928         OUT_BCS_BATCH(batch, 0); /* top */
2929         OUT_BCS_BATCH(batch, 0); /* bottom */
2932     OUT_BCS_BATCH(batch, 0);
2934     /* the current decoding frame/field */
2935     OUT_BCS_BATCH(batch, 0); /* top */
2936     OUT_BCS_BATCH(batch, 0);
2937     OUT_BCS_BATCH(batch, 0);
/* POC list: all zero, no references. */
2940     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2941         OUT_BCS_BATCH(batch, 0);
2942         OUT_BCS_BATCH(batch, 0);
2945     OUT_BCS_BATCH(batch, 0);
2946     OUT_BCS_BATCH(batch, 0);
2948     ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (11 DWs) pointing the bitstream
 * indirect object at the prebuilt workaround slice data BO
 * (gen7_mfd_context->jpeg_wa_slice_data_bo); the MV/IT object fields
 * are zero/ignored in VLD mode.
 * NOTE(review): the declaration of `i965` used below is on a line
 * dropped by the extraction -- confirm against the complete file.
 */
2952 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2953                                      struct gen7_mfd_context *gen7_mfd_context)
2955     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2957     BEGIN_BCS_BATCH(batch, 11);
2958     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2959     OUT_BCS_RELOC64(batch,
2960                     gen7_mfd_context->jpeg_wa_slice_data_bo,
2961                     I915_GEM_DOMAIN_INSTRUCTION, 0,
2963     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
2964     OUT_BCS_BATCH(batch, 0);
2965     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2966     OUT_BCS_BATCH(batch, 0);
2967     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2968     OUT_BCS_BATCH(batch, 0);
2969     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2970     OUT_BCS_BATCH(batch, 0);
2971     ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFD_AVC_BSD_OBJECT (6 DWs) kicking off the dummy AVC slice
 * decode used as the JPEG workaround.  Sizes/offsets come from the
 * canned gen7_jpeg_wa_clip descriptor; the slice is marked as the last
 * slice.
 * NOTE(review): some DW fields between the visible lines were dropped
 * by the extraction -- confirm against the complete file.
 */
2975 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2976                             struct gen7_mfd_context *gen7_mfd_context)
2978     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2980     /* the input bitsteam format on GEN7 differs from GEN6 */
2981     BEGIN_BCS_BATCH(batch, 6);
2982     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2983     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2984     OUT_BCS_BATCH(batch, 0);
2985     OUT_BCS_BATCH(batch,
/* Byte offset of the first MB, plus the sub-byte bit offset below. */
2991     OUT_BCS_BATCH(batch,
2992                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2995                   (1 << 3) | /* LastSlice Flag */
2996                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2997     OUT_BCS_BATCH(batch, 0);
2998     ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_SLICE_STATE (11 DWs) for the workaround frame: a single
 * I slice starting at MB 0, covering one MB row, with deblocking
 * disabled and QP taken from the canned gen7_jpeg_wa_clip descriptor.
 * NOTE(review): storage class, braces and a few DW sub-fields were
 * dropped by the extraction -- confirm against the complete file.
 */
3002 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
3003                              struct gen7_mfd_context *gen7_mfd_context)
3005     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* One-slice frame: slice spans MB (0,0) to next slice position (0,1). */
3006     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
3007     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
3008     int first_mb_in_slice = 0;
3009     int slice_type = SLICE_TYPE_I;
3011     BEGIN_BCS_BATCH(batch, 11);
3012     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
3013     OUT_BCS_BATCH(batch, slice_type);
3014     OUT_BCS_BATCH(batch,
3015                   (num_ref_idx_l1 << 24) |
3016                   (num_ref_idx_l0 << 16) |
3019     OUT_BCS_BATCH(batch,
3021                   (1 << 27) | /* disable Deblocking */
3023                   (gen7_jpeg_wa_clip.qp << 16) |
3026     OUT_BCS_BATCH(batch,
3027                   (slice_ver_pos << 24) |
3028                   (slice_hor_pos << 16) |
3029                   (first_mb_in_slice << 0));
3030     OUT_BCS_BATCH(batch,
3031                   (next_slice_ver_pos << 16) |
3032                   (next_slice_hor_pos << 0));
3033     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
3034     OUT_BCS_BATCH(batch, 0);
3035     OUT_BCS_BATCH(batch, 0);
3036     OUT_BCS_BATCH(batch, 0);
3037     OUT_BCS_BATCH(batch, 0);
3038     ADVANCE_BCS_BATCH(batch);
/*
 * Driver for the JPEG decode workaround: runs a complete dummy AVC
 * decode (init, flush, then every MFX state/object emitter above, in
 * hardware-required order) before the real JPEG decode is programmed.
 * NOTE(review): the storage class and braces were dropped by the
 * extraction -- confirm against the complete file.
 */
3042 gen8_mfd_jpeg_wa(VADriverContextP ctx,
3043                  struct gen7_mfd_context *gen7_mfd_context)
3045     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3046     gen8_jpeg_wa_init(ctx, gen7_mfd_context);
3047     intel_batchbuffer_emit_mi_flush(batch);
3048     gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
3049     gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
3050     gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
3051     gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
3052     gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
3053     gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
3054     gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
3056     gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
3057     gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
3058     gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
/*
 * Decode one JPEG Baseline picture.  Sequence: run the AVC-based
 * workaround, flush, program the common MFX states, then make two
 * passes over the slice groups -- the first scans every component's
 * DC/AC table selectors to size the Huffman table upload, the second
 * emits the per-slice BSD objects.
 * NOTE(review): braces and a few lines (e.g. the `component`
 * declaration, else-branches) were dropped by the extraction --
 * confirm against the complete file.
 */
3064 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
3065                              struct decode_state *decode_state,
3066                              struct gen7_mfd_context *gen7_mfd_context)
3068     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3069     VAPictureParameterBufferJPEGBaseline *pic_param;
3070     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
3071     dri_bo *slice_data_bo;
3072     int i, j, max_selector = 0;
3074     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3075     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
3077     /* Currently only support Baseline DCT */
3078     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
3079     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3081     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
3083     intel_batchbuffer_emit_mi_flush(batch);
3084     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3085     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3086     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3087     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
3088     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
/* Pass 1: find the highest DC/AC Huffman table selector in use. */
3090     for (j = 0; j < decode_state->num_slice_params; j++) {
3091         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3092         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3093         slice_data_bo = decode_state->slice_datas[j]->bo;
3094         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3096         if (j == decode_state->num_slice_params - 1)
3097             next_slice_group_param = NULL;
3099             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3101         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3104             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3106             if (i < decode_state->slice_params[j]->num_elements - 1)
3107                 next_slice_param = slice_param + 1;
3109                 next_slice_param = next_slice_group_param;
3111             for (component = 0; component < slice_param->num_components; component++) {
3112                 if (max_selector < slice_param->components[component].dc_table_selector)
3113                     max_selector = slice_param->components[component].dc_table_selector;
3115                 if (max_selector < slice_param->components[component].ac_table_selector)
3116                     max_selector = slice_param->components[component].ac_table_selector;
/* Hardware supports at most two Huffman table sets. */
3123     assert(max_selector < 2);
3124     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
/* Pass 2: emit the BSD objects for every slice. */
3126     for (j = 0; j < decode_state->num_slice_params; j++) {
3127         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3128         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3129         slice_data_bo = decode_state->slice_datas[j]->bo;
3130         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3132         if (j == decode_state->num_slice_params - 1)
3133             next_slice_group_param = NULL;
3135             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3137         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3138             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3140             if (i < decode_state->slice_params[j]->num_elements - 1)
3141                 next_slice_param = slice_param + 1;
3143                 next_slice_param = next_slice_group_param;
3145             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
3150     intel_batchbuffer_end_atomic(batch);
3151     intel_batchbuffer_flush(batch);
/*
 * VP8 DC dequantization lookup table, indexed by the (clipped)
 * quantization index 0..127.
 * NOTE(review): the closing "};" of this initializer was dropped by
 * the extraction -- confirm against the complete file.
 */
3154 static const int vp8_dc_qlookup[128] = {
3155     4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
3156     18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
3157     29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
3158     44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
3159     59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
3160     75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
3161     91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
3162     122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
/*
 * VP8 AC dequantization lookup table, indexed by the (clipped)
 * quantization index 0..127.
 * NOTE(review): the closing "};" of this initializer was dropped by
 * the extraction -- confirm against the complete file.
 */
3165 static const int vp8_ac_qlookup[128] = {
3166     4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
3167     20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
3168     36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
3169     52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
3170     78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
3171     110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
3172     155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
3173     213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
/*
 * Clip a VP8 quantization index into the valid lookup-table range.
 * NOTE(review): the function body was dropped by the extraction;
 * callers below use the result as an index into the 128-entry
 * vp8_dc_qlookup/vp8_ac_qlookup tables, so the clip is presumably to
 * [0, 127] -- confirm against the complete file.
 */
3176 static inline unsigned int vp8_clip_quantization_index(int index)
/*
 * Per-frame setup for VP8 decode: validate the frame size (<= 4K),
 * refresh the reference frame-store index, (re)bind the render target
 * as both pre- and post-deblocking output (exactly one is marked valid,
 * depending on loop_filter_disable), ensure the segmentation buffer,
 * and (re)allocate the row-store scratch BOs sized by width in MBs.
 * NOTE(review): braces and several dri_bo_alloc() argument lines were
 * dropped by the extraction -- confirm against the complete file.
 */
3187 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
3188                          struct decode_state *decode_state,
3189                          struct gen7_mfd_context *gen7_mfd_context)
3191     struct object_surface *obj_surface;
3192     struct i965_driver_data *i965 = i965_driver_data(ctx);
3194     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3195     int width_in_mbs = (pic_param->frame_width + 15) / 16;
3196     int height_in_mbs = (pic_param->frame_height + 15) / 16;
3198     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
3199     assert(height_in_mbs > 0 && height_in_mbs <= 256);
3201     intel_update_vp8_frame_store_index(ctx,
3204                                        gen7_mfd_context->reference_surface);
3206     /* Current decoded picture */
3207     obj_surface = decode_state->render_object;
3208     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Post-deblocking output is valid only when the loop filter runs. */
3210     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3211     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
3212     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
3213     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
/* Pre-deblocking output is valid in the complementary case. */
3215     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3216     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
3217     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
3218     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
3220     intel_ensure_vp8_segmentation_buffer(ctx,
3221                                          &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
3223     /* The same as AVC */
3224     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3225     bo = dri_bo_alloc(i965->intel.bufmgr,
3230     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
3231     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
3233     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3234     bo = dri_bo_alloc(i965->intel.bufmgr,
3235                       "deblocking filter row store",
3236                       width_in_mbs * 64 * 4,
3239     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
3240     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
3242     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3243     bo = dri_bo_alloc(i965->intel.bufmgr,
3244                       "bsd mpc row store",
3245                       width_in_mbs * 64 * 2,
3248     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
3249     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
3251     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3252     bo = dri_bo_alloc(i965->intel.bufmgr,
3254                       width_in_mbs * 64 * 2,
3257     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
3258     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* VP8 carries no bitplane data. */
3260     gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Emit MFX_VP8_PIC_STATE (38 DWs): frame geometry, loop-filter and
 * segmentation flags, per-segment dequant values derived from the
 * dc/ac lookup tables, the coefficient-probability BO, mode/MV
 * probability bytes, loop-filter deltas, and (when segmentation is
 * active and the buffer was allocated) the segmentation id stream BO.
 * Segmentation is deliberately best-effort: if its buffer could not
 * be allocated, decoding continues with it disabled (see the comment
 * in the body).
 * NOTE(review): braces, the declarations of i, j and log2num, and a
 * few DW sub-fields/else-branches were dropped by the extraction --
 * confirm against the complete file.
 */
3264 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
3265                        struct decode_state *decode_state,
3266                        struct gen7_mfd_context *gen7_mfd_context)
3268     struct i965_driver_data *i965 = i965_driver_data(ctx);
3269     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3270     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3271     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
3272     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
3273     dri_bo *probs_bo = decode_state->probability_data->bo;
3275     unsigned int quantization_value[4][6];
3277     /* There is no safe way to error out if the segmentation buffer
3278        could not be allocated. So, instead of aborting, simply decode
3279        something even if the result may look totally inacurate */
3280     const unsigned int enable_segmentation =
3281         pic_param->pic_fields.bits.segmentation_enabled &&
3282         gen7_mfd_context->segmentation_buffer.valid;
3284     log2num = (int)log2(slice_param->num_of_partitions - 1);
3286     BEGIN_BCS_BATCH(batch, 38);
3287     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
3288     OUT_BCS_BATCH(batch,
3289                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
3290                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
3291     OUT_BCS_BATCH(batch,
3293                   pic_param->pic_fields.bits.sharpness_level << 16 |
3294                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
3295                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
3296                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
3297                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
3298                   (enable_segmentation &&
3299                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
3300                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
3301                   (enable_segmentation &&
3302                    !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
3303                   (enable_segmentation &&
3304                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
3305                   (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
3306                   pic_param->pic_fields.bits.filter_type << 4 |
3307                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
3308                   !!pic_param->pic_fields.bits.version << 0; /* version 0: 6 tap */
3310     OUT_BCS_BATCH(batch,
3311                   pic_param->loop_filter_level[3] << 24 |
3312                   pic_param->loop_filter_level[2] << 16 |
3313                   pic_param->loop_filter_level[1] << 8 |
3314                   pic_param->loop_filter_level[0] << 0);
3316     /* Quantizer Value for 4 segmetns, DW4-DW15 */
3317     for (i = 0; i < 4; i++) {
3318         quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
3319         quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
3320         quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /*y2dc*/
3321         /* 101581>>16 is equivalent to 155/100 */
3322         quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /*y2ac*/
3323         quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
3324         quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
/* Clamp y2ac to a minimum of 8 and uvdc to a maximum of 132. */
3326         quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
3327         quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
3329         OUT_BCS_BATCH(batch,
3330                       quantization_value[i][0] << 16 | /* Y1AC */
3331                       quantization_value[i][1] << 0);  /* Y1DC */
3332         OUT_BCS_BATCH(batch,
3333                       quantization_value[i][5] << 16 | /* UVAC */
3334                       quantization_value[i][4] << 0);  /* UVDC */
3335         OUT_BCS_BATCH(batch,
3336                       quantization_value[i][3] << 16 | /* Y2AC */
3337                       quantization_value[i][2] << 0);  /* Y2DC */
3340     /* CoeffProbability table for non-key frame, DW16-DW18 */
3342     OUT_BCS_RELOC64(batch, probs_bo,
3343                     0, I915_GEM_DOMAIN_INSTRUCTION,
3345     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
3347     OUT_BCS_BATCH(batch, 0);
3348     OUT_BCS_BATCH(batch, 0);
3349     OUT_BCS_BATCH(batch, 0);
/* Segment tree probabilities (3 bytes). */
3352     OUT_BCS_BATCH(batch,
3353                   pic_param->mb_segment_tree_probs[2] << 16 |
3354                   pic_param->mb_segment_tree_probs[1] << 8 |
3355                   pic_param->mb_segment_tree_probs[0] << 0);
3357     OUT_BCS_BATCH(batch,
3358                   pic_param->prob_skip_false << 24 |
3359                   pic_param->prob_intra << 16 |
3360                   pic_param->prob_last << 8 |
3361                   pic_param->prob_gf << 0);
3363     OUT_BCS_BATCH(batch,
3364                   pic_param->y_mode_probs[3] << 24 |
3365                   pic_param->y_mode_probs[2] << 16 |
3366                   pic_param->y_mode_probs[1] << 8 |
3367                   pic_param->y_mode_probs[0] << 0);
3369     OUT_BCS_BATCH(batch,
3370                   pic_param->uv_mode_probs[2] << 16 |
3371                   pic_param->uv_mode_probs[1] << 8 |
3372                   pic_param->uv_mode_probs[0] << 0);
3374     /* MV update value, DW23-DW32 */
3375     for (i = 0; i < 2; i++) {
3376         for (j = 0; j < 20; j += 4) {
3377             OUT_BCS_BATCH(batch,
3378                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
3379                           pic_param->mv_probs[i][j + 2] << 16 |
3380                           pic_param->mv_probs[i][j + 1] << 8 |
3381                           pic_param->mv_probs[i][j + 0] << 0);
/* Loop-filter deltas are signed 7-bit values, hence the 0x7f masks. */
3385     OUT_BCS_BATCH(batch,
3386                   (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
3387                   (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
3388                   (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) << 8 |
3389                   (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) << 0);
3391     OUT_BCS_BATCH(batch,
3392                   (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
3393                   (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
3394                   (pic_param->loop_filter_deltas_mode[1] & 0x7f) << 8 |
3395                   (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0);
3397     /* segmentation id stream base address, DW35-DW37 */
3398     if (enable_segmentation) {
3399         OUT_BCS_RELOC64(batch, gen7_mfd_context->segmentation_buffer.bo,
3400                         0, I915_GEM_DOMAIN_INSTRUCTION,
3402         OUT_BCS_BATCH(batch, i965->intel.mocs_state);
3404         OUT_BCS_BATCH(batch, 0);
3405         OUT_BCS_BATCH(batch, 0);
3406         OUT_BCS_BATCH(batch, 0);
3408     ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFD_VP8_BSD_OBJECT (22 DWs): bool-coder state (entropy count,
 * range, value), partition-0 size and byte offset, then sizes/offsets
 * for up to 8 additional token partitions (unused slots are zeroed).
 * The partition-size table stored in the bitstream after partition 0
 * (3 bytes per extra partition) is skipped when advancing `offset`.
 * NOTE(review): braces, the declarations of i/log2num, the used_bits==8
 * special-case body and some DW sub-fields were dropped by the
 * extraction -- confirm against the complete file.
 */
3412 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
3413                         VAPictureParameterBufferVP8 *pic_param,
3414                         VASliceParameterBufferVP8 *slice_param,
3415                         dri_bo *slice_data_bo,
3416                         struct gen7_mfd_context *gen7_mfd_context)
3418     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Skip the frame tag / uncompressed header to the first partition byte. */
3420     unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
3421     unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
3422     unsigned int partition_size_0 = slice_param->partition_size[0];
3424     assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
3425     if (used_bits == 8) {
3428         partition_size_0 -= 1;
3431     assert(slice_param->num_of_partitions >= 2);
3432     assert(slice_param->num_of_partitions <= 9);
3434     log2num = (int)log2(slice_param->num_of_partitions - 1);
3436     BEGIN_BCS_BATCH(batch, 22);
3437     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
3438     OUT_BCS_BATCH(batch,
3439                   used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
3440                   pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
3442                   (slice_param->macroblock_offset & 0x7));
3443     OUT_BCS_BATCH(batch,
3444                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
3447     OUT_BCS_BATCH(batch, partition_size_0 + 1);
3448     OUT_BCS_BATCH(batch, offset);
3449     //partion sizes in bytes are present after the above first partition when there are more than one token partition
3450     offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
3451     for (i = 1; i < 9; i++) {
3452         if (i < slice_param->num_of_partitions) {
3453             OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
3454             OUT_BCS_BATCH(batch, offset);
3456             OUT_BCS_BATCH(batch, 0);
3457             OUT_BCS_BATCH(batch, 0);
3460         offset += slice_param->partition_size[i];
3463     OUT_BCS_BATCH(batch, 0); /* concealment method */
3465     ADVANCE_BCS_BATCH(batch);
/*
 * Decode one VP8 frame.  VP8 is strictly one slice per frame, so the
 * guard validates exactly one slice param, one element, one slice data
 * BO, plus a probability-data buffer, warning (once) and bailing on
 * malformed input.  Then the frame is programmed atomically: init,
 * flush, pipe/surface/buffer states, picture state, and a single BSD
 * object.
 * NOTE(review): braces and the early-return line of the guard were
 * dropped by the extraction -- confirm against the complete file.
 */
3469 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3470                             struct decode_state *decode_state,
3471                             struct gen7_mfd_context *gen7_mfd_context)
3473     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3474     VAPictureParameterBufferVP8 *pic_param;
3475     VASliceParameterBufferVP8 *slice_param;
3476     dri_bo *slice_data_bo;
3478     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3479     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3481     /* one slice per frame */
3482     if (decode_state->num_slice_params != 1 ||
3483         (!decode_state->slice_params ||
3484          !decode_state->slice_params[0] ||
3485          (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3486         (!decode_state->slice_datas ||
3487          !decode_state->slice_datas[0] ||
3488          !decode_state->slice_datas[0]->bo) ||
3489         !decode_state->probability_data) {
3490         WARN_ONCE("Wrong parameters for VP8 decoding\n");
3495     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3496     slice_data_bo = decode_state->slice_datas[0]->bo;
3498     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3499     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3500     intel_batchbuffer_emit_mi_flush(batch);
3501     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3502     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3503     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3504     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3505     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3506     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3507     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3508     intel_batchbuffer_end_atomic(batch);
3509     intel_batchbuffer_flush(batch);
/*
 * Top-level decode entry point (installed as base.run in the hw
 * context): sanity-check the input for the profile, reset the MPEG-2
 * slice-position workaround, then dispatch to the per-codec decode
 * routine by profile.
 * NOTE(review): braces, the `profile` parameter line, the switch header,
 * break statements and the final return were dropped by the extraction
 * -- confirm against the complete file.
 */
3513 gen8_mfd_decode_picture(VADriverContextP ctx,
3515                         union codec_state *codec_state,
3516                         struct hw_context *hw_context)
3519     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3520     struct decode_state *decode_state = &codec_state->decode;
3523     assert(gen7_mfd_context);
3525     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3527     if (vaStatus != VA_STATUS_SUCCESS)
3530     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3533     case VAProfileMPEG2Simple:
3534     case VAProfileMPEG2Main:
3535         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3538     case VAProfileH264ConstrainedBaseline:
3539     case VAProfileH264Main:
3540     case VAProfileH264High:
3541     case VAProfileH264StereoHigh:
3542     case VAProfileH264MultiviewHigh:
3543         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3546     case VAProfileVC1Simple:
3547     case VAProfileVC1Main:
3548     case VAProfileVC1Advanced:
3549         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3552     case VAProfileJPEGBaseline:
3553         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3556     case VAProfileVP8Version0_3:
3557         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3565     vaStatus = VA_STATUS_SUCCESS;
/*
 * Destructor for the decoder hw context: drop every BO reference the
 * context holds (NULLing the stored pointers against reuse), destroy
 * the JPEG workaround surface if one was created, free the batchbuffer
 * and finally the context itself.
 * NOTE(review): braces and one i965_DestroySurfaces() argument line
 * were dropped by the extraction -- confirm against the complete file.
 */
3572 gen8_mfd_context_destroy(void *hw_context)
3574     VADriverContextP ctx;
3575     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3577     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
3579     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3580     gen7_mfd_context->post_deblocking_output.bo = NULL;
3582     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3583     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3585     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3586     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3588     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3589     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3591     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3592     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3594     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3595     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3597     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3598     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3600     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3601     gen7_mfd_context->segmentation_buffer.bo = NULL;
3603     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
/* Tear down the JPEG workaround surface if it was ever allocated. */
3605     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
3606         i965_DestroySurfaces(ctx,
3607                              &gen7_mfd_context->jpeg_wa_surface_id,
3609         gen7_mfd_context->jpeg_wa_surface_object = NULL;
3612     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3613     free(gen7_mfd_context);
/*
 * MPEG-2-specific context init: mark all four IQ matrix "load" flags
 * as -1 (i.e. no matrix uploaded yet) so the first picture forces a
 * quantiser matrix upload.
 * NOTE(review): braces were dropped by the extraction -- confirm
 * against the complete file.
 */
3616 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3617                                         struct gen7_mfd_context *gen7_mfd_context)
3619     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3620     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3621     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3622     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3626 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3628 struct intel_driver_data *intel = intel_driver_data(ctx);
3629 struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3632 if (!gen7_mfd_context)
3635 gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3636 gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3637 gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3639 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3640 gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3641 gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3644 gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3645 gen7_mfd_context->segmentation_buffer.valid = 0;
3647 switch (obj_config->profile) {
3648 case VAProfileMPEG2Simple:
3649 case VAProfileMPEG2Main:
3650 gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3653 case VAProfileH264ConstrainedBaseline:
3654 case VAProfileH264Main:
3655 case VAProfileH264High:
3656 case VAProfileH264StereoHigh:
3657 case VAProfileH264MultiviewHigh:
3658 gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3664 gen7_mfd_context->driver_context = ctx;
3665 return (struct hw_context *)gen7_mfd_context;