2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
/* True when the GPU hardware revision is B0 stepping or later. */
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/* Zig-zag scan order for an 8x8 block: entry k is the raster-order
 * coefficient index found at scan position k (used to reorder
 * quantization matrices for the hardware).
 * NOTE(review): the closing "};" is not visible in this fragment. */
51 static const uint32_t zigzag_direct[64] = {
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
54 12, 19, 26, 33, 40, 48, 41, 34,
55 27, 20, 13, 6, 7, 14, 21, 28,
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
/* Lazily attach the per-surface AVC private data (GenAvcSurface) to
 * obj_surface and allocate the whole-frame direct-MV (DMV) scratch buffer
 * sized from the picture dimensions in macroblocks.
 * NOTE(review): this fragment is truncated (missing lines / closing braces);
 * comments below describe only the visible statements. */
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64 VAPictureParameterBufferH264 *pic_param,
65 struct object_surface *obj_surface)
67 struct i965_driver_data *i965 = i965_driver_data(ctx);
68 GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69 int width_in_mbs, height_in_mbs;
/* Register the destructor so the private data is released with the surface. */
71 obj_surface->free_private_data = gen_free_avc_surface;
72 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
/* First use of this surface: create the private data block.
 * NOTE(review): calloc's arguments are (size, nmemb) here, reversed from the
 * conventional (nmemb, size) order, and the result is dereferenced without a
 * NULL check — allocation failure would crash. Worth confirming/fixing. */
75 if (!gen7_avc_surface) {
76 gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77 gen7_avc_surface->base.frame_store_id = -1;
78 assert((obj_surface->size & 0x3f) == 0);
79 obj_surface->private_data = gen7_avc_surface;
82 /* DMV buffers now relate to the whole frame, irrespective of
/* Allocate the direct-MV scratch buffer once per surface: 128 bytes per MB. */
84 if (gen7_avc_surface->dmv_top == NULL) {
85 gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86 "direct mv w/r buffer",
87 width_in_mbs * height_in_mbs * 128,
89 assert(gen7_avc_surface->dmv_top);
/* Emit the MFX_PIPE_MODE_SELECT command: configure the MFX engine for
 * VLD decode of the selected codec (standard_select) and route output
 * through the pre- or post-deblocking path.
 * NOTE(review): the standard_select parameter declaration and some batch
 * dwords are not visible in this truncated fragment. */
94 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
95 struct decode_state *decode_state,
97 struct gen7_mfd_context *gen7_mfd_context)
99 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Only the codecs the MFX decoder supports are legal here. */
101 assert(standard_select == MFX_FORMAT_MPEG2 ||
102 standard_select == MFX_FORMAT_AVC ||
103 standard_select == MFX_FORMAT_VC1 ||
104 standard_select == MFX_FORMAT_JPEG ||
105 standard_select == MFX_FORMAT_VP8);
107 BEGIN_BCS_BATCH(batch, 5);
108 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
110 (MFX_LONG_MODE << 17) | /* Currently only support long format */
111 (MFD_MODE_VLD << 15) | /* VLD mode */
112 (0 << 10) | /* disable Stream-Out */
113 (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
114 (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
115 (0 << 5) | /* not in stitch mode */
116 (MFX_CODEC_DECODE << 4) | /* decoding mode */
117 (standard_select << 0));
119 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
120 (0 << 3) | /* terminate if AVC mbdata error occurs */
121 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
124 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
125 OUT_BCS_BATCH(batch, 0); /* reserved */
126 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_SURFACE_STATE describing the render target: dimensions, pitch,
 * tiling, plane offsets, and surface format (monochrome for Y800, otherwise
 * planar 4:2:0). */
130 gen8_mfd_surface_state(VADriverContextP ctx,
131 struct decode_state *decode_state,
133 struct gen7_mfd_context *gen7_mfd_context)
135 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
136 struct object_surface *obj_surface = decode_state->render_object;
137 unsigned int y_cb_offset;
138 unsigned int y_cr_offset;
139 unsigned int surface_format;
143 y_cb_offset = obj_surface->y_cb_offset;
144 y_cr_offset = obj_surface->y_cr_offset;
/* Y800 (grayscale) surfaces are programmed as monochrome; everything else
 * visible here is treated as planar 4:2:0 8-bit. */
146 surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
147 MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
149 BEGIN_BCS_BATCH(batch, 6);
150 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
151 OUT_BCS_BATCH(batch, 0);
153 ((obj_surface->orig_height - 1) << 18) |
154 ((obj_surface->orig_width - 1) << 4));
156 (surface_format << 28) | /* 420 planar YUV surface */
157 ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
158 (0 << 22) | /* surface object control state, ignored */
159 ((obj_surface->width - 1) << 3) | /* pitch */
160 (0 << 2) | /* must be 0 */
161 (1 << 1) | /* must be tiled */
162 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
164 (0 << 16) | /* X offset for U(Cb), must be 0 */
165 (y_cb_offset << 0)); /* Y offset for U(Cb) */
167 (0 << 16) | /* X offset for V(Cr), must be 0 */
168 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
169 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): pre/post-deblocking outputs,
 * row-store scratch buffers, and the reference picture addresses.
 * NOTE(review): in this truncated fragment the `else` branches that would
 * emit zero dwords when a buffer is invalid, and the relocation offsets,
 * are not visible. */
173 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
174 struct decode_state *decode_state,
176 struct gen7_mfd_context *gen7_mfd_context)
178 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
181 BEGIN_BCS_BATCH(batch, 61);
182 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
183 /* Pre-deblock 1-3 */
184 if (gen7_mfd_context->pre_deblocking_output.valid)
185 OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
186 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
189 OUT_BCS_BATCH(batch, 0);
191 OUT_BCS_BATCH(batch, 0);
192 OUT_BCS_BATCH(batch, 0);
193 /* Post-deblocking 4-6 */
194 if (gen7_mfd_context->post_deblocking_output.valid)
195 OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
196 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
199 OUT_BCS_BATCH(batch, 0);
201 OUT_BCS_BATCH(batch, 0);
202 OUT_BCS_BATCH(batch, 0);
204 /* uncompressed-video & stream out 7-12 */
205 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
206 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
207 OUT_BCS_BATCH(batch, 0);
208 OUT_BCS_BATCH(batch, 0);
209 OUT_BCS_BATCH(batch, 0);
210 OUT_BCS_BATCH(batch, 0);
212 /* intra row-store scratch 13-15 */
213 if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
214 OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
215 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
218 OUT_BCS_BATCH(batch, 0);
220 OUT_BCS_BATCH(batch, 0);
221 OUT_BCS_BATCH(batch, 0);
222 /* deblocking-filter-row-store 16-18 */
223 if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
224 OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
225 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
228 OUT_BCS_BATCH(batch, 0);
229 OUT_BCS_BATCH(batch, 0);
230 OUT_BCS_BATCH(batch, 0);
/* Reference picture addresses: one slot per entry in reference_surface[];
 * only entries with a valid surface id and backing bo get a relocation. */
233 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
234 struct object_surface *obj_surface;
236 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
237 gen7_mfd_context->reference_surface[i].obj_surface &&
238 gen7_mfd_context->reference_surface[i].obj_surface->bo) {
239 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
241 OUT_BCS_RELOC(batch, obj_surface->bo,
242 I915_GEM_DOMAIN_INSTRUCTION, 0,
245 OUT_BCS_BATCH(batch, 0);
248 OUT_BCS_BATCH(batch, 0);
251 /* reference property 51 */
252 OUT_BCS_BATCH(batch, 0);
254 /* Macroblock status & ILDB 52-57 */
255 OUT_BCS_BATCH(batch, 0);
256 OUT_BCS_BATCH(batch, 0);
257 OUT_BCS_BATCH(batch, 0);
258 OUT_BCS_BATCH(batch, 0);
259 OUT_BCS_BATCH(batch, 0);
260 OUT_BCS_BATCH(batch, 0);
262 /* the second Macroblock status 58-60 */
263 OUT_BCS_BATCH(batch, 0);
264 OUT_BCS_BATCH(batch, 0);
265 OUT_BCS_BATCH(batch, 0);
267 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): point the hardware at the
 * slice data buffer; MV, IT_COFF, IT_DBLK and PAK_BSE sections are unused
 * for VLD decode and programmed as zero. */
271 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
272 dri_bo *slice_data_bo,
274 struct gen7_mfd_context *gen7_mfd_context)
276 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
278 BEGIN_BCS_BATCH(batch, 26);
279 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
281 OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
282 OUT_BCS_BATCH(batch, 0);
283 OUT_BCS_BATCH(batch, 0);
284 /* Upper bound 4-5 */
285 OUT_BCS_BATCH(batch, 0);
286 OUT_BCS_BATCH(batch, 0);
288 /* MFX indirect MV 6-10 */
289 OUT_BCS_BATCH(batch, 0);
290 OUT_BCS_BATCH(batch, 0);
291 OUT_BCS_BATCH(batch, 0);
292 OUT_BCS_BATCH(batch, 0);
293 OUT_BCS_BATCH(batch, 0);
295 /* MFX IT_COFF 11-15 */
296 OUT_BCS_BATCH(batch, 0);
297 OUT_BCS_BATCH(batch, 0);
298 OUT_BCS_BATCH(batch, 0);
299 OUT_BCS_BATCH(batch, 0);
300 OUT_BCS_BATCH(batch, 0);
302 /* MFX IT_DBLK 16-20 */
303 OUT_BCS_BATCH(batch, 0);
304 OUT_BCS_BATCH(batch, 0);
305 OUT_BCS_BATCH(batch, 0);
306 OUT_BCS_BATCH(batch, 0);
307 OUT_BCS_BATCH(batch, 0);
309 /* MFX PAK_BSE object for encoder 21-25 */
310 OUT_BCS_BATCH(batch, 0);
311 OUT_BCS_BATCH(batch, 0);
312 OUT_BCS_BATCH(batch, 0);
313 OUT_BCS_BATCH(batch, 0);
314 OUT_BCS_BATCH(batch, 0);
316 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): BSD/MPC and MPR row-store
 * scratch buffers plus the VC-1 bitplane read buffer, each only when valid.
 * NOTE(review): the `else` branches emitting zero dwords for invalid
 * buffers, and relocation offsets, are missing from this fragment. */
320 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
321 struct decode_state *decode_state,
323 struct gen7_mfd_context *gen7_mfd_context)
325 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
327 BEGIN_BCS_BATCH(batch, 10);
328 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
330 if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
331 OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
332 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
335 OUT_BCS_BATCH(batch, 0);
337 OUT_BCS_BATCH(batch, 0);
338 OUT_BCS_BATCH(batch, 0);
339 /* MPR Row Store Scratch buffer 4-6 */
340 if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
341 OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
342 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
345 OUT_BCS_BATCH(batch, 0);
347 OUT_BCS_BATCH(batch, 0);
348 OUT_BCS_BATCH(batch, 0);
/* Bitplane buffer (read-only to the GPU, hence write domain 0). */
351 if (gen7_mfd_context->bitplane_read_buffer.valid)
352 OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
353 I915_GEM_DOMAIN_INSTRUCTION, 0,
356 OUT_BCS_BATCH(batch, 0);
357 OUT_BCS_BATCH(batch, 0);
358 OUT_BCS_BATCH(batch, 0);
359 ADVANCE_BCS_BATCH(batch);
/* Emit one MFX_QM_STATE command carrying a quantization matrix of up to
 * 64 bytes (copied into a 16-dword staging buffer).
 * NOTE(review): the qm_type/qm/qm_length parameter declarations are not
 * visible in this fragment. Also, qm_buffer is not zero-initialized before
 * the memcpy, so when qm_length < 64 the tail dwords emitted at line 378
 * would be uninitialized stack data — confirm a memset exists in the
 * missing lines. */
363 gen8_mfd_qm_state(VADriverContextP ctx,
367 struct gen7_mfd_context *gen7_mfd_context)
369 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
370 unsigned int qm_buffer[16];
372 assert(qm_length <= 16 * 4);
373 memcpy(qm_buffer, qm, qm_length);
375 BEGIN_BCS_BATCH(batch, 18);
376 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
377 OUT_BCS_BATCH(batch, qm_type << 0);
378 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
379 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_IMG_STATE (17 dwords) from the VA H.264 picture parameters:
 * frame geometry in macroblocks, QP offsets, prediction/entropy flags, and
 * field/MBAFF structure.
 * NOTE(review): img_struct's declaration and assignment branches are partly
 * missing from this truncated fragment. */
383 gen8_mfd_avc_img_state(VADriverContextP ctx,
384 struct decode_state *decode_state,
385 struct gen7_mfd_context *gen7_mfd_context)
387 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
389 int mbaff_frame_flag;
390 unsigned int width_in_mbs, height_in_mbs;
391 VAPictureParameterBufferH264 *pic_param;
393 assert(decode_state->pic_param && decode_state->pic_param->buffer);
394 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
395 assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
/* Derive the picture structure (frame/top field/bottom field) from CurrPic. */
397 if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
399 else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
/* Field pictures must agree with field_pic_flag. */
404 if ((img_struct & 0x1) == 0x1) {
405 assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
407 assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
410 if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
411 assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
412 assert(pic_param->pic_fields.bits.field_pic_flag == 0);
414 assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
/* MBAFF: adaptive frame/field coding within a frame picture. */
417 mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
418 !pic_param->pic_fields.bits.field_pic_flag);
420 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
421 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
423 /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
424 assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
425 pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
426 assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
428 BEGIN_BCS_BATCH(batch, 17);
429 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
431 (width_in_mbs * height_in_mbs - 1));
433 ((height_in_mbs - 1) << 16) |
434 ((width_in_mbs - 1) << 0));
436 ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
437 ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
438 (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
439 (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
440 (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
441 (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
444 (pic_param->seq_fields.bits.chroma_format_idc << 10) |
445 (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
446 ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
447 (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
448 (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
449 (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
450 (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
451 (mbaff_frame_flag << 1) |
452 (pic_param->pic_fields.bits.field_pic_flag << 0));
453 OUT_BCS_BATCH(batch, 0);
454 OUT_BCS_BATCH(batch, 0);
455 OUT_BCS_BATCH(batch, 0);
456 OUT_BCS_BATCH(batch, 0);
457 OUT_BCS_BATCH(batch, 0);
458 OUT_BCS_BATCH(batch, 0);
459 OUT_BCS_BATCH(batch, 0);
460 OUT_BCS_BATCH(batch, 0);
461 OUT_BCS_BATCH(batch, 0);
462 OUT_BCS_BATCH(batch, 0);
463 OUT_BCS_BATCH(batch, 0);
464 OUT_BCS_BATCH(batch, 0);
465 ADVANCE_BCS_BATCH(batch);
/* Load the AVC scaling lists into the hardware: the 4x4 intra/inter
 * matrices always, and the 8x8 matrices only when 8x8 transform mode is
 * enabled. Falls back to the context's default (flat) matrices when the
 * app supplied no IQ matrix buffer. */
469 gen8_mfd_avc_qm_state(VADriverContextP ctx,
470 struct decode_state *decode_state,
471 struct gen7_mfd_context *gen7_mfd_context)
473 VAIQMatrixBufferH264 *iq_matrix;
474 VAPictureParameterBufferH264 *pic_param;
476 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
477 iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
479 iq_matrix = &gen7_mfd_context->iq_matrix.h264;
481 assert(decode_state->pic_param && decode_state->pic_param->buffer);
482 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
/* 4x4 lists: three intra matrices then three inter matrices, 16 bytes each. */
484 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
485 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
487 if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
488 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
489 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
/* Thin wrapper: emit the AVC picture-ID state for the current reference
 * surfaces via the shared gen75 helper. */
494 gen8_mfd_avc_picid_state(VADriverContextP ctx,
495 struct decode_state *decode_state,
496 struct gen7_mfd_context *gen7_mfd_context)
498 gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
499 gen7_mfd_context->reference_surface);
/* Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): direct-MV buffer addresses for
 * each reference surface and the current picture, followed by the POC
 * (top/bottom field order count) table used for B-slice direct prediction.
 * NOTE(review): fragment is truncated — some else-branch dwords and loop
 * closings are not visible. */
503 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
504 struct decode_state *decode_state,
505 VAPictureParameterBufferH264 *pic_param,
506 VASliceParameterBufferH264 *slice_param,
507 struct gen7_mfd_context *gen7_mfd_context)
509 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
510 struct object_surface *obj_surface;
511 GenAvcSurface *gen7_avc_surface;
512 VAPictureH264 *va_pic;
515 BEGIN_BCS_BATCH(batch, 71);
516 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
518 /* reference surfaces 0..15 */
519 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
520 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
521 gen7_mfd_context->reference_surface[i].obj_surface &&
522 gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
524 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
525 gen7_avc_surface = obj_surface->private_data;
/* DMV buffer of the reference: read-only to the GPU (write domain 0). */
527 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
528 I915_GEM_DOMAIN_INSTRUCTION, 0,
530 OUT_BCS_BATCH(batch, 0);
532 OUT_BCS_BATCH(batch, 0);
533 OUT_BCS_BATCH(batch, 0);
537 OUT_BCS_BATCH(batch, 0);
539 /* the current decoding frame/field */
540 va_pic = &pic_param->CurrPic;
541 obj_surface = decode_state->render_object;
542 assert(obj_surface->bo && obj_surface->private_data);
543 gen7_avc_surface = obj_surface->private_data;
/* Current picture's DMV buffer is both read and written by the decoder. */
545 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
546 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
549 OUT_BCS_BATCH(batch, 0);
550 OUT_BCS_BATCH(batch, 0);
/* POC table: top/bottom field order counts per reference slot. */
553 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
554 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
557 const VAPictureH264 * const va_pic = avc_find_picture(
558 obj_surface->base.id, pic_param->ReferenceFrames,
559 ARRAY_ELEMS(pic_param->ReferenceFrames));
561 assert(va_pic != NULL);
562 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
563 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
565 OUT_BCS_BATCH(batch, 0);
566 OUT_BCS_BATCH(batch, 0);
/* Finally, the current picture's POCs. */
570 va_pic = &pic_param->CurrPic;
571 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
572 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
574 ADVANCE_BCS_BATCH(batch);
/* Emit a phantom slice covering macroblocks before the first real slice
 * (used when the first slice does not start at MB 0); delegates to the
 * shared gen6 helper. */
578 gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
579 VAPictureParameterBufferH264 *pic_param,
580 VASliceParameterBufferH264 *next_slice_param,
581 struct gen7_mfd_context *gen7_mfd_context)
583 gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
/* Emit MFX_AVC_SLICE_STATE for one slice: normalized slice type, active
 * reference counts, QP/deblocking parameters, and the slice's start and
 * end macroblock positions (the end comes from the next slice, or the
 * bottom of the picture for the last slice). */
587 gen8_mfd_avc_slice_state(VADriverContextP ctx,
588 VAPictureParameterBufferH264 *pic_param,
589 VASliceParameterBufferH264 *slice_param,
590 VASliceParameterBufferH264 *next_slice_param,
591 struct gen7_mfd_context *gen7_mfd_context)
593 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
594 int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
595 int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
596 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
597 int num_ref_idx_l0, num_ref_idx_l1;
/* MBAFF doubles the MB address granularity for frame pictures. */
598 int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
599 pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
600 int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
/* Collapse SI/SP slice types onto I/P for the hardware. */
603 if (slice_param->slice_type == SLICE_TYPE_I ||
604 slice_param->slice_type == SLICE_TYPE_SI) {
605 slice_type = SLICE_TYPE_I;
606 } else if (slice_param->slice_type == SLICE_TYPE_P ||
607 slice_param->slice_type == SLICE_TYPE_SP) {
608 slice_type = SLICE_TYPE_P;
610 assert(slice_param->slice_type == SLICE_TYPE_B);
611 slice_type = SLICE_TYPE_B;
/* Active reference counts per list, depending on slice type. */
614 if (slice_type == SLICE_TYPE_I) {
615 assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
616 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
619 } else if (slice_type == SLICE_TYPE_P) {
620 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
621 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
624 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
625 num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
628 first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
629 slice_hor_pos = first_mb_in_slice % width_in_mbs;
630 slice_ver_pos = first_mb_in_slice / width_in_mbs;
632 if (next_slice_param) {
633 first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
634 next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
635 next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
/* Last slice: end position is the bottom row (halved for field pictures). */
637 next_slice_hor_pos = 0;
638 next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
641 BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
642 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
643 OUT_BCS_BATCH(batch, slice_type);
645 (num_ref_idx_l1 << 24) |
646 (num_ref_idx_l0 << 16) |
647 (slice_param->chroma_log2_weight_denom << 8) |
648 (slice_param->luma_log2_weight_denom << 0));
650 (slice_param->direct_spatial_mv_pred_flag << 29) |
651 (slice_param->disable_deblocking_filter_idc << 27) |
652 (slice_param->cabac_init_idc << 24) |
653 ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
654 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
655 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
657 (slice_ver_pos << 24) |
658 (slice_hor_pos << 16) |
659 (first_mb_in_slice << 0));
661 (next_slice_ver_pos << 16) |
662 (next_slice_hor_pos << 0));
664 (next_slice_param == NULL) << 19); /* last slice flag */
665 OUT_BCS_BATCH(batch, 0);
666 OUT_BCS_BATCH(batch, 0);
667 OUT_BCS_BATCH(batch, 0);
668 OUT_BCS_BATCH(batch, 0);
669 ADVANCE_BCS_BATCH(batch);
/* Thin wrapper: emit the AVC reference-index state via the shared gen6
 * helper, mapping slice reference lists onto the context's frame store.
 * NOTE(review): some argument lines are missing from this fragment. */
673 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
674 VAPictureParameterBufferH264 *pic_param,
675 VASliceParameterBufferH264 *slice_param,
676 struct gen7_mfd_context *gen7_mfd_context)
678 gen6_send_avc_ref_idx_state(
679 gen7_mfd_context->base.batch,
681 gen7_mfd_context->reference_surface
/* Emit MFX_AVC_WEIGHTOFFSET_STATE: explicit weighted-prediction tables.
 * One table (list 0) for weighted P/SP slices, two tables (lists 0 and 1)
 * for B slices with explicit weighted bipred. Each table packs 32 entries
 * of {luma weight, luma offset, Cb weight, Cb offset, Cr weight, Cr offset}. */
686 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
687 VAPictureParameterBufferH264 *pic_param,
688 VASliceParameterBufferH264 *slice_param,
689 struct gen7_mfd_context *gen7_mfd_context)
691 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
692 int i, j, num_weight_offset_table = 0;
693 short weightoffsets[32 * 6];
695 if ((slice_param->slice_type == SLICE_TYPE_P ||
696 slice_param->slice_type == SLICE_TYPE_SP) &&
697 (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
698 num_weight_offset_table = 1;
701 if ((slice_param->slice_type == SLICE_TYPE_B) &&
702 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
703 num_weight_offset_table = 2;
/* i == 0 emits the list-0 table, i == 1 the list-1 table. */
706 for (i = 0; i < num_weight_offset_table; i++) {
707 BEGIN_BCS_BATCH(batch, 98);
708 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
709 OUT_BCS_BATCH(batch, i);
712 for (j = 0; j < 32; j++) {
713 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
714 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
715 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
716 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
717 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
718 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
721 for (j = 0; j < 32; j++) {
722 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
723 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
724 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
725 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
726 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
727 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
731 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
732 ADVANCE_BCS_BATCH(batch);
/* Emit MFD_AVC_BSD_OBJECT: kick decode of one slice, giving the hardware
 * the slice data size/offset and the bit offset of the first macroblock
 * (computed by scanning the slice header in the data buffer). */
737 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
738 VAPictureParameterBufferH264 *pic_param,
739 VASliceParameterBufferH264 *slice_param,
740 dri_bo *slice_data_bo,
741 VASliceParameterBufferH264 *next_slice_param,
742 struct gen7_mfd_context *gen7_mfd_context)
744 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
745 int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
747 pic_param->pic_fields.bits.entropy_coding_mode_flag);
749 /* the input bitstream format on GEN7 differs from GEN6 */
750 BEGIN_BCS_BATCH(batch, 6);
751 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
753 (slice_param->slice_data_size));
754 OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
/* Byte part of the first-MB offset in bits 16+, bit remainder in bits 0-2. */
762 ((slice_data_bit_offset >> 3) << 16) |
766 ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
767 (slice_data_bit_offset & 0x7));
768 OUT_BCS_BATCH(batch, 0);
769 ADVANCE_BCS_BATCH(batch);
/* One-time AVC decode context setup: seed the fallback IQ matrix with the
 * default (flat) scaling lists used when the app provides none. */
773 gen8_mfd_avc_context_init(
774 VADriverContextP ctx,
775 struct gen7_mfd_context *gen7_mfd_context
778 /* Initialize flat scaling lists */
779 avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/* Per-picture AVC decode setup: decide whether in-loop deblocking (ILDB)
 * is active, refresh the frame store, size-check the picture, mark the
 * render target's reference status, bind it as pre- or post-deblocking
 * output, and (re)allocate the row-store scratch buffers.
 * NOTE(review): fragment is truncated — several dri_bo_alloc argument
 * lines (names/alignments) and loop closings are not visible. */
783 gen8_mfd_avc_decode_init(VADriverContextP ctx,
784 struct decode_state *decode_state,
785 struct gen7_mfd_context *gen7_mfd_context)
787 VAPictureParameterBufferH264 *pic_param;
788 VASliceParameterBufferH264 *slice_param;
789 struct i965_driver_data *i965 = i965_driver_data(ctx);
790 struct object_surface *obj_surface;
792 int i, j, enable_avc_ildb = 0;
793 unsigned int width_in_mbs, height_in_mbs;
/* ILDB is needed as soon as any slice does not disable deblocking. */
795 for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
796 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
797 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
799 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
800 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
801 assert((slice_param->slice_type == SLICE_TYPE_I) ||
802 (slice_param->slice_type == SLICE_TYPE_SI) ||
803 (slice_param->slice_type == SLICE_TYPE_P) ||
804 (slice_param->slice_type == SLICE_TYPE_SP) ||
805 (slice_param->slice_type == SLICE_TYPE_B));
807 if (slice_param->disable_deblocking_filter_idc != 1) {
816 assert(decode_state->pic_param && decode_state->pic_param->buffer);
817 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
818 gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
819 gen7_mfd_context->reference_surface);
820 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
821 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
822 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
823 assert(height_in_mbs > 0 && height_in_mbs <= 256);
825 /* Current decoded picture */
826 obj_surface = decode_state->render_object;
827 if (pic_param->pic_fields.bits.reference_pic_flag)
828 obj_surface->flags |= SURFACE_REFERENCED;
830 obj_surface->flags &= ~SURFACE_REFERENCED;
832 avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
833 gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
/* With ILDB on, the decoder writes the post-deblocking path; otherwise the
 * pre-deblocking path. Both point at the render target's bo. */
835 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
836 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
837 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
838 gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
840 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
841 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
842 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
843 gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
/* Row-store scratch buffers are reallocated per picture, sized by width. */
845 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
846 bo = dri_bo_alloc(i965->intel.bufmgr,
851 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
852 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
854 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
855 bo = dri_bo_alloc(i965->intel.bufmgr,
856 "deblocking filter row store",
857 width_in_mbs * 64 * 4,
860 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
861 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
863 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
864 bo = dri_bo_alloc(i965->intel.bufmgr,
866 width_in_mbs * 64 * 2,
869 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
870 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
872 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
873 bo = dri_bo_alloc(i965->intel.bufmgr,
875 width_in_mbs * 64 * 2,
878 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
879 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* Bitplane buffer is a VC-1-only feature; unused for AVC. */
881 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Top-level AVC picture decode: run per-picture init, then emit the full
 * MFX command sequence (pipe mode, surface, buffer addresses, QM, picid,
 * img state) followed by per-slice-group and per-slice commands, and flush
 * the batch. */
885 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
886 struct decode_state *decode_state,
887 struct gen7_mfd_context *gen7_mfd_context)
889 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
890 VAPictureParameterBufferH264 *pic_param;
891 VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
892 dri_bo *slice_data_bo;
895 assert(decode_state->pic_param && decode_state->pic_param->buffer);
896 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
897 gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
899 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
900 intel_batchbuffer_emit_mi_flush(batch);
/* Frame-level state, emitted once per picture. */
901 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
902 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
903 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
904 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
905 gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
906 gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
907 gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
/* Outer loop: one slice-parameter buffer (slice group) at a time. */
909 for (j = 0; j < decode_state->num_slice_params; j++) {
910 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
911 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
912 slice_data_bo = decode_state->slice_datas[j]->bo;
913 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
/* Look ahead to the first slice of the next group so the last slice of
 * this group knows where it ends. */
915 if (j == decode_state->num_slice_params - 1)
916 next_slice_group_param = NULL;
918 next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
/* If the very first slice doesn't start at MB 0, cover the gap with a
 * phantom slice. */
920 if (j == 0 && slice_param->first_mb_in_slice)
921 gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
/* Inner loop: each slice element in this parameter buffer. */
923 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
924 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
925 assert((slice_param->slice_type == SLICE_TYPE_I) ||
926 (slice_param->slice_type == SLICE_TYPE_SI) ||
927 (slice_param->slice_type == SLICE_TYPE_P) ||
928 (slice_param->slice_type == SLICE_TYPE_SP) ||
929 (slice_param->slice_type == SLICE_TYPE_B));
931 if (i < decode_state->slice_params[j]->num_elements - 1)
932 next_slice_param = slice_param + 1;
934 next_slice_param = next_slice_group_param;
936 gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
937 gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
938 gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
939 gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
940 gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
945 intel_batchbuffer_end_atomic(batch);
946 intel_batchbuffer_flush(batch);
/* Per-picture MPEG-2 decode setup: bind reference surfaces, ensure the
 * render target has an NV12 bo, route output through the pre-deblocking
 * path (MPEG-2 has no in-loop deblocking), and allocate the BSD/MPC
 * row-store scratch buffer.
 * NOTE(review): fragment is truncated — some dri_bo_alloc arguments and
 * the mpeg2_set_reference_surfaces argument list are not fully visible. */
950 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
951 struct decode_state *decode_state,
952 struct gen7_mfd_context *gen7_mfd_context)
954 VAPictureParameterBufferMPEG2 *pic_param;
955 struct i965_driver_data *i965 = i965_driver_data(ctx);
956 struct object_surface *obj_surface;
958 unsigned int width_in_mbs;
960 assert(decode_state->pic_param && decode_state->pic_param->buffer);
961 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
962 width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
964 mpeg2_set_reference_surfaces(
966 gen7_mfd_context->reference_surface,
971 /* Current decoded picture */
972 obj_surface = decode_state->render_object;
973 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
975 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
976 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
977 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
978 gen7_mfd_context->pre_deblocking_output.valid = 1;
980 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
981 bo = dri_bo_alloc(i965->intel.bufmgr,
986 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
987 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
/* The remaining scratch/bitplane buffers are unused for MPEG-2. */
989 gen7_mfd_context->post_deblocking_output.valid = 0;
990 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
991 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
992 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
993 gen7_mfd_context->bitplane_read_buffer.valid = 0;
997 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
998 struct decode_state *decode_state,
999 struct gen7_mfd_context *gen7_mfd_context)
1001 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1002 VAPictureParameterBufferMPEG2 *pic_param;
1003 unsigned int slice_concealment_disable_bit = 0;
1005 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1006 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1008 slice_concealment_disable_bit = 1;
1010 BEGIN_BCS_BATCH(batch, 13);
1011 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1012 OUT_BCS_BATCH(batch,
1013 (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1014 ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1015 ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1016 ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1017 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1018 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1019 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1020 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1021 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1022 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1023 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1024 pic_param->picture_coding_extension.bits.alternate_scan << 6);
1025 OUT_BCS_BATCH(batch,
1026 pic_param->picture_coding_type << 9);
1027 OUT_BCS_BATCH(batch,
1028 (slice_concealment_disable_bit << 31) |
1029 ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1030 ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1031 OUT_BCS_BATCH(batch, 0);
1032 OUT_BCS_BATCH(batch, 0);
1033 OUT_BCS_BATCH(batch, 0);
1034 OUT_BCS_BATCH(batch, 0);
1035 OUT_BCS_BATCH(batch, 0);
1036 OUT_BCS_BATCH(batch, 0);
1037 OUT_BCS_BATCH(batch, 0);
1038 OUT_BCS_BATCH(batch, 0);
1039 OUT_BCS_BATCH(batch, 0);
1040 ADVANCE_BCS_BATCH(batch);
1044 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1045 struct decode_state *decode_state,
1046 struct gen7_mfd_context *gen7_mfd_context)
1048 VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1051 /* Update internal QM state */
1052 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1053 VAIQMatrixBufferMPEG2 * const iq_matrix =
1054 (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1056 if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1057 iq_matrix->load_intra_quantiser_matrix) {
1058 gen_iq_matrix->load_intra_quantiser_matrix =
1059 iq_matrix->load_intra_quantiser_matrix;
1060 if (iq_matrix->load_intra_quantiser_matrix) {
1061 for (j = 0; j < 64; j++)
1062 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1063 iq_matrix->intra_quantiser_matrix[j];
1067 if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1068 iq_matrix->load_non_intra_quantiser_matrix) {
1069 gen_iq_matrix->load_non_intra_quantiser_matrix =
1070 iq_matrix->load_non_intra_quantiser_matrix;
1071 if (iq_matrix->load_non_intra_quantiser_matrix) {
1072 for (j = 0; j < 64; j++)
1073 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1074 iq_matrix->non_intra_quantiser_matrix[j];
1079 /* Commit QM state to HW */
1080 for (i = 0; i < 2; i++) {
1081 unsigned char *qm = NULL;
1085 if (gen_iq_matrix->load_intra_quantiser_matrix) {
1086 qm = gen_iq_matrix->intra_quantiser_matrix;
1087 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1090 if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1091 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1092 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1099 gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1104 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1105 VAPictureParameterBufferMPEG2 *pic_param,
1106 VASliceParameterBufferMPEG2 *slice_param,
1107 VASliceParameterBufferMPEG2 *next_slice_param,
1108 struct gen7_mfd_context *gen7_mfd_context)
1110 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1111 unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1112 int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1114 if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1115 pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1117 is_field_pic_wa = is_field_pic &&
1118 gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1120 vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1121 hpos0 = slice_param->slice_horizontal_position;
1123 if (next_slice_param == NULL) {
1124 vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1127 vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1128 hpos1 = next_slice_param->slice_horizontal_position;
1131 mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1133 BEGIN_BCS_BATCH(batch, 5);
1134 OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1135 OUT_BCS_BATCH(batch,
1136 slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1137 OUT_BCS_BATCH(batch,
1138 slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1139 OUT_BCS_BATCH(batch,
1143 (next_slice_param == NULL) << 5 |
1144 (next_slice_param == NULL) << 3 |
1145 (slice_param->macroblock_offset & 0x7));
1146 OUT_BCS_BATCH(batch,
1147 (slice_param->quantiser_scale_code << 24) |
1148 (vpos1 << 8 | hpos1));
1149 ADVANCE_BCS_BATCH(batch);
1153 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1154 struct decode_state *decode_state,
1155 struct gen7_mfd_context *gen7_mfd_context)
1157 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1158 VAPictureParameterBufferMPEG2 *pic_param;
1159 VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1160 dri_bo *slice_data_bo;
1163 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1164 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1166 gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1167 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1168 intel_batchbuffer_emit_mi_flush(batch);
1169 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1170 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1171 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1172 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1173 gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1174 gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1176 if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1177 gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1178 mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1180 for (j = 0; j < decode_state->num_slice_params; j++) {
1181 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1182 slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1183 slice_data_bo = decode_state->slice_datas[j]->bo;
1184 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1186 if (j == decode_state->num_slice_params - 1)
1187 next_slice_group_param = NULL;
1189 next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1191 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1192 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1194 if (i < decode_state->slice_params[j]->num_elements - 1)
1195 next_slice_param = slice_param + 1;
1197 next_slice_param = next_slice_group_param;
1199 gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1204 intel_batchbuffer_end_atomic(batch);
1205 intel_batchbuffer_flush(batch);
1208 static const int va_to_gen7_vc1_pic_type[5] = {
1212 GEN7_VC1_BI_PICTURE,
1216 static const int va_to_gen7_vc1_mv[4] = {
1218 2, /* 1-MV half-pel */
1219 3, /* 1-MV half-pef bilinear */
1223 static const int b_picture_scale_factor[21] = {
1224 128, 85, 170, 64, 192,
1225 51, 102, 153, 204, 43,
1226 215, 37, 74, 111, 148,
1227 185, 222, 32, 96, 160,
1231 static const int va_to_gen7_vc1_condover[3] = {
1237 static const int va_to_gen7_vc1_profile[4] = {
1238 GEN7_VC1_SIMPLE_PROFILE,
1239 GEN7_VC1_MAIN_PROFILE,
1240 GEN7_VC1_RESERVED_PROFILE,
1241 GEN7_VC1_ADVANCED_PROFILE
1245 gen8_mfd_free_vc1_surface(void **data)
1247 struct gen7_vc1_surface *gen7_vc1_surface = *data;
1249 if (!gen7_vc1_surface)
1252 dri_bo_unreference(gen7_vc1_surface->dmv);
1253 free(gen7_vc1_surface);
1258 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1259 VAPictureParameterBufferVC1 *pic_param,
1260 struct object_surface *obj_surface)
1262 struct i965_driver_data *i965 = i965_driver_data(ctx);
1263 struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1264 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1265 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1267 obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1269 if (!gen7_vc1_surface) {
1270 gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1271 assert((obj_surface->size & 0x3f) == 0);
1272 obj_surface->private_data = gen7_vc1_surface;
1275 gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1277 if (gen7_vc1_surface->dmv == NULL) {
1278 gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1279 "direct mv w/r buffer",
1280 width_in_mbs * height_in_mbs * 64,
1286 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1287 struct decode_state *decode_state,
1288 struct gen7_mfd_context *gen7_mfd_context)
1290 VAPictureParameterBufferVC1 *pic_param;
1291 struct i965_driver_data *i965 = i965_driver_data(ctx);
1292 struct object_surface *obj_surface;
1297 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1298 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1299 width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1300 picture_type = pic_param->picture_fields.bits.picture_type;
1302 intel_update_vc1_frame_store_index(ctx,
1305 gen7_mfd_context->reference_surface);
1307 /* Current decoded picture */
1308 obj_surface = decode_state->render_object;
1309 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1310 gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1312 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1313 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1314 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1315 gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1317 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1318 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1319 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1320 gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1322 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1323 bo = dri_bo_alloc(i965->intel.bufmgr,
1328 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1329 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1331 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1332 bo = dri_bo_alloc(i965->intel.bufmgr,
1333 "deblocking filter row store",
1334 width_in_mbs * 7 * 64,
1337 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1338 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1340 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1341 bo = dri_bo_alloc(i965->intel.bufmgr,
1342 "bsd mpc row store",
1346 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1347 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1349 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1351 gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1352 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1354 if (gen7_mfd_context->bitplane_read_buffer.valid) {
1355 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1356 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1357 int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1359 uint8_t *src = NULL, *dst = NULL;
1361 assert(decode_state->bit_plane->buffer);
1362 src = decode_state->bit_plane->buffer;
1364 bo = dri_bo_alloc(i965->intel.bufmgr,
1366 bitplane_width * height_in_mbs,
1369 gen7_mfd_context->bitplane_read_buffer.bo = bo;
1371 dri_bo_map(bo, True);
1372 assert(bo->virtual);
1375 for (src_h = 0; src_h < height_in_mbs; src_h++) {
1376 for(src_w = 0; src_w < width_in_mbs; src_w++) {
1377 int src_index, dst_index;
1381 src_index = (src_h * width_in_mbs + src_w) / 2;
1382 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1383 src_value = ((src[src_index] >> src_shift) & 0xf);
1385 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1389 dst_index = src_w / 2;
1390 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1394 dst[src_w / 2] >>= 4;
1396 dst += bitplane_width;
1401 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1405 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1406 struct decode_state *decode_state,
1407 struct gen7_mfd_context *gen7_mfd_context)
1409 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1410 VAPictureParameterBufferVC1 *pic_param;
1411 struct object_surface *obj_surface;
1412 int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1413 int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1414 int unified_mv_mode;
1415 int ref_field_pic_polarity = 0;
1416 int scale_factor = 0;
1418 int dmv_surface_valid = 0;
1424 int interpolation_mode = 0;
1426 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1427 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1429 profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1430 dquant = pic_param->pic_quantizer_fields.bits.dquant;
1431 dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1432 dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1433 dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1434 dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1435 dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1436 alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1439 alt_pquant_config = 0;
1440 alt_pquant_edge_mask = 0;
1441 } else if (dquant == 2) {
1442 alt_pquant_config = 1;
1443 alt_pquant_edge_mask = 0xf;
1445 assert(dquant == 1);
1446 if (dquantfrm == 0) {
1447 alt_pquant_config = 0;
1448 alt_pquant_edge_mask = 0;
1451 assert(dquantfrm == 1);
1452 alt_pquant_config = 1;
1454 switch (dqprofile) {
1456 if (dqbilevel == 0) {
1457 alt_pquant_config = 2;
1458 alt_pquant_edge_mask = 0;
1460 assert(dqbilevel == 1);
1461 alt_pquant_config = 3;
1462 alt_pquant_edge_mask = 0;
1467 alt_pquant_edge_mask = 0xf;
1472 alt_pquant_edge_mask = 0x9;
1474 alt_pquant_edge_mask = (0x3 << dqdbedge);
1479 alt_pquant_edge_mask = (0x1 << dqsbedge);
1488 if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1489 assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1490 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1492 assert(pic_param->mv_fields.bits.mv_mode < 4);
1493 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1496 if (pic_param->sequence_fields.bits.interlace == 1 &&
1497 pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1498 /* FIXME: calculate reference field picture polarity */
1500 ref_field_pic_polarity = 0;
1503 if (pic_param->b_picture_fraction < 21)
1504 scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1506 picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1508 if (profile == GEN7_VC1_ADVANCED_PROFILE &&
1509 picture_type == GEN7_VC1_I_PICTURE)
1510 picture_type = GEN7_VC1_BI_PICTURE;
1512 if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1513 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1515 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1518 * 8.3.6.2.1 Transform Type Selection
1519 * If variable-sized transform coding is not enabled,
1520 * then the 8x8 transform shall be used for all blocks.
1521 * it is also MFX_VC1_PIC_STATE requirement.
1523 if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1524 pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
1525 pic_param->transform_fields.bits.frame_level_transform_type = 0;
1529 if (picture_type == GEN7_VC1_B_PICTURE) {
1530 struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1532 obj_surface = decode_state->reference_objects[1];
1535 gen7_vc1_surface = obj_surface->private_data;
1537 if (!gen7_vc1_surface ||
1538 (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1539 va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1540 dmv_surface_valid = 0;
1542 dmv_surface_valid = 1;
1545 assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1547 if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1548 fcm = pic_param->picture_fields.bits.frame_coding_mode;
1550 if (pic_param->picture_fields.bits.top_field_first)
1556 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1557 brfd = pic_param->reference_fields.bits.reference_distance;
1558 brfd = (scale_factor * brfd) >> 8;
1559 brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1565 overlap = pic_param->sequence_fields.bits.overlap;
1569 if (profile != GEN7_VC1_ADVANCED_PROFILE){
1570 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1571 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1575 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1576 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1579 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1580 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
1581 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1583 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1584 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1591 assert(pic_param->conditional_overlap_flag < 3);
1592 assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1594 if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1595 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1596 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1597 interpolation_mode = 9; /* Half-pel bilinear */
1598 else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1599 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1600 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1601 interpolation_mode = 1; /* Half-pel bicubic */
1603 interpolation_mode = 0; /* Quarter-pel bicubic */
1605 BEGIN_BCS_BATCH(batch, 6);
1606 OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1607 OUT_BCS_BATCH(batch,
1608 (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1609 ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1610 OUT_BCS_BATCH(batch,
1611 ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1612 dmv_surface_valid << 15 |
1613 (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1614 pic_param->rounding_control << 13 |
1615 pic_param->sequence_fields.bits.syncmarker << 12 |
1616 interpolation_mode << 8 |
1617 0 << 7 | /* FIXME: scale up or down ??? */
1618 pic_param->range_reduction_frame << 6 |
1619 pic_param->entrypoint_fields.bits.loopfilter << 5 |
1621 !pic_param->picture_fields.bits.is_first_field << 3 |
1622 (pic_param->sequence_fields.bits.profile == 3) << 0);
1623 OUT_BCS_BATCH(batch,
1624 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1625 picture_type << 26 |
1628 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1630 OUT_BCS_BATCH(batch,
1631 unified_mv_mode << 28 |
1632 pic_param->mv_fields.bits.four_mv_switch << 27 |
1633 pic_param->fast_uvmc_flag << 26 |
1634 ref_field_pic_polarity << 25 |
1635 pic_param->reference_fields.bits.num_reference_pictures << 24 |
1636 pic_param->reference_fields.bits.reference_distance << 20 |
1637 pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1638 pic_param->mv_fields.bits.extended_dmv_range << 10 |
1639 pic_param->mv_fields.bits.extended_mv_range << 8 |
1640 alt_pquant_edge_mask << 4 |
1641 alt_pquant_config << 2 |
1642 pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1643 pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1644 OUT_BCS_BATCH(batch,
1645 !!pic_param->bitplane_present.value << 31 |
1646 !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1647 !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1648 !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1649 !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1650 !pic_param->bitplane_present.flags.bp_overflags << 26 |
1651 !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1652 !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1653 pic_param->mv_fields.bits.mv_table << 20 |
1654 pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1655 pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1656 pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1657 pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1658 pic_param->mb_mode_table << 8 |
1660 pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1661 pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1662 pic_param->cbp_table << 0);
1663 ADVANCE_BCS_BATCH(batch);
1667 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1668 struct decode_state *decode_state,
1669 struct gen7_mfd_context *gen7_mfd_context)
1671 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1672 VAPictureParameterBufferVC1 *pic_param;
1673 int intensitycomp_single;
1675 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1676 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1678 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1679 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1680 intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1682 BEGIN_BCS_BATCH(batch, 6);
1683 OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1684 OUT_BCS_BATCH(batch,
1685 0 << 14 | /* FIXME: double ??? */
1687 intensitycomp_single << 10 |
1688 intensitycomp_single << 8 |
1689 0 << 4 | /* FIXME: interlace mode */
1691 OUT_BCS_BATCH(batch,
1692 pic_param->luma_shift << 16 |
1693 pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1694 OUT_BCS_BATCH(batch, 0);
1695 OUT_BCS_BATCH(batch, 0);
1696 OUT_BCS_BATCH(batch, 0);
1697 ADVANCE_BCS_BATCH(batch);
1701 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1702 struct decode_state *decode_state,
1703 struct gen7_mfd_context *gen7_mfd_context)
1705 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1706 struct object_surface *obj_surface;
1707 dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1709 obj_surface = decode_state->render_object;
1711 if (obj_surface && obj_surface->private_data) {
1712 dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1715 obj_surface = decode_state->reference_objects[1];
1717 if (obj_surface && obj_surface->private_data) {
1718 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1721 BEGIN_BCS_BATCH(batch, 7);
1722 OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1724 if (dmv_write_buffer)
1725 OUT_BCS_RELOC(batch, dmv_write_buffer,
1726 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1729 OUT_BCS_BATCH(batch, 0);
1731 OUT_BCS_BATCH(batch, 0);
1732 OUT_BCS_BATCH(batch, 0);
1734 if (dmv_read_buffer)
1735 OUT_BCS_RELOC(batch, dmv_read_buffer,
1736 I915_GEM_DOMAIN_INSTRUCTION, 0,
1739 OUT_BCS_BATCH(batch, 0);
1741 OUT_BCS_BATCH(batch, 0);
1742 OUT_BCS_BATCH(batch, 0);
1744 ADVANCE_BCS_BATCH(batch);
/*
 * Translate the slice-header bit offset reported by the app into an offset
 * into the raw bitstream.  For the advanced profile (3), the raw bytes may
 * contain 00 00 03 start-code-emulation-prevention sequences that the
 * app-side offset does not account for: each one consumes an extra byte.
 */
static int
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_bytes = in_slice_data_bit_offset / 8;
    int adjusted_bit_offset = in_slice_data_bit_offset;

    if (profile == 3) {
        int consumed = 0; /* header bytes accounted for so far */
        int pos = 0;      /* actual byte position within buf */

        while (consumed < header_bytes) {
            if (!buf[pos] && !buf[pos + 1] && buf[pos + 2] == 3 && buf[pos + 3] < 4) {
                /* 00 00 03 0x: the 03 is an emulation byte — two header
                 * bytes consumed, three raw bytes advanced */
                consumed += 2;
                pos += 3;
            } else {
                consumed += 1;
                pos += 1;
            }
        }

        adjusted_bit_offset = 8 * pos + in_slice_data_bit_offset % 8;
    }

    return adjusted_bit_offset;
}
1770 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1771 VAPictureParameterBufferVC1 *pic_param,
1772 VASliceParameterBufferVC1 *slice_param,
1773 VASliceParameterBufferVC1 *next_slice_param,
1774 dri_bo *slice_data_bo,
1775 struct gen7_mfd_context *gen7_mfd_context)
1777 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1778 int next_slice_start_vert_pos;
1779 int macroblock_offset;
1780 uint8_t *slice_data = NULL;
1782 dri_bo_map(slice_data_bo, 0);
1783 slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1784 macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1785 slice_param->macroblock_offset,
1786 pic_param->sequence_fields.bits.profile);
1787 dri_bo_unmap(slice_data_bo);
1789 if (next_slice_param)
1790 next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1792 next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1794 BEGIN_BCS_BATCH(batch, 5);
1795 OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1796 OUT_BCS_BATCH(batch,
1797 slice_param->slice_data_size - (macroblock_offset >> 3));
1798 OUT_BCS_BATCH(batch,
1799 slice_param->slice_data_offset + (macroblock_offset >> 3));
1800 OUT_BCS_BATCH(batch,
1801 slice_param->slice_vertical_position << 16 |
1802 next_slice_start_vert_pos << 0);
1803 OUT_BCS_BATCH(batch,
1804 (macroblock_offset & 0x7));
1805 ADVANCE_BCS_BATCH(batch);
1809 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1810 struct decode_state *decode_state,
1811 struct gen7_mfd_context *gen7_mfd_context)
1813 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1814 VAPictureParameterBufferVC1 *pic_param;
1815 VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1816 dri_bo *slice_data_bo;
1819 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1820 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1822 gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1823 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1824 intel_batchbuffer_emit_mi_flush(batch);
1825 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1826 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1827 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1828 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1829 gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1830 gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1831 gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1833 for (j = 0; j < decode_state->num_slice_params; j++) {
1834 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1835 slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1836 slice_data_bo = decode_state->slice_datas[j]->bo;
1837 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1839 if (j == decode_state->num_slice_params - 1)
1840 next_slice_group_param = NULL;
1842 next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1844 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1845 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1847 if (i < decode_state->slice_params[j]->num_elements - 1)
1848 next_slice_param = slice_param + 1;
1850 next_slice_param = next_slice_group_param;
1852 gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1857 intel_batchbuffer_end_atomic(batch);
1858 intel_batchbuffer_flush(batch);
1862 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1863 struct decode_state *decode_state,
1864 struct gen7_mfd_context *gen7_mfd_context)
1866 struct object_surface *obj_surface;
1867 VAPictureParameterBufferJPEGBaseline *pic_param;
1868 int subsampling = SUBSAMPLE_YUV420;
1869 int fourcc = VA_FOURCC_IMC3;
1871 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1873 if (pic_param->num_components == 1)
1874 subsampling = SUBSAMPLE_YUV400;
1875 else if (pic_param->num_components == 3) {
1876 int h1 = pic_param->components[0].h_sampling_factor;
1877 int h2 = pic_param->components[1].h_sampling_factor;
1878 int h3 = pic_param->components[2].h_sampling_factor;
1879 int v1 = pic_param->components[0].v_sampling_factor;
1880 int v2 = pic_param->components[1].v_sampling_factor;
1881 int v3 = pic_param->components[2].v_sampling_factor;
1883 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1884 v1 == 2 && v2 == 1 && v3 == 1) {
1885 subsampling = SUBSAMPLE_YUV420;
1886 fourcc = VA_FOURCC_IMC3;
1887 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1888 v1 == 1 && v2 == 1 && v3 == 1) {
1889 subsampling = SUBSAMPLE_YUV422H;
1890 fourcc = VA_FOURCC_422H;
1891 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1892 v1 == 1 && v2 == 1 && v3 == 1) {
1893 subsampling = SUBSAMPLE_YUV444;
1894 fourcc = VA_FOURCC_444P;
1895 } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1896 v1 == 1 && v2 == 1 && v3 == 1) {
1897 subsampling = SUBSAMPLE_YUV411;
1898 fourcc = VA_FOURCC_411P;
1899 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1900 v1 == 2 && v2 == 1 && v3 == 1) {
1901 subsampling = SUBSAMPLE_YUV422V;
1902 fourcc = VA_FOURCC_422V;
1903 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1904 v1 == 2 && v2 == 2 && v3 == 2) {
1905 subsampling = SUBSAMPLE_YUV422H;
1906 fourcc = VA_FOURCC_422H;
1907 } else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1908 v1 == 2 && v2 == 1 && v3 == 1) {
1909 subsampling = SUBSAMPLE_YUV422V;
1910 fourcc = VA_FOURCC_422V;
1918 /* Current decoded picture */
1919 obj_surface = decode_state->render_object;
1920 i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1922 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1923 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1924 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1925 gen7_mfd_context->pre_deblocking_output.valid = 1;
1927 gen7_mfd_context->post_deblocking_output.bo = NULL;
1928 gen7_mfd_context->post_deblocking_output.valid = 0;
1930 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1931 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1933 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1934 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1936 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1937 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1939 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1940 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1942 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1943 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Maps a VA rotation index to the GEN7 MFX JPEG rotation encoding.  Only
 * entry 0 (no rotation) is referenced by gen8_mfd_jpeg_pic_state() below.
 * NOTE(review): the closing "};" lies on a line missing from this extraction. */
1946 static const int va_to_gen7_jpeg_rotation[4] = {
1947 GEN7_JPEG_ROTATION_0,
1948 GEN7_JPEG_ROTATION_90,
1949 GEN7_JPEG_ROTATION_180,
1950 GEN7_JPEG_ROTATION_270
1954 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1955 struct decode_state *decode_state,
1956 struct gen7_mfd_context *gen7_mfd_context)
1958 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1959 VAPictureParameterBufferJPEGBaseline *pic_param;
1960 int chroma_type = GEN7_YUV420;
1961 int frame_width_in_blks;
1962 int frame_height_in_blks;
1964 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1965 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1967 if (pic_param->num_components == 1)
1968 chroma_type = GEN7_YUV400;
1969 else if (pic_param->num_components == 3) {
1970 int h1 = pic_param->components[0].h_sampling_factor;
1971 int h2 = pic_param->components[1].h_sampling_factor;
1972 int h3 = pic_param->components[2].h_sampling_factor;
1973 int v1 = pic_param->components[0].v_sampling_factor;
1974 int v2 = pic_param->components[1].v_sampling_factor;
1975 int v3 = pic_param->components[2].v_sampling_factor;
1977 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1978 v1 == 2 && v2 == 1 && v3 == 1)
1979 chroma_type = GEN7_YUV420;
1980 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1981 v1 == 1 && v2 == 1 && v3 == 1)
1982 chroma_type = GEN7_YUV422H_2Y;
1983 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1984 v1 == 1 && v2 == 1 && v3 == 1)
1985 chroma_type = GEN7_YUV444;
1986 else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1987 v1 == 1 && v2 == 1 && v3 == 1)
1988 chroma_type = GEN7_YUV411;
1989 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1990 v1 == 2 && v2 == 1 && v3 == 1)
1991 chroma_type = GEN7_YUV422V_2Y;
1992 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1993 v1 == 2 && v2 == 2 && v3 == 2)
1994 chroma_type = GEN7_YUV422H_4Y;
1995 else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1996 v1 == 2 && v2 == 1 && v3 == 1)
1997 chroma_type = GEN7_YUV422V_4Y;
2002 if (chroma_type == GEN7_YUV400 ||
2003 chroma_type == GEN7_YUV444 ||
2004 chroma_type == GEN7_YUV422V_2Y) {
2005 frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2006 frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2007 } else if (chroma_type == GEN7_YUV411) {
2008 frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2009 frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2011 frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2012 frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2015 BEGIN_BCS_BATCH(batch, 3);
2016 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2017 OUT_BCS_BATCH(batch,
2018 (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
2019 (chroma_type << 0));
2020 OUT_BCS_BATCH(batch,
2021 ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
2022 ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
2023 ADVANCE_BCS_BATCH(batch);
/* Maps a VA Huffman table index (0..1) to the GEN7 hardware table id.
 * NOTE(review): the two initializer entries and the closing "};" (original
 * lines 2027-2031) are missing from this extraction. */
2026 static const int va_to_gen7_jpeg_hufftable[2] = {
/* Emits one MFX_JPEG_HUFF_TABLE_STATE command (53 DWORDs) per requested
 * Huffman table.  Tables whose load_huffman_table[] flag is clear are
 * skipped.  Each command carries the DC code counts/values (12+12 bytes),
 * the AC code counts (16 bytes), and 164 bytes of AC values — presumably the
 * 162 AC values of VAHuffmanTableBufferJPEGBaseline plus 2 bytes of struct
 * padding; verify against va_dec_jpeg.h.
 * NOTE(review): the "static void" line, the trailing "num_tables" parameter,
 * local declarations, the early-out "return;" / per-table "continue;" and the
 * closing braces are on lines missing from this extraction. */
2032 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2033 struct decode_state *decode_state,
2034 struct gen7_mfd_context *gen7_mfd_context,
2037 VAHuffmanTableBufferJPEGBaseline *huffman_table;
2038 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Nothing to program if the app supplied no Huffman table buffer. */
2041 if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2044 huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2046 for (index = 0; index < num_tables; index++) {
2047 int id = va_to_gen7_jpeg_hufftable[index];
/* Skip tables the application did not ask to (re)load. */
2048 if (!huffman_table->load_huffman_table[index])
2050 BEGIN_BCS_BATCH(batch, 53);
2051 OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2052 OUT_BCS_BATCH(batch, id);
2053 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2054 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2055 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2056 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2057 ADVANCE_BCS_BATCH(batch);
/* Maps a 1-based JPEG component id to the MFX quantizer-matrix selector.
 * NOTE(review): the entry for index 0 (original line 2062, presumably a
 * placeholder since ids start at 1) and the closing "};" are missing from
 * this extraction. */
2061 static const int va_to_gen7_jpeg_qm[5] = {
2063 MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2064 MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2065 MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2066 MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
/* Loads the JPEG inverse-quantization matrices into the MFX engine.
 * VA supplies the tables in zig-zag scan order; the hardware wants raster
 * order, so each table is de-zigzagged through zigzag_direct[] before being
 * handed to gen8_mfd_qm_state().
 * NOTE(review): the "static void" line, local declarations (index, j,
 * qm_type), the early-out "return;" / per-component "continue;" and the
 * closing braces are on lines missing from this extraction. */
2070 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2071 struct decode_state *decode_state,
2072 struct gen7_mfd_context *gen7_mfd_context)
2074 VAPictureParameterBufferJPEGBaseline *pic_param;
2075 VAIQMatrixBufferJPEGBaseline *iq_matrix;
/* Nothing to program if the app supplied no IQ matrix buffer. */
2078 if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2081 iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2082 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2084 assert(pic_param->num_components <= 3);
2086 for (index = 0; index < pic_param->num_components; index++) {
/* 1-based id relative to the first component; indexes va_to_gen7_jpeg_qm. */
2087 int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2089 unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2090 unsigned char raster_qm[64];
/* Ignore components whose id falls outside the mapping table. */
2093 if (id > 4 || id < 1)
2096 if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2099 qm_type = va_to_gen7_jpeg_qm[id];
/* Convert from zig-zag scan order to raster order. */
2101 for (j = 0; j < 64; j++)
2102 raster_qm[zigzag_direct[j]] = qm[j];
2104 gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
/* Emits one MFD_JPEG_BSD_OBJECT command (6 DWORDs) for a JPEG scan: bitstream
 * size/offset, scan position, scan-component mask, MCU count and restart
 * interval.  The component mask is built from each scan component's id
 * relative to the first picture component.
 * NOTE(review): the "static void" line, the "int i;" declaration, the switch
 * "case 1/2/3:"/"default:" labels and "break;" lines, and the closing braces
 * are on lines missing from this extraction. */
2109 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2110 VAPictureParameterBufferJPEGBaseline *pic_param,
2111 VASliceParameterBufferJPEGBaseline *slice_param,
2112 VASliceParameterBufferJPEGBaseline *next_slice_param,
2113 dri_bo *slice_data_bo,
2114 struct gen7_mfd_context *gen7_mfd_context)
2116 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2117 int scan_component_mask = 0;
2120 assert(slice_param->num_components > 0);
2121 assert(slice_param->num_components < 4);
2122 assert(slice_param->num_components <= pic_param->num_components);
/* One mask bit per scan component (Y/Cb/Cr by relative component id). */
2124 for (i = 0; i < slice_param->num_components; i++) {
2125 switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2127 scan_component_mask |= (1 << 0);
2130 scan_component_mask |= (1 << 1);
2133 scan_component_mask |= (1 << 2);
2141 BEGIN_BCS_BATCH(batch, 6);
2142 OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2143 OUT_BCS_BATCH(batch,
2144 slice_param->slice_data_size);
2145 OUT_BCS_BATCH(batch,
2146 slice_param->slice_data_offset);
2147 OUT_BCS_BATCH(batch,
2148 slice_param->slice_horizontal_position << 16 |
2149 slice_param->slice_vertical_position << 0);
2150 OUT_BCS_BATCH(batch,
2151 ((slice_param->num_components != 1) << 30) | /* interleaved */
2152 (scan_component_mask << 27) | /* scan components */
2153 (0 << 26) | /* disable interrupt allowed */
2154 (slice_param->num_mcus << 0)); /* MCU count */
2155 OUT_BCS_BATCH(batch,
2156 (slice_param->restart_interval << 0)); /* RestartInterval */
2157 ADVANCE_BCS_BATCH(batch);
2160 /* Workaround for JPEG decoding on Ivybridge */
/* Tiny hard-coded AVC clip decoded before each real JPEG frame to put the
 * MFX engine into a known-good state (see gen8_mfd_jpeg_wa()).
 * NOTE(review): the struct header ("static struct { int width; ... }"),
 * several fields (width/height/data_size/qp — inferred from uses below) and
 * most of the initializer are on lines missing from this extraction. */
2166 unsigned char data[32];
2168 int data_bit_offset;
2170 } gen7_jpeg_wa_clip = {
2174 0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2175 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
/* Allocates (or re-creates) the scratch surface and slice-data BO used by the
 * JPEG-on-Ivybridge workaround: a small NV12 surface sized for the hard-coded
 * AVC clip, plus a BO holding the clip's bitstream bytes.
 * NOTE(review): the "static void" line, local declarations (status),
 * dri_bo_alloc() arguments (name/size/alignment) and closing braces are on
 * lines missing from this extraction. */
2183 gen8_jpeg_wa_init(VADriverContextP ctx,
2184 struct gen7_mfd_context *gen7_mfd_context)
2186 struct i965_driver_data *i965 = i965_driver_data(ctx);
2188 struct object_surface *obj_surface;
/* Drop any workaround surface left over from a previous frame. */
2190 if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2191 i965_DestroySurfaces(ctx,
2192 &gen7_mfd_context->jpeg_wa_surface_id,
2195 status = i965_CreateSurfaces(ctx,
2196 gen7_jpeg_wa_clip.width,
2197 gen7_jpeg_wa_clip.height,
2198 VA_RT_FORMAT_YUV420,
2200 &gen7_mfd_context->jpeg_wa_surface_id);
2201 assert(status == VA_STATUS_SUCCESS);
2203 obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2204 assert(obj_surface);
2205 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2206 gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
/* Upload the clip bitstream once; the BO is reused across frames. */
2208 if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2209 gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2213 dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2215 gen7_jpeg_wa_clip.data_size,
2216 gen7_jpeg_wa_clip.data);
/* Emits MFX_PIPE_MODE_SELECT for the workaround clip: long-format VLD AVC
 * decode with stream-out disabled and pre-deblocking output enabled.
 * NOTE(review): the "static void" line, the tail of the DW2 bitfield
 * (original lines 2241-2242) and the closing brace are missing from this
 * extraction. */
2221 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2222 struct gen7_mfd_context *gen7_mfd_context)
2224 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2226 BEGIN_BCS_BATCH(batch, 5);
2227 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2228 OUT_BCS_BATCH(batch,
2229 (MFX_LONG_MODE << 17) | /* Currently only support long format */
2230 (MFD_MODE_VLD << 15) | /* VLD mode */
2231 (0 << 10) | /* disable Stream-Out */
2232 (0 << 9) | /* Post Deblocking Output */
2233 (1 << 8) | /* Pre Deblocking Output */
2234 (0 << 5) | /* not in stitch mode */
2235 (MFX_CODEC_DECODE << 4) | /* decoding mode */
2236 (MFX_FORMAT_AVC << 0));
2237 OUT_BCS_BATCH(batch,
2238 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
2239 (0 << 3) | /* terminate if AVC mbdata error occurs */
2240 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
2243 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2244 OUT_BCS_BATCH(batch, 0); /* reserved */
2245 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_SURFACE_STATE describing the workaround clip's NV12 destination
 * surface (dimensions, pitch, tiling, and the Cb plane offset).
 * NOTE(review): the "static void" line and closing brace are missing from
 * this extraction. */
2249 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2250 struct gen7_mfd_context *gen7_mfd_context)
2252 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2253 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2255 BEGIN_BCS_BATCH(batch, 6);
2256 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2257 OUT_BCS_BATCH(batch, 0);
2258 OUT_BCS_BATCH(batch,
2259 ((obj_surface->orig_width - 1) << 18) |
2260 ((obj_surface->orig_height - 1) << 4));
2261 OUT_BCS_BATCH(batch,
2262 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2263 (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2264 (0 << 22) | /* surface object control state, ignored */
2265 ((obj_surface->width - 1) << 3) | /* pitch */
2266 (0 << 2) | /* must be 0 */
2267 (1 << 1) | /* must be tiled */
2268 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
2269 OUT_BCS_BATCH(batch,
2270 (0 << 16) | /* X offset for U(Cb), must be 0 */
2271 (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2272 OUT_BCS_BATCH(batch,
2273 (0 << 16) | /* X offset for V(Cr), must be 0 */
2274 (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
2275 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_PIPE_BUF_ADDR_STATE (61 DWORDs) for the workaround clip.  Only
 * the pre-deblocking output (the WA surface BO) and a throwaway intra
 * row-store BO are programmed; all other address slots are zero.  The
 * temporary intra BO is released at the end — the reloc keeps it alive until
 * the batch completes.
 * NOTE(review): the "static void" line, declarations of intra_bo/i, the
 * dri_bo_alloc() arguments, the relocation targets/offsets on the
 * OUT_BCS_RELOC calls, and several zero-DW filler lines are missing from
 * this extraction. */
2279 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2280 struct gen7_mfd_context *gen7_mfd_context)
2282 struct i965_driver_data *i965 = i965_driver_data(ctx);
2283 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2284 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Scratch intra row-store buffer used only for this one batch. */
2288 intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2293 BEGIN_BCS_BATCH(batch, 61);
2294 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2295 OUT_BCS_RELOC(batch,
2297 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2299 OUT_BCS_BATCH(batch, 0);
2300 OUT_BCS_BATCH(batch, 0);
2303 OUT_BCS_BATCH(batch, 0); /* post deblocking */
2304 OUT_BCS_BATCH(batch, 0);
2305 OUT_BCS_BATCH(batch, 0);
2307 /* uncompressed-video & stream out 7-12 */
2308 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2309 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2310 OUT_BCS_BATCH(batch, 0);
2311 OUT_BCS_BATCH(batch, 0);
2312 OUT_BCS_BATCH(batch, 0);
2313 OUT_BCS_BATCH(batch, 0);
2315 /* the DW 13-15 is for intra row store scratch */
2316 OUT_BCS_RELOC(batch,
2318 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2320 OUT_BCS_BATCH(batch, 0);
2321 OUT_BCS_BATCH(batch, 0);
2323 /* the DW 16-18 is for deblocking filter */
2324 OUT_BCS_BATCH(batch, 0);
2325 OUT_BCS_BATCH(batch, 0);
2326 OUT_BCS_BATCH(batch, 0);
/* No reference frames are needed for the clip — zero every slot. */
2329 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2330 OUT_BCS_BATCH(batch, 0);
2331 OUT_BCS_BATCH(batch, 0);
2333 OUT_BCS_BATCH(batch, 0);
2335 /* the DW52-54 is for mb status address */
2336 OUT_BCS_BATCH(batch, 0);
2337 OUT_BCS_BATCH(batch, 0);
2338 OUT_BCS_BATCH(batch, 0);
2339 /* the DW56-60 is for ILDB & second ILDB address */
2340 OUT_BCS_BATCH(batch, 0);
2341 OUT_BCS_BATCH(batch, 0);
2342 OUT_BCS_BATCH(batch, 0);
2343 OUT_BCS_BATCH(batch, 0);
2344 OUT_BCS_BATCH(batch, 0);
2345 OUT_BCS_BATCH(batch, 0);
2347 ADVANCE_BCS_BATCH(batch);
/* The batch reloc holds its own reference; drop ours now. */
2349 dri_bo_unreference(intra_bo);
/* Emits MFX_BSP_BUF_BASE_ADDR_STATE for the workaround clip, allocating
 * throwaway BSD/MPC and MPR row-store BOs just for this batch and releasing
 * them afterwards (the relocations keep them alive until execution).
 * NOTE(review): the "static void" line, remaining dri_bo_alloc() arguments,
 * the relocation targets/offsets and the closing brace are missing from this
 * extraction. */
2353 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2354 struct gen7_mfd_context *gen7_mfd_context)
2356 struct i965_driver_data *i965 = i965_driver_data(ctx);
2357 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2358 dri_bo *bsd_mpc_bo, *mpr_bo;
2360 bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2361 "bsd mpc row store",
2362 11520, /* 1.5 * 120 * 64 */
2365 mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2367 7680, /* 1. 0 * 120 * 64 */
2370 BEGIN_BCS_BATCH(batch, 10);
2371 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2373 OUT_BCS_RELOC(batch,
2375 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2378 OUT_BCS_BATCH(batch, 0);
2379 OUT_BCS_BATCH(batch, 0);
2381 OUT_BCS_RELOC(batch,
2383 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2385 OUT_BCS_BATCH(batch, 0);
2386 OUT_BCS_BATCH(batch, 0);
2388 OUT_BCS_BATCH(batch, 0);
2389 OUT_BCS_BATCH(batch, 0);
2390 OUT_BCS_BATCH(batch, 0);
2392 ADVANCE_BCS_BATCH(batch);
/* Relocs hold references; release our local ones. */
2394 dri_bo_unreference(bsd_mpc_bo);
2395 dri_bo_unreference(mpr_bo);
/* AVC quantization-matrix programming step of the JPEG workaround sequence.
 * NOTE(review): the entire function body (original lines 2401-2404) is
 * missing from this extraction, so what it emits cannot be documented here. */
2399 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2400 struct gen7_mfd_context *gen7_mfd_context)
/* Emits MFX_AVC_IMG_STATE (16 DWORDs) for the 1x1-macroblock workaround
 * clip: progressive, CABAC, 4:2:0, no MBAFF.
 * NOTE(review): the "static void" line and several bitfield lines inside the
 * DW4/DW5 expressions (original lines 2422-2428, 2432-2436, 2438) are
 * missing from this extraction. */
2406 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2407 struct gen7_mfd_context *gen7_mfd_context)
2409 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2411 int mbaff_frame_flag = 0;
2412 unsigned int width_in_mbs = 1, height_in_mbs = 1;
2414 BEGIN_BCS_BATCH(batch, 16);
2415 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2416 OUT_BCS_BATCH(batch,
2417 width_in_mbs * height_in_mbs);
2418 OUT_BCS_BATCH(batch,
2419 ((height_in_mbs - 1) << 16) |
2420 ((width_in_mbs - 1) << 0));
2421 OUT_BCS_BATCH(batch,
2426 (0 << 12) | /* differ from GEN6 */
2429 OUT_BCS_BATCH(batch,
2430 (1 << 10) | /* 4:2:0 */
2431 (1 << 7) | /* CABAC */
2437 (mbaff_frame_flag << 1) |
2439 OUT_BCS_BATCH(batch, 0);
2440 OUT_BCS_BATCH(batch, 0);
2441 OUT_BCS_BATCH(batch, 0);
2442 OUT_BCS_BATCH(batch, 0);
2443 OUT_BCS_BATCH(batch, 0);
2444 OUT_BCS_BATCH(batch, 0);
2445 OUT_BCS_BATCH(batch, 0);
2446 OUT_BCS_BATCH(batch, 0);
2447 OUT_BCS_BATCH(batch, 0);
2448 OUT_BCS_BATCH(batch, 0);
2449 OUT_BCS_BATCH(batch, 0);
2450 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_AVC_DIRECTMODE_STATE (71 DWORDs) with every reference surface,
 * POC and direct-MV address zeroed — the workaround clip is a single intra
 * frame, so no direct-mode data is needed.
 * NOTE(review): the "static void" line, the "int i;" declaration, some
 * filler/comment lines and the closing braces are missing from this
 * extraction. */
2454 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2455 struct gen7_mfd_context *gen7_mfd_context)
2457 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2460 BEGIN_BCS_BATCH(batch, 71);
2461 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2463 /* reference surfaces 0..15 */
2464 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2465 OUT_BCS_BATCH(batch, 0); /* top */
2466 OUT_BCS_BATCH(batch, 0); /* bottom */
2469 OUT_BCS_BATCH(batch, 0);
2471 /* the current decoding frame/field */
2472 OUT_BCS_BATCH(batch, 0); /* top */
2473 OUT_BCS_BATCH(batch, 0);
2474 OUT_BCS_BATCH(batch, 0);
2477 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2478 OUT_BCS_BATCH(batch, 0);
2479 OUT_BCS_BATCH(batch, 0);
2482 OUT_BCS_BATCH(batch, 0);
2483 OUT_BCS_BATCH(batch, 0);
2485 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_IND_OBJ_BASE_ADDR_STATE pointing the bitstream base at the
 * workaround clip's slice-data BO; all other indirect-object slots are
 * unused in VLD mode and left zero.
 * NOTE(review): the "static void" line, the relocation offset arguments and
 * the closing brace are missing from this extraction. */
2489 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2490 struct gen7_mfd_context *gen7_mfd_context)
2492 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2494 BEGIN_BCS_BATCH(batch, 11);
2495 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2496 OUT_BCS_RELOC(batch,
2497 gen7_mfd_context->jpeg_wa_slice_data_bo,
2498 I915_GEM_DOMAIN_INSTRUCTION, 0,
2500 OUT_BCS_BATCH(batch, 0);
2501 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2502 OUT_BCS_BATCH(batch, 0);
2503 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2504 OUT_BCS_BATCH(batch, 0);
2505 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2506 OUT_BCS_BATCH(batch, 0);
2507 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2508 OUT_BCS_BATCH(batch, 0);
2509 ADVANCE_BCS_BATCH(batch);
/* Emits MFD_AVC_BSD_OBJECT to actually decode the workaround clip: bitstream
 * length, start offset, and the first-macroblock bit offset split into its
 * byte and bit parts, with the last-slice flag set.
 * NOTE(review): the "static void" line, several bitfield lines of DW3/DW4
 * (original lines 2524-2528, 2531-2532) and the closing brace are missing
 * from this extraction. */
2513 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2514 struct gen7_mfd_context *gen7_mfd_context)
2516 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2518 /* the input bitsteam format on GEN7 differs from GEN6 */
2519 BEGIN_BCS_BATCH(batch, 6);
2520 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2521 OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2522 OUT_BCS_BATCH(batch, 0);
2523 OUT_BCS_BATCH(batch,
2529 OUT_BCS_BATCH(batch,
2530 ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2533 (1 << 3) | /* LastSlice Flag */
2534 (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2535 OUT_BCS_BATCH(batch, 0);
2536 ADVANCE_BCS_BATCH(batch);
/* Emits MFX_AVC_SLICE_STATE for the workaround clip's single intra slice:
 * I-slice, no reference lists, deblocking disabled, QP from the hard-coded
 * clip, slice spanning MB (0,0) to (0,1), flagged as the last slice.
 * NOTE(review): the "static void" line and a few bitfield lines of DW3
 * (original lines 2558, 2560, 2562-2563) are missing from this extraction. */
2540 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2541 struct gen7_mfd_context *gen7_mfd_context)
2543 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2544 int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2545 int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2546 int first_mb_in_slice = 0;
2547 int slice_type = SLICE_TYPE_I;
2549 BEGIN_BCS_BATCH(batch, 11);
2550 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2551 OUT_BCS_BATCH(batch, slice_type);
2552 OUT_BCS_BATCH(batch,
2553 (num_ref_idx_l1 << 24) |
2554 (num_ref_idx_l0 << 16) |
2557 OUT_BCS_BATCH(batch,
2559 (1 << 27) | /* disable Deblocking */
2561 (gen7_jpeg_wa_clip.qp << 16) |
2564 OUT_BCS_BATCH(batch,
2565 (slice_ver_pos << 24) |
2566 (slice_hor_pos << 16) |
2567 (first_mb_in_slice << 0));
2568 OUT_BCS_BATCH(batch,
2569 (next_slice_ver_pos << 16) |
2570 (next_slice_hor_pos << 0));
2571 OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2572 OUT_BCS_BATCH(batch, 0);
2573 OUT_BCS_BATCH(batch, 0);
2574 OUT_BCS_BATCH(batch, 0);
2575 OUT_BCS_BATCH(batch, 0);
2576 ADVANCE_BCS_BATCH(batch);
/* Drives the full JPEG-on-Ivybridge workaround: decode the tiny hard-coded
 * AVC clip to put the MFX engine in a known state before the real JPEG
 * frame.  The ordering of the state commands below matters — it mirrors the
 * normal AVC decode command sequence.
 * NOTE(review): the "static void" line, the opening/closing braces and line
 * 2593 are missing from this extraction. */
2580 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2581 struct gen7_mfd_context *gen7_mfd_context)
2583 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2584 gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2585 intel_batchbuffer_emit_mi_flush(batch);
2586 gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2587 gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2588 gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2589 gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2590 gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2591 gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2592 gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2594 gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2595 gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2596 gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
/* Top-level JPEG picture decode.  After init and the Ivybridge workaround,
 * emits pipeline/surface/buffer state, then makes two passes over the slice
 * parameters: the first pass only scans for the highest DC/AC Huffman table
 * selector so the right number of tables can be loaded; the second pass
 * emits one BSD object per scan.
 * NOTE(review): the "static void" line, the "int component;" declaration,
 * the "else" keywords before lines 2637/2647/2673/2681, the
 * "slice_param++;" advance statements and several closing braces are on
 * lines missing from this extraction. */
2602 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2603 struct decode_state *decode_state,
2604 struct gen7_mfd_context *gen7_mfd_context)
2606 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2607 VAPictureParameterBufferJPEGBaseline *pic_param;
2608 VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2609 dri_bo *slice_data_bo;
2610 int i, j, max_selector = 0;
2612 assert(decode_state->pic_param && decode_state->pic_param->buffer);
2613 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2615 /* Currently only support Baseline DCT */
2616 gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2617 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2619 gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2621 intel_batchbuffer_emit_mi_flush(batch);
2622 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2623 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2624 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2625 gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2626 gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
/* Pass 1: find the largest Huffman table selector used by any scan. */
2628 for (j = 0; j < decode_state->num_slice_params; j++) {
2629 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2630 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2631 slice_data_bo = decode_state->slice_datas[j]->bo;
2632 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2634 if (j == decode_state->num_slice_params - 1)
2635 next_slice_group_param = NULL;
2637 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2639 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2642 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2644 if (i < decode_state->slice_params[j]->num_elements - 1)
2645 next_slice_param = slice_param + 1;
2647 next_slice_param = next_slice_group_param;
2649 for (component = 0; component < slice_param->num_components; component++) {
2650 if (max_selector < slice_param->components[component].dc_table_selector)
2651 max_selector = slice_param->components[component].dc_table_selector;
2653 if (max_selector < slice_param->components[component].ac_table_selector)
2654 max_selector = slice_param->components[component].ac_table_selector;
/* Baseline JPEG allows at most two Huffman tables per class. */
2661 assert(max_selector < 2);
2662 gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
/* Pass 2: emit one BSD object per scan. */
2664 for (j = 0; j < decode_state->num_slice_params; j++) {
2665 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2666 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2667 slice_data_bo = decode_state->slice_datas[j]->bo;
2668 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2670 if (j == decode_state->num_slice_params - 1)
2671 next_slice_group_param = NULL;
2673 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2675 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2676 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2678 if (i < decode_state->slice_params[j]->num_elements - 1)
2679 next_slice_param = slice_param + 1;
2681 next_slice_param = next_slice_group_param;
2683 gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2688 intel_batchbuffer_end_atomic(batch);
2689 intel_batchbuffer_flush(batch);
/* VP8 DC dequantization lookup table (128 entries), indexed by the clamped
 * quantization index — see RFC 6386 §14.1.
 * NOTE(review): the closing "};" is on a line missing from this extraction. */
2692 static const int vp8_dc_qlookup[128] =
2694 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
2695 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
2696 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
2697 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
2698 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
2699 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
2700 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2701 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
/* VP8 AC dequantization lookup table (128 entries), indexed by the clamped
 * quantization index — see RFC 6386 §14.1.
 * NOTE(review): the closing "};" is on a line missing from this extraction. */
2704 static const int vp8_ac_qlookup[128] =
2706 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
2707 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
2708 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
2709 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
2710 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
2711 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2712 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2713 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
/*
 * Clamp a VP8 quantization index into [0, 127] so it can safely index the
 * 128-entry vp8_dc_qlookup[]/vp8_ac_qlookup[] tables above (RFC 6386
 * specifies indices are clamped to this range after delta adjustment).
 */
static inline unsigned int vp8_clip_quantization_index(int index)
{
    if (index > 127)
        return 127;
    else if (index < 0)
        return 0;

    return index;
}
/* Set up decoder state for one VP8 frame: refresh the reference frame-store
 * index, ensure the render surface has an NV12 BO, route the output through
 * the post- or pre-deblocking path depending on loop_filter_disable, make
 * sure the segmentation buffer exists, and (re)allocate the AVC-style
 * row-store scratch buffers sized by the frame width in macroblocks.
 * NOTE(review): the "static void" line, the "dri_bo *bo;" declaration, the
 * pic_param arguments to intel_update_vp8_frame_store_index(), the
 * name/size/alignment arguments of two dri_bo_alloc() calls and assorted
 * closing lines are missing from this extraction. */
2727 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2728 struct decode_state *decode_state,
2729 struct gen7_mfd_context *gen7_mfd_context)
2731 struct object_surface *obj_surface;
2732 struct i965_driver_data *i965 = i965_driver_data(ctx);
2734 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2735 int width_in_mbs = (pic_param->frame_width + 15) / 16;
2736 int height_in_mbs = (pic_param->frame_height + 15) / 16;
2738 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2739 assert(height_in_mbs > 0 && height_in_mbs <= 256);
2741 intel_update_vp8_frame_store_index(ctx,
2744 gen7_mfd_context->reference_surface);
2746 /* Current decoded picture */
2747 obj_surface = decode_state->render_object;
2748 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Exactly one of the two outputs is valid, chosen by loop_filter_disable:
 * loop filter on -> post-deblocking path, off -> pre-deblocking path. */
2750 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2751 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2752 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2753 gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2755 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2756 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2757 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2758 gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2760 intel_ensure_vp8_segmentation_buffer(ctx,
2761 &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2763 /* The same as AVC */
2764 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2765 bo = dri_bo_alloc(i965->intel.bufmgr,
2770 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2771 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2773 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2774 bo = dri_bo_alloc(i965->intel.bufmgr,
2775 "deblocking filter row store",
2776 width_in_mbs * 64 * 4,
2779 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2780 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2782 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2783 bo = dri_bo_alloc(i965->intel.bufmgr,
2784 "bsd mpc row store",
2785 width_in_mbs * 64 * 2,
2788 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2789 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2791 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2792 bo = dri_bo_alloc(i965->intel.bufmgr,
2794 width_in_mbs * 64 * 2,
2797 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2798 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2800 gen7_mfd_context->bitplane_read_buffer.valid = 0;
2804 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2805 struct decode_state *decode_state,
2806 struct gen7_mfd_context *gen7_mfd_context)
2808 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2809 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2810 VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2811 VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2812 dri_bo *probs_bo = decode_state->probability_data->bo;
2814 unsigned int quantization_value[4][6];
2816 /* There is no safe way to error out if the segmentation buffer
2817 could not be allocated. So, instead of aborting, simply decode
2818 something even if the result may look totally inacurate */
2819 const unsigned int enable_segmentation =
2820 pic_param->pic_fields.bits.segmentation_enabled &&
2821 gen7_mfd_context->segmentation_buffer.valid;
2823 log2num = (int)log2(slice_param->num_of_partitions - 1);
2825 BEGIN_BCS_BATCH(batch, 38);
2826 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2827 OUT_BCS_BATCH(batch,
2828 (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2829 (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2830 OUT_BCS_BATCH(batch,
2832 pic_param->pic_fields.bits.sharpness_level << 16 |
2833 pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2834 pic_param->pic_fields.bits.sign_bias_golden << 12 |
2835 pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2836 pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2837 pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2838 pic_param->pic_fields.bits.segmentation_enabled << 8 |
2839 (enable_segmentation &&
2840 !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2841 (enable_segmentation &&
2842 pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2843 (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2844 pic_param->pic_fields.bits.filter_type << 4 |
2845 (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2846 !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2848 OUT_BCS_BATCH(batch,
2849 pic_param->loop_filter_level[3] << 24 |
2850 pic_param->loop_filter_level[2] << 16 |
2851 pic_param->loop_filter_level[1] << 8 |
2852 pic_param->loop_filter_level[0] << 0);
2854 /* Quantizer Value for 4 segmetns, DW4-DW15 */
2855 for (i = 0; i < 4; i++) {
2856 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2857 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2858 quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
2859 /* 101581>>16 is equivalent to 155/100 */
2860 quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
2861 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2862 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2864 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2865 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2867 OUT_BCS_BATCH(batch,
2868 quantization_value[i][0] << 16 | /* Y1AC */
2869 quantization_value[i][1] << 0); /* Y1DC */
2870 OUT_BCS_BATCH(batch,
2871 quantization_value[i][5] << 16 | /* UVAC */
2872 quantization_value[i][4] << 0); /* UVDC */
2873 OUT_BCS_BATCH(batch,
2874 quantization_value[i][3] << 16 | /* Y2AC */
2875 quantization_value[i][2] << 0); /* Y2DC */
2878 /* CoeffProbability table for non-key frame, DW16-DW18 */
2880 OUT_BCS_RELOC(batch, probs_bo,
2881 0, I915_GEM_DOMAIN_INSTRUCTION,
2883 OUT_BCS_BATCH(batch, 0);
2884 OUT_BCS_BATCH(batch, 0);
2886 OUT_BCS_BATCH(batch, 0);
2887 OUT_BCS_BATCH(batch, 0);
2888 OUT_BCS_BATCH(batch, 0);
2891 OUT_BCS_BATCH(batch,
2892 pic_param->mb_segment_tree_probs[2] << 16 |
2893 pic_param->mb_segment_tree_probs[1] << 8 |
2894 pic_param->mb_segment_tree_probs[0] << 0);
2896 OUT_BCS_BATCH(batch,
2897 pic_param->prob_skip_false << 24 |
2898 pic_param->prob_intra << 16 |
2899 pic_param->prob_last << 8 |
2900 pic_param->prob_gf << 0);
2902 OUT_BCS_BATCH(batch,
2903 pic_param->y_mode_probs[3] << 24 |
2904 pic_param->y_mode_probs[2] << 16 |
2905 pic_param->y_mode_probs[1] << 8 |
2906 pic_param->y_mode_probs[0] << 0);
2908 OUT_BCS_BATCH(batch,
2909 pic_param->uv_mode_probs[2] << 16 |
2910 pic_param->uv_mode_probs[1] << 8 |
2911 pic_param->uv_mode_probs[0] << 0);
2913 /* MV update value, DW23-DW32 */
2914 for (i = 0; i < 2; i++) {
2915 for (j = 0; j < 20; j += 4) {
2916 OUT_BCS_BATCH(batch,
2917 (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2918 pic_param->mv_probs[i][j + 2] << 16 |
2919 pic_param->mv_probs[i][j + 1] << 8 |
2920 pic_param->mv_probs[i][j + 0] << 0);
2924 OUT_BCS_BATCH(batch,
2925 (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2926 (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2927 (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) << 8 |
2928 (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) << 0);
2930 OUT_BCS_BATCH(batch,
2931 (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2932 (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2933 (pic_param->loop_filter_deltas_mode[1] & 0x7f) << 8 |
2934 (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0);
2936 /* segmentation id stream base address, DW35-DW37 */
2937 if (enable_segmentation) {
2938 OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2939 0, I915_GEM_DOMAIN_INSTRUCTION,
2941 OUT_BCS_BATCH(batch, 0);
2942 OUT_BCS_BATCH(batch, 0);
2945 OUT_BCS_BATCH(batch, 0);
2946 OUT_BCS_BATCH(batch, 0);
2947 OUT_BCS_BATCH(batch, 0);
2949 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the MFD_VP8_BSD_OBJECT command (22 DWords) describing the bitstream
 * layout of one VP8 frame: the bool-decoder state for partition 0 plus
 * size/offset pairs for up to eight token partitions.
 */
2953 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2954 VAPictureParameterBufferVP8 *pic_param,
2955 VASliceParameterBufferVP8 *slice_param,
2956 dri_bo *slice_data_bo,
2957 struct gen7_mfd_context *gen7_mfd_context)
2959 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Byte offset of the first token partition: macroblock_offset is in bits,
 * round it up to whole bytes past the slice data start. */
2961 unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
/* Bits already consumed from the bool decoder's current byte
 * (bool_coder_ctx.count is the number of bits still available, 0..7). */
2962 unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
2963 unsigned int partition_size_0 = slice_param->partition_size[0];
2965 assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
/* A fully consumed byte is folded out of partition 0 instead of being
 * reported as 8 used bits.  NOTE(review): the companion adjustment
 * statements between these two lines are not visible in this view. */
2966 if (used_bits == 8) {
2969 partition_size_0 -= 1;
/* num_of_partitions counts the first (mode/mv) partition plus the
 * 1/2/4/8 token partitions, hence the 2..9 range. */
2972 assert(slice_param->num_of_partitions >= 2);
2973 assert(slice_param->num_of_partitions <= 9);
/* log2 of the token-partition count, as the hardware expects. */
2975 log2num = (int)log2(slice_param->num_of_partitions - 1);
2977 BEGIN_BCS_BATCH(batch, 22);
2978 OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2979 OUT_BCS_BATCH(batch,
2980 used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2981 pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
2983 (slice_param->macroblock_offset & 0x7));
2984 OUT_BCS_BATCH(batch,
2985 pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
/* DW4/DW5: partition 0 size (hardware wants size + 1) and its offset. */
2988 OUT_BCS_BATCH(batch, partition_size_0 + 1);
2989 OUT_BCS_BATCH(batch, offset);
2990 // Partition sizes in bytes are present after the first partition above when there is more than one token partition;
// each extra token partition contributes a 3-byte size record, so skip
// partition 0 plus those records to reach the first token partition.
2991 offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
/* DW6..DW21: size/offset pairs for the 8 possible token partitions;
 * slots beyond num_of_partitions are zero-filled. */
2992 for (i = 1; i < 9; i++) {
2993 if (i < slice_param->num_of_partitions) {
2994 OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
2995 OUT_BCS_BATCH(batch, offset);
2997 OUT_BCS_BATCH(batch, 0);
2998 OUT_BCS_BATCH(batch, 0);
/* Token partitions are laid out back-to-back in the slice data buffer. */
3001 offset += slice_param->partition_size[i];
3004 OUT_BCS_BATCH(batch,
3005 1 << 31 | /* concealment method */
3008 ADVANCE_BCS_BATCH(batch);
/*
 * Decode one VP8 frame: validate the decode_state buffers, then emit the
 * full MFX command sequence (pipe mode, surface/buffer states, picture
 * state, BSD object) into the BCS batch and flush it.
 */
3012 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3013 struct decode_state *decode_state,
3014 struct gen7_mfd_context *gen7_mfd_context)
3016 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3017 VAPictureParameterBufferVP8 *pic_param;
3018 VASliceParameterBufferVP8 *slice_param;
3019 dri_bo *slice_data_bo;
3021 assert(decode_state->pic_param && decode_state->pic_param->buffer);
3022 pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3024 /* VP8 carries exactly one slice per frame: require a single slice
 * parameter with one element, backed slice data, and probability data.
 * On violation warn once and bail out (early return is in elided lines). */
3025 if (decode_state->num_slice_params != 1 ||
3026 (!decode_state->slice_params ||
3027 !decode_state->slice_params[0] ||
3028 (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3029 (!decode_state->slice_datas ||
3030 !decode_state->slice_datas[0] ||
3031 !decode_state->slice_datas[0]->bo) ||
3032 !decode_state->probability_data) {
3033 WARN_ONCE("Wrong parameters for VP8 decoding\n");
3038 slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3039 slice_data_bo = decode_state->slice_datas[0]->bo;
3041 gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
/* Emit the whole frame's commands atomically so nothing else is
 * interleaved into the BCS batch; the order below is fixed by the
 * hardware's expected MFX state programming sequence. */
3042 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3043 intel_batchbuffer_emit_mi_flush(batch);
3044 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3045 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3046 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3047 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3048 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3049 gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3050 gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3051 intel_batchbuffer_end_atomic(batch);
3052 intel_batchbuffer_flush(batch);
/*
 * Top-level hw_context "run" hook: sanity-check the input buffers for the
 * requested profile, then dispatch to the per-codec decode routine.
 * NOTE(review): the `profile` parameter line is elided from this view;
 * it is used by the sanity check and the switch below.
 */
3056 gen8_mfd_decode_picture(VADriverContextP ctx,
3058 union codec_state *codec_state,
3059 struct hw_context *hw_context)
3062 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3063 struct decode_state *decode_state = &codec_state->decode;
3066 assert(gen7_mfd_context);
/* Validate picture/slice/IQ buffers before touching the hardware. */
3068 vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3070 if (vaStatus != VA_STATUS_SUCCESS)
/* Reset the MPEG-2 slice-vertical-position workaround state per frame. */
3073 gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
/* Dispatch on the decode profile (switch header elided from this view). */
3076 case VAProfileMPEG2Simple:
3077 case VAProfileMPEG2Main:
3078 gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3081 case VAProfileH264ConstrainedBaseline:
3082 case VAProfileH264Main:
3083 case VAProfileH264High:
3084 case VAProfileH264StereoHigh:
3085 case VAProfileH264MultiviewHigh:
3086 gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3089 case VAProfileVC1Simple:
3090 case VAProfileVC1Main:
3091 case VAProfileVC1Advanced:
3092 gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3095 case VAProfileJPEGBaseline:
3096 gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3099 case VAProfileVP8Version0_3:
3100 gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
/* All dispatch paths above report success here. */
3108 vaStatus = VA_STATUS_SUCCESS;
/*
 * hw_context "destroy" hook: drop every buffer-object reference held by
 * the decoder context, free its batchbuffer, and free the context itself.
 * Pointers are NULLed after unreference to guard against reuse of the
 * struct fields before the final free().
 */
3115 gen8_mfd_context_destroy(void *hw_context)
3117 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3119 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3120 gen7_mfd_context->post_deblocking_output.bo = NULL;
3122 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3123 gen7_mfd_context->pre_deblocking_output.bo = NULL;
3125 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3126 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3128 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3129 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3131 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3132 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3134 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3135 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3137 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3138 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3140 dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3141 gen7_mfd_context->segmentation_buffer.bo = NULL;
/* dri_bo_unreference tolerates NULL, so no guard is needed here. */
3143 dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3145 intel_batchbuffer_free(gen7_mfd_context->base.batch);
3146 free(gen7_mfd_context);
/*
 * Per-profile init for MPEG-2 decoding: mark all four IQ-matrix "load"
 * flags as -1, i.e. no quantiser matrix has been uploaded yet
 * (NOTE(review): -1 as "not loaded" sentinel inferred from the names —
 * confirm against the IQ-matrix handling code).
 */
3149 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3150 struct gen7_mfd_context *gen7_mfd_context)
3152 gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3153 gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3154 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3155 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
/*
 * Allocate and initialize a Gen8 MFD decoder hw_context for the given
 * config: wire the destroy/run hooks, create the BCS batchbuffer,
 * invalidate all reference-surface slots, and run per-profile setup.
 * Returns the context as a generic struct hw_context pointer.
 */
3159 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3161 struct intel_driver_data *intel = intel_driver_data(ctx);
/* NOTE(review): a NULL check on this calloc result appears to be in
 * lines elided from this view — confirm before relying on it. */
3162 struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3165 gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3166 gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3167 gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
/* No reference frames yet: mark every slot invalid. */
3169 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3170 gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3171 gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3174 gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3175 gen7_mfd_context->segmentation_buffer.valid = 0;
/* Profile-specific context setup; profiles without extra state fall
 * through to the (elided) default case. */
3177 switch (obj_config->profile) {
3178 case VAProfileMPEG2Simple:
3179 case VAProfileMPEG2Main:
3180 gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3183 case VAProfileH264ConstrainedBaseline:
3184 case VAProfileH264Main:
3185 case VAProfileH264High:
3186 case VAProfileH264StereoHigh:
3187 case VAProfileH264MultiviewHigh:
3188 gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3193 return (struct hw_context *)gen7_mfd_context;