2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/* Zig-zag scan order for an 8x8 coefficient block: entry i is the
 * raster-order index of the i-th coefficient in zig-zag scan order.
 * Presumably used to reorder quantization matrices before uploading them
 * to the MFX hardware — the consumer is not visible in this chunk, so
 * confirm usage against the full file. */
51 static const uint32_t zigzag_direct[64] = {
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
54 12, 19, 26, 33, 40, 48, 41, 34,
55 27, 20, 13, 6, 7, 14, 21, 28,
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
/*
 * gen8_mfd_init_avc_surface:
 * Lazily attach per-surface AVC private data (GenAvcSurface) to the decode
 * target and allocate its direct-MV (DMV) scratch buffer.  The DMV buffer is
 * sized for the whole frame: width_in_mbs * height_in_mbs * 128 bytes.
 * NOTE(review): the struct name keeps the gen7_ prefix; it is shared with the
 * Gen7 path rather than being Gen8-specific.
 */
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64 VAPictureParameterBufferH264 *pic_param,
65 struct object_surface *obj_surface)
67 struct i965_driver_data *i965 = i965_driver_data(ctx);
68 GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69 int width_in_mbs, height_in_mbs;
/* Install the matching destructor before any allocation so cleanup is safe. */
71 obj_surface->free_private_data = gen_free_avc_surface;
72 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
/* First use of this surface for AVC: create the private data block. */
75 if (!gen7_avc_surface) {
76 gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77 assert((obj_surface->size & 0x3f) == 0);
78 obj_surface->private_data = gen7_avc_surface;
81 /* DMV buffers now relate to the whole frame, irrespective of
83 if (gen7_avc_surface->dmv_top == NULL) {
84 gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
85 "direct mv w/r buffer",
86 width_in_mbs * height_in_mbs * 128,
88 assert(gen7_avc_surface->dmv_top);
/*
 * gen8_mfd_pipe_mode_select:
 * Emit MFX_PIPE_MODE_SELECT (5 dwords) configuring the MFX engine for
 * long-format VLD decode of the selected codec.  Output routing (pre- vs
 * post-deblocking surface) is driven by the validity flags already set up
 * in the context by the per-codec decode_init function.
 */
93 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
94 struct decode_state *decode_state,
96 struct gen7_mfd_context *gen7_mfd_context)
98 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Only these five codec formats are supported by this decode path. */
100 assert(standard_select == MFX_FORMAT_MPEG2 ||
101 standard_select == MFX_FORMAT_AVC ||
102 standard_select == MFX_FORMAT_VC1 ||
103 standard_select == MFX_FORMAT_JPEG ||
104 standard_select == MFX_FORMAT_VP8);
106 BEGIN_BCS_BATCH(batch, 5);
107 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
109 (MFX_LONG_MODE << 17) | /* Currently only support long format */
110 (MFD_MODE_VLD << 15) | /* VLD mode */
111 (0 << 10) | /* disable Stream-Out */
112 (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
113 (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
114 (0 << 5) | /* not in stitch mode */
115 (MFX_CODEC_DECODE << 4) | /* decoding mode */
116 (standard_select << 0));
118 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
119 (0 << 3) | /* terminate if AVC mbdata error occurs */
120 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
123 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
124 OUT_BCS_BATCH(batch, 0); /* reserved */
125 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_surface_state:
 * Emit MFX_SURFACE_STATE (6 dwords) describing the render target surface:
 * dimensions, pitch, Y-major tiling, and the Y offsets of the Cb/Cr planes.
 * A VA_FOURCC_Y800 target selects the monochrome surface format; everything
 * else is treated as planar 4:2:0 8-bit.
 */
129 gen8_mfd_surface_state(VADriverContextP ctx,
130 struct decode_state *decode_state,
132 struct gen7_mfd_context *gen7_mfd_context)
134 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
135 struct object_surface *obj_surface = decode_state->render_object;
136 unsigned int y_cb_offset;
137 unsigned int y_cr_offset;
138 unsigned int surface_format;
142 y_cb_offset = obj_surface->y_cb_offset;
143 y_cr_offset = obj_surface->y_cr_offset;
145 surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
146 MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
148 BEGIN_BCS_BATCH(batch, 6);
149 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
150 OUT_BCS_BATCH(batch, 0);
152 ((obj_surface->orig_height - 1) << 18) |
153 ((obj_surface->orig_width - 1) << 4));
155 (surface_format << 28) | /* 420 planar YUV surface */
156 ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
157 (0 << 22) | /* surface object control state, ignored */
158 ((obj_surface->width - 1) << 3) | /* pitch */
159 (0 << 2) | /* must be 0 */
160 (1 << 1) | /* must be tiled */
161 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
163 (0 << 16) | /* X offset for U(Cb), must be 0 */
164 (y_cb_offset << 0)); /* Y offset for U(Cb) */
166 (0 << 16) | /* X offset for V(Cr), must be 0 */
167 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
168 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_pipe_buf_addr_state:
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords).  Programs, in fixed dword slots:
 * pre/post-deblocking output surfaces, intra and deblocking-filter row-store
 * scratch buffers, the 16 reference picture surfaces, and the (unused here)
 * macroblock status buffers.  Invalid/absent buffers are written as 0.
 * On Gen8 each address occupies multiple dwords (relocation + upper bits),
 * hence the extra zero dwords after each OUT_BCS_RELOC.
 */
172 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
173 struct decode_state *decode_state,
175 struct gen7_mfd_context *gen7_mfd_context)
177 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
180 BEGIN_BCS_BATCH(batch, 61);
181 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
182 /* Pre-deblock 1-3 */
183 if (gen7_mfd_context->pre_deblocking_output.valid)
184 OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
185 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
188 OUT_BCS_BATCH(batch, 0);
190 OUT_BCS_BATCH(batch, 0);
191 OUT_BCS_BATCH(batch, 0);
192 /* Post-debloing 4-6 */
193 if (gen7_mfd_context->post_deblocking_output.valid)
194 OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
195 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
198 OUT_BCS_BATCH(batch, 0);
200 OUT_BCS_BATCH(batch, 0);
201 OUT_BCS_BATCH(batch, 0);
203 /* uncompressed-video & stream out 7-12 */
204 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
205 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
206 OUT_BCS_BATCH(batch, 0);
207 OUT_BCS_BATCH(batch, 0);
208 OUT_BCS_BATCH(batch, 0);
209 OUT_BCS_BATCH(batch, 0);
211 /* intra row-store scratch 13-15 */
212 if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
213 OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
214 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
217 OUT_BCS_BATCH(batch, 0);
219 OUT_BCS_BATCH(batch, 0);
220 OUT_BCS_BATCH(batch, 0);
221 /* deblocking-filter-row-store 16-18 */
222 if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
223 OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
224 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
227 OUT_BCS_BATCH(batch, 0);
228 OUT_BCS_BATCH(batch, 0);
229 OUT_BCS_BATCH(batch, 0);
/* Reference surfaces: one slot per entry; hardware reads, never writes (write domain 0). */
232 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
233 struct object_surface *obj_surface;
235 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
236 gen7_mfd_context->reference_surface[i].obj_surface &&
237 gen7_mfd_context->reference_surface[i].obj_surface->bo) {
238 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
240 OUT_BCS_RELOC(batch, obj_surface->bo,
241 I915_GEM_DOMAIN_INSTRUCTION, 0,
244 OUT_BCS_BATCH(batch, 0);
247 OUT_BCS_BATCH(batch, 0);
250 /* reference property 51 */
251 OUT_BCS_BATCH(batch, 0);
253 /* Macroblock status & ILDB 52-57 */
254 OUT_BCS_BATCH(batch, 0);
255 OUT_BCS_BATCH(batch, 0);
256 OUT_BCS_BATCH(batch, 0);
257 OUT_BCS_BATCH(batch, 0);
258 OUT_BCS_BATCH(batch, 0);
259 OUT_BCS_BATCH(batch, 0);
261 /* the second Macroblock status 58-60 */
262 OUT_BCS_BATCH(batch, 0);
263 OUT_BCS_BATCH(batch, 0);
264 OUT_BCS_BATCH(batch, 0);
266 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_ind_obj_base_addr_state:
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords).  Only the indirect bitstream
 * object base (the slice data BO) and its upper bound matter for decode; the
 * MV, IT-COFF, IT-DBLK and PAK-BSE indirect objects are encoder-side and are
 * programmed as 0.
 */
270 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
271 dri_bo *slice_data_bo,
273 struct gen7_mfd_context *gen7_mfd_context)
275 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
277 BEGIN_BCS_BATCH(batch, 26);
278 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
280 OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
281 OUT_BCS_BATCH(batch, 0);
282 OUT_BCS_BATCH(batch, 0);
283 /* Upper bound 4-5 */
284 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
285 OUT_BCS_BATCH(batch, 0);
287 /* MFX indirect MV 6-10 */
288 OUT_BCS_BATCH(batch, 0);
289 OUT_BCS_BATCH(batch, 0);
290 OUT_BCS_BATCH(batch, 0);
291 OUT_BCS_BATCH(batch, 0);
292 OUT_BCS_BATCH(batch, 0);
294 /* MFX IT_COFF 11-15 */
295 OUT_BCS_BATCH(batch, 0);
296 OUT_BCS_BATCH(batch, 0);
297 OUT_BCS_BATCH(batch, 0);
298 OUT_BCS_BATCH(batch, 0);
299 OUT_BCS_BATCH(batch, 0);
301 /* MFX IT_DBLK 16-20 */
302 OUT_BCS_BATCH(batch, 0);
303 OUT_BCS_BATCH(batch, 0);
304 OUT_BCS_BATCH(batch, 0);
305 OUT_BCS_BATCH(batch, 0);
306 OUT_BCS_BATCH(batch, 0);
308 /* MFX PAK_BSE object for encoder 21-25 */
309 OUT_BCS_BATCH(batch, 0);
310 OUT_BCS_BATCH(batch, 0);
311 OUT_BCS_BATCH(batch, 0);
312 OUT_BCS_BATCH(batch, 0);
313 OUT_BCS_BATCH(batch, 0);
315 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_bsp_buf_base_addr_state:
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): the BSD/MPC row-store
 * scratch buffer, the MPR row-store scratch buffer, and the bitplane read
 * buffer (VC-1 only).  Buffers whose .valid flag is clear are written as 0.
 * The bitplane buffer is read-only to the hardware (write domain 0).
 */
319 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
320 struct decode_state *decode_state,
322 struct gen7_mfd_context *gen7_mfd_context)
324 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
326 BEGIN_BCS_BATCH(batch, 10);
327 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
329 if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
330 OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
331 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
334 OUT_BCS_BATCH(batch, 0);
336 OUT_BCS_BATCH(batch, 0);
337 OUT_BCS_BATCH(batch, 0);
338 /* MPR Row Store Scratch buffer 4-6 */
339 if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
340 OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
341 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
344 OUT_BCS_BATCH(batch, 0);
346 OUT_BCS_BATCH(batch, 0);
347 OUT_BCS_BATCH(batch, 0);
350 if (gen7_mfd_context->bitplane_read_buffer.valid)
351 OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
352 I915_GEM_DOMAIN_INSTRUCTION, 0,
355 OUT_BCS_BATCH(batch, 0);
356 OUT_BCS_BATCH(batch, 0);
357 OUT_BCS_BATCH(batch, 0);
358 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_qm_state:
 * Upload one quantization-matrix table (at most 64 bytes) to the hardware
 * via MFX_QM_STATE.  The command payload is a fixed 16 dwords regardless of
 * qm_length.
 * NOTE(review): qm_buffer is not zero-initialized, so when qm_length < 64
 * (e.g. the 48-byte 4x4 AVC lists) the trailing bytes emitted into the batch
 * are stack garbage — presumably ignored by hardware for those matrix types,
 * but worth confirming; a memset before the memcpy would make it defined.
 */
362 gen8_mfd_qm_state(VADriverContextP ctx,
366 struct gen7_mfd_context *gen7_mfd_context)
368 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
369 unsigned int qm_buffer[16];
371 assert(qm_length <= 16 * 4);
372 memcpy(qm_buffer, qm, qm_length);
374 BEGIN_BCS_BATCH(batch, 18);
375 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
376 OUT_BCS_BATCH(batch, qm_type << 0);
377 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
378 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_avc_img_state:
 * Emit MFX_AVC_IMG_STATE (17 dwords) from the VA H.264 picture parameters:
 * frame size in MBs, QP index offsets, prediction/coding flags, and the
 * frame/field/MBAFF structure.  Asserts the constraints this hardware path
 * supports: 4:2:0 or monochrome chroma only, no residual colour transform,
 * and consistency between the picture structure and field_pic_flag.
 */
382 gen8_mfd_avc_img_state(VADriverContextP ctx,
383 struct decode_state *decode_state,
384 struct gen7_mfd_context *gen7_mfd_context)
386 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
388 int mbaff_frame_flag;
389 unsigned int width_in_mbs, height_in_mbs;
390 VAPictureParameterBufferH264 *pic_param;
392 assert(decode_state->pic_param && decode_state->pic_param->buffer);
393 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
394 assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
/* Derive the image structure (frame/top field/bottom field) from CurrPic flags. */
396 if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
398 else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
403 if ((img_struct & 0x1) == 0x1) {
404 assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
406 assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
409 if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
410 assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
411 assert(pic_param->pic_fields.bits.field_pic_flag == 0);
413 assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
/* MBAFF: MB-adaptive frame/field coding applies only to frame pictures. */
416 mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
417 !pic_param->pic_fields.bits.field_pic_flag);
419 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
420 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
422 /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
423 assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
424 pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
425 assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
427 BEGIN_BCS_BATCH(batch, 17);
428 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
430 (width_in_mbs * height_in_mbs - 1));
432 ((height_in_mbs - 1) << 16) |
433 ((width_in_mbs - 1) << 0));
435 ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
436 ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
437 (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
438 (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
439 (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
440 (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
443 (pic_param->seq_fields.bits.chroma_format_idc << 10) |
444 (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
445 ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
446 (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
447 (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
448 (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
449 (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
450 (mbaff_frame_flag << 1) |
451 (pic_param->pic_fields.bits.field_pic_flag << 0));
452 OUT_BCS_BATCH(batch, 0);
453 OUT_BCS_BATCH(batch, 0);
454 OUT_BCS_BATCH(batch, 0);
455 OUT_BCS_BATCH(batch, 0);
456 OUT_BCS_BATCH(batch, 0);
457 OUT_BCS_BATCH(batch, 0);
458 OUT_BCS_BATCH(batch, 0);
459 OUT_BCS_BATCH(batch, 0);
460 OUT_BCS_BATCH(batch, 0);
461 OUT_BCS_BATCH(batch, 0);
462 OUT_BCS_BATCH(batch, 0);
463 OUT_BCS_BATCH(batch, 0);
464 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_avc_qm_state:
 * Upload the AVC scaling lists.  Uses the app-supplied VAIQMatrixBufferH264
 * when present, otherwise the context's default (flat) matrices set up by
 * gen8_mfd_avc_context_init.  Always uploads the six 4x4 lists (3 intra,
 * 3 inter); the two 8x8 lists only when transform_8x8_mode_flag is set.
 */
468 gen8_mfd_avc_qm_state(VADriverContextP ctx,
469 struct decode_state *decode_state,
470 struct gen7_mfd_context *gen7_mfd_context)
472 VAIQMatrixBufferH264 *iq_matrix;
473 VAPictureParameterBufferH264 *pic_param;
475 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
476 iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
478 iq_matrix = &gen7_mfd_context->iq_matrix.h264;
480 assert(decode_state->pic_param && decode_state->pic_param->buffer);
481 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
483 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
484 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
486 if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
487 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
488 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
/*
 * gen8_mfd_avc_picid_state:
 * Emit MFD_AVC_PICID_STATE (10 dwords) with Picture ID Remapping disabled
 * (dword 1 = 1); the remap table dwords are all zero.
 */
493 gen8_mfd_avc_picid_state(VADriverContextP ctx,
494 struct decode_state *decode_state,
495 struct gen7_mfd_context *gen7_mfd_context)
497 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
499 BEGIN_BCS_BATCH(batch, 10);
500 OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
501 OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
502 OUT_BCS_BATCH(batch, 0);
503 OUT_BCS_BATCH(batch, 0);
504 OUT_BCS_BATCH(batch, 0);
505 OUT_BCS_BATCH(batch, 0);
506 OUT_BCS_BATCH(batch, 0);
507 OUT_BCS_BATCH(batch, 0);
508 OUT_BCS_BATCH(batch, 0);
509 OUT_BCS_BATCH(batch, 0);
510 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_avc_directmode_state:
 * Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): the direct-MV (DMV) buffers of
 * the 16 reference surfaces and of the current picture, followed by the
 * top/bottom picture order counts (POCs) for each reference and for the
 * current picture.  The current picture's DMV buffer is both read and
 * written by hardware; reference DMV buffers are read-only.
 */
514 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
515 struct decode_state *decode_state,
516 VAPictureParameterBufferH264 *pic_param,
517 VASliceParameterBufferH264 *slice_param,
518 struct gen7_mfd_context *gen7_mfd_context)
520 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
521 struct object_surface *obj_surface;
522 GenAvcSurface *gen7_avc_surface;
523 VAPictureH264 *va_pic;
526 BEGIN_BCS_BATCH(batch, 71);
527 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
529 /* reference surfaces 0..15 */
530 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
531 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
532 gen7_mfd_context->reference_surface[i].obj_surface &&
533 gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
535 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
536 gen7_avc_surface = obj_surface->private_data;
538 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
539 I915_GEM_DOMAIN_INSTRUCTION, 0,
541 OUT_BCS_BATCH(batch, 0);
543 OUT_BCS_BATCH(batch, 0);
544 OUT_BCS_BATCH(batch, 0);
548 OUT_BCS_BATCH(batch, 0);
550 /* the current decoding frame/field */
551 va_pic = &pic_param->CurrPic;
552 obj_surface = decode_state->render_object;
/* The current surface's private data was created in gen8_mfd_avc_decode_init. */
553 assert(obj_surface->bo && obj_surface->private_data);
554 gen7_avc_surface = obj_surface->private_data;
556 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
557 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
560 OUT_BCS_BATCH(batch, 0);
561 OUT_BCS_BATCH(batch, 0);
/* POC list: find each frame-store entry in ReferenceFrames to get its POCs. */
564 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
565 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
568 assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
570 for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
571 va_pic = &pic_param->ReferenceFrames[j];
573 if (va_pic->flags & VA_PICTURE_H264_INVALID)
576 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
583 assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
585 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
586 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
588 OUT_BCS_BATCH(batch, 0);
589 OUT_BCS_BATCH(batch, 0);
/* Finally the current picture's POCs. */
593 va_pic = &pic_param->CurrPic;
594 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
595 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
597 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_avc_slice_state:
 * Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice: normalized slice type
 * (SI->I, SP->P), active reference counts per list, QP and deblocking
 * parameters, and the MB start position of this slice and of the next slice
 * (or the picture end when this is the last slice, which also sets the
 * last-slice flag).  first_mb_in_slice is doubled for MBAFF pictures since
 * the hardware counts MB pairs there.
 */
601 gen8_mfd_avc_slice_state(VADriverContextP ctx,
602 VAPictureParameterBufferH264 *pic_param,
603 VASliceParameterBufferH264 *slice_param,
604 VASliceParameterBufferH264 *next_slice_param,
605 struct gen7_mfd_context *gen7_mfd_context)
607 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
608 int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
609 int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
610 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
611 int num_ref_idx_l0, num_ref_idx_l1;
612 int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
613 pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
614 int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
/* Collapse SI/SP onto the base I/P types used by the hardware. */
617 if (slice_param->slice_type == SLICE_TYPE_I ||
618 slice_param->slice_type == SLICE_TYPE_SI) {
619 slice_type = SLICE_TYPE_I;
620 } else if (slice_param->slice_type == SLICE_TYPE_P ||
621 slice_param->slice_type == SLICE_TYPE_SP) {
622 slice_type = SLICE_TYPE_P;
624 assert(slice_param->slice_type == SLICE_TYPE_B);
625 slice_type = SLICE_TYPE_B;
628 if (slice_type == SLICE_TYPE_I) {
629 assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
630 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
633 } else if (slice_type == SLICE_TYPE_P) {
634 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
635 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
638 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
639 num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
/* MBAFF pictures address MB pairs, hence the shift. */
642 first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
643 slice_hor_pos = first_mb_in_slice % width_in_mbs;
644 slice_ver_pos = first_mb_in_slice / width_in_mbs;
646 if (next_slice_param) {
647 first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
648 next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
649 next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
651 next_slice_hor_pos = 0;
/* Last slice: the "next slice" position is the bottom of the picture
 * (half height for a field picture). */
652 next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
655 BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
656 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
657 OUT_BCS_BATCH(batch, slice_type);
659 (num_ref_idx_l1 << 24) |
660 (num_ref_idx_l0 << 16) |
661 (slice_param->chroma_log2_weight_denom << 8) |
662 (slice_param->luma_log2_weight_denom << 0));
664 (slice_param->direct_spatial_mv_pred_flag << 29) |
665 (slice_param->disable_deblocking_filter_idc << 27) |
666 (slice_param->cabac_init_idc << 24) |
667 ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
668 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
669 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
671 (slice_ver_pos << 24) |
672 (slice_hor_pos << 16) |
673 (first_mb_in_slice << 0));
675 (next_slice_ver_pos << 16) |
676 (next_slice_hor_pos << 0));
678 (next_slice_param == NULL) << 19); /* last slice flag */
679 OUT_BCS_BATCH(batch, 0);
680 OUT_BCS_BATCH(batch, 0);
681 OUT_BCS_BATCH(batch, 0);
682 OUT_BCS_BATCH(batch, 0);
683 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_avc_ref_idx_state:
 * Thin wrapper: delegates emission of the MFX reference index state to the
 * shared gen6 helper, passing this context's batch and frame-store table.
 */
687 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
688 VAPictureParameterBufferH264 *pic_param,
689 VASliceParameterBufferH264 *slice_param,
690 struct gen7_mfd_context *gen7_mfd_context)
692 gen6_send_avc_ref_idx_state(
693 gen7_mfd_context->base.batch,
695 gen7_mfd_context->reference_surface
/*
 * gen8_mfd_avc_weightoffset_state:
 * Emit MFX_AVC_WEIGHTOFFSET_STATE (98 dwords per table) carrying explicit
 * weighted-prediction weights/offsets.  One L0 table for P/SP slices with
 * weighted_pred_flag set; L0 + L1 tables for B slices with explicit
 * weighted_bipred_idc (== 1).  Each table packs 32 entries of 6 shorts:
 * luma weight/offset then Cb and Cr weight/offset pairs.
 */
700 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
701 VAPictureParameterBufferH264 *pic_param,
702 VASliceParameterBufferH264 *slice_param,
703 struct gen7_mfd_context *gen7_mfd_context)
705 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
706 int i, j, num_weight_offset_table = 0;
707 short weightoffsets[32 * 6];
709 if ((slice_param->slice_type == SLICE_TYPE_P ||
710 slice_param->slice_type == SLICE_TYPE_SP) &&
711 (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
712 num_weight_offset_table = 1;
715 if ((slice_param->slice_type == SLICE_TYPE_B) &&
716 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
717 num_weight_offset_table = 2;
/* i == 0 emits the L0 table, i == 1 (B slices only) the L1 table. */
720 for (i = 0; i < num_weight_offset_table; i++) {
721 BEGIN_BCS_BATCH(batch, 98);
722 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
723 OUT_BCS_BATCH(batch, i);
726 for (j = 0; j < 32; j++) {
727 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
728 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
729 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
730 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
731 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
732 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
735 for (j = 0; j < 32; j++) {
736 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
737 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
738 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
739 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
740 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
741 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
745 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
746 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_avc_bsd_object:
 * Emit MFD_AVC_BSD_OBJECT (6 dwords) kicking decode of one slice from the
 * indirect bitstream buffer: slice data size and offset, the bit offset of
 * the first macroblock (skipping the slice header — byte part and the
 * residual 0..7 bits are programmed separately), and the last-slice flag.
 */
751 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
752 VAPictureParameterBufferH264 *pic_param,
753 VASliceParameterBufferH264 *slice_param,
754 dri_bo *slice_data_bo,
755 VASliceParameterBufferH264 *next_slice_param,
756 struct gen7_mfd_context *gen7_mfd_context)
758 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* CABAC vs CAVLC changes where the first MB starts within the slice data. */
759 int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
761 pic_param->pic_fields.bits.entropy_coding_mode_flag);
763 /* the input bitsteam format on GEN7 differs from GEN6 */
764 BEGIN_BCS_BATCH(batch, 6);
765 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
767 (slice_param->slice_data_size));
768 OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
776 ((slice_data_bit_offset >> 3) << 16) |
780 ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
781 (slice_data_bit_offset & 0x7));
782 OUT_BCS_BATCH(batch, 0);
783 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_avc_context_init:
 * One-time AVC context setup: seed the context's fallback IQ matrix with
 * flat (default) scaling lists, used when the app supplies none.
 */
787 gen8_mfd_avc_context_init(
788 VADriverContextP ctx,
789 struct gen7_mfd_context *gen7_mfd_context
792 /* Initialize flat scaling lists */
793 avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/*
 * gen8_mfd_avc_decode_init:
 * Per-picture AVC setup before batch emission:
 *  - scan all slices to learn whether in-loop deblocking (ILDB) is enabled
 *    anywhere (disable_deblocking_filter_idc != 1);
 *  - refresh the frame-store / reference-surface table;
 *  - ensure the render surface BO and its AVC private data (DMV) exist;
 *  - route the decoded output: post-deblocking surface when ILDB is on,
 *    pre-deblocking surface otherwise (the two .valid flags are exclusive);
 *  - (re)allocate the intra, deblocking-filter, BSD/MPC and MPR row-store
 *    scratch buffers sized from the frame width in MBs.
 */
797 gen8_mfd_avc_decode_init(VADriverContextP ctx,
798 struct decode_state *decode_state,
799 struct gen7_mfd_context *gen7_mfd_context)
801 VAPictureParameterBufferH264 *pic_param;
802 VASliceParameterBufferH264 *slice_param;
803 struct i965_driver_data *i965 = i965_driver_data(ctx);
804 struct object_surface *obj_surface;
806 int i, j, enable_avc_ildb = 0;
807 unsigned int width_in_mbs, height_in_mbs;
/* Stop scanning as soon as one slice is found with deblocking enabled. */
809 for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
810 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
811 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
813 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
814 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
815 assert((slice_param->slice_type == SLICE_TYPE_I) ||
816 (slice_param->slice_type == SLICE_TYPE_SI) ||
817 (slice_param->slice_type == SLICE_TYPE_P) ||
818 (slice_param->slice_type == SLICE_TYPE_SP) ||
819 (slice_param->slice_type == SLICE_TYPE_B));
821 if (slice_param->disable_deblocking_filter_idc != 1) {
830 assert(decode_state->pic_param && decode_state->pic_param->buffer);
831 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
832 intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
833 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
834 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
835 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
836 assert(height_in_mbs > 0 && height_in_mbs <= 256);
838 /* Current decoded picture */
839 obj_surface = decode_state->render_object;
840 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
841 obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
843 avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
844 gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
/* Output routing: exactly one of post/pre deblocking outputs is valid. */
846 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
847 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
848 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
849 gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
851 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
852 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
853 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
854 gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
856 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
857 bo = dri_bo_alloc(i965->intel.bufmgr,
862 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
863 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
865 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
866 bo = dri_bo_alloc(i965->intel.bufmgr,
867 "deblocking filter row store",
868 width_in_mbs * 64 * 4,
871 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
872 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
874 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
875 bo = dri_bo_alloc(i965->intel.bufmgr,
877 width_in_mbs * 64 * 2,
880 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
881 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
883 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
884 bo = dri_bo_alloc(i965->intel.bufmgr,
886 width_in_mbs * 64 * 2,
889 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
890 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* Bitplane buffers are VC-1 only; never used in the AVC path. */
892 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * gen8_mfd_avc_decode_picture:
 * Top-level per-frame AVC decode driver.  Runs decode_init, then emits one
 * atomic BCS batch: MI flush, pipeline/surface/buffer/QM/IMG/PICID state,
 * followed by a per-slice loop that programs the indirect bitstream base and
 * the directmode / ref-idx / weight-offset / slice / BSD-object commands.
 * next_slice_param lets each slice know where the following slice starts
 * (crossing slice-parameter buffers via next_slice_group_param; NULL marks
 * the final slice).  Ends the atomic section and flushes the batch.
 */
896 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
897 struct decode_state *decode_state,
898 struct gen7_mfd_context *gen7_mfd_context)
900 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
901 VAPictureParameterBufferH264 *pic_param;
902 VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
903 dri_bo *slice_data_bo;
906 assert(decode_state->pic_param && decode_state->pic_param->buffer);
907 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
908 gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
910 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
911 intel_batchbuffer_emit_mi_flush(batch);
912 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
913 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
914 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
915 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
916 gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
917 gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
918 gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
920 for (j = 0; j < decode_state->num_slice_params; j++) {
921 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
922 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
923 slice_data_bo = decode_state->slice_datas[j]->bo;
924 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
/* First slice of the NEXT parameter buffer, if any — used when the current
 * buffer's last slice needs a successor. */
926 if (j == decode_state->num_slice_params - 1)
927 next_slice_group_param = NULL;
929 next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
931 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
932 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
933 assert((slice_param->slice_type == SLICE_TYPE_I) ||
934 (slice_param->slice_type == SLICE_TYPE_SI) ||
935 (slice_param->slice_type == SLICE_TYPE_P) ||
936 (slice_param->slice_type == SLICE_TYPE_SP) ||
937 (slice_param->slice_type == SLICE_TYPE_B));
939 if (i < decode_state->slice_params[j]->num_elements - 1)
940 next_slice_param = slice_param + 1;
942 next_slice_param = next_slice_group_param;
944 gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
945 gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
946 gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
947 gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
948 gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
953 intel_batchbuffer_end_atomic(batch);
954 intel_batchbuffer_flush(batch);
/*
 * gen8_mfd_mpeg2_decode_init:
 * Per-picture MPEG-2 setup: populate the reference-surface table, ensure the
 * render target has an NV12 BO, route output to the pre-deblocking surface
 * (MPEG-2 has no in-loop deblocking), and allocate the BSD/MPC row-store
 * scratch buffer sized from the picture width in MBs.  All other scratch
 * buffers used by the AVC path are marked invalid.
 */
958 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
959 struct decode_state *decode_state,
960 struct gen7_mfd_context *gen7_mfd_context)
962 VAPictureParameterBufferMPEG2 *pic_param;
963 struct i965_driver_data *i965 = i965_driver_data(ctx);
964 struct object_surface *obj_surface;
966 unsigned int width_in_mbs;
968 assert(decode_state->pic_param && decode_state->pic_param->buffer);
969 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
970 width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
972 mpeg2_set_reference_surfaces(
974 gen7_mfd_context->reference_surface,
979 /* Current decoded picture */
980 obj_surface = decode_state->render_object;
981 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
983 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
984 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
985 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
986 gen7_mfd_context->pre_deblocking_output.valid = 1;
988 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
989 bo = dri_bo_alloc(i965->intel.bufmgr,
994 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
995 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
997 gen7_mfd_context->post_deblocking_output.valid = 0;
998 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
999 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1000 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1001 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Emit the 13-dword MFX_MPEG2_PIC_STATE command: packs the four f_code
 * nibbles, the picture_coding_extension flags, the picture coding type
 * and the frame dimensions (in macroblocks, minus one) for the hardware
 * bitstream decoder. */
1005 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1006 struct decode_state *decode_state,
1007 struct gen7_mfd_context *gen7_mfd_context)
1009 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1010 VAPictureParameterBufferMPEG2 *pic_param;
1011 unsigned int slice_concealment_disable_bit = 0;
1013 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1014 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
/* NOTE(review): the condition guarding this assignment (original line
 * 1015) is not visible in this listing — confirm when it is enabled. */
1016 slice_concealment_disable_bit = 1;
1018 BEGIN_BCS_BATCH(batch, 13);
1019 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
/* DW1: f_code[][] nibbles plus picture_coding_extension bit-fields,
 * laid out per the MFX_MPEG2_PIC_STATE definition. */
1020 OUT_BCS_BATCH(batch,
1021 (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1022 ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1023 ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1024 ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1025 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1026 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1027 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1028 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1029 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1030 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1031 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1032 pic_param->picture_coding_extension.bits.alternate_scan << 6);
1033 OUT_BCS_BATCH(batch,
1034 pic_param->picture_coding_type << 9);
/* DW3: concealment control plus frame size in MBs (minus one). */
1035 OUT_BCS_BATCH(batch,
1036 (slice_concealment_disable_bit << 31) |
1037 ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1038 ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
/* Remaining dwords of the command are unused and zeroed. */
1039 OUT_BCS_BATCH(batch, 0);
1040 OUT_BCS_BATCH(batch, 0);
1041 OUT_BCS_BATCH(batch, 0);
1042 OUT_BCS_BATCH(batch, 0);
1043 OUT_BCS_BATCH(batch, 0);
1044 OUT_BCS_BATCH(batch, 0);
1045 OUT_BCS_BATCH(batch, 0);
1046 OUT_BCS_BATCH(batch, 0);
1047 OUT_BCS_BATCH(batch, 0);
1048 ADVANCE_BCS_BATCH(batch);
/* Update the driver's cached MPEG-2 inverse-quantiser matrices from the
 * VA IQ-matrix buffer (de-zigzagging them via zigzag_direct[]) and then
 * load both intra and non-intra matrices into the hardware.  A cached
 * load_* value of -1 means "never initialised", forcing the first load. */
1052 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1053 struct decode_state *decode_state,
1054 struct gen7_mfd_context *gen7_mfd_context)
1056 VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1059 /* Update internal QM state */
1060 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1061 VAIQMatrixBufferMPEG2 * const iq_matrix =
1062 (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1064 if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1065 iq_matrix->load_intra_quantiser_matrix) {
1066 gen_iq_matrix->load_intra_quantiser_matrix =
1067 iq_matrix->load_intra_quantiser_matrix;
1068 if (iq_matrix->load_intra_quantiser_matrix) {
/* VA supplies the matrix in zig-zag scan order; store it in
 * raster order, as expected by the QM load below. */
1069 for (j = 0; j < 64; j++)
1070 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1071 iq_matrix->intra_quantiser_matrix[j];
1075 if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1076 iq_matrix->load_non_intra_quantiser_matrix) {
1077 gen_iq_matrix->load_non_intra_quantiser_matrix =
1078 iq_matrix->load_non_intra_quantiser_matrix;
1079 if (iq_matrix->load_non_intra_quantiser_matrix) {
1080 for (j = 0; j < 64; j++)
1081 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1082 iq_matrix->non_intra_quantiser_matrix[j];
1087 /* Commit QM state to HW */
/* One iteration per matrix type: intra, then non-intra. */
1088 for (i = 0; i < 2; i++) {
1089 unsigned char *qm = NULL;
1093 if (gen_iq_matrix->load_intra_quantiser_matrix) {
1094 qm = gen_iq_matrix->intra_quantiser_matrix;
1095 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1098 if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1099 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1100 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1107 gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
/* Emit one MFD_MPEG2_BSD_OBJECT command for a slice.  The slice's
 * macroblock span is derived from its own start position and the start
 * of the next slice (or the bottom of the picture for the last slice).
 * For field pictures affected by the slice-vertical-position workaround,
 * the reported vertical positions are halved. */
1112 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1113 VAPictureParameterBufferMPEG2 *pic_param,
1114 VASliceParameterBufferMPEG2 *slice_param,
1115 VASliceParameterBufferMPEG2 *next_slice_param,
1116 struct gen7_mfd_context *gen7_mfd_context)
1118 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1119 unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1120 int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1122 if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1123 pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
/* Workaround applies only when the app reports frame-based slice
 * vertical positions for a field picture. */
1125 is_field_pic_wa = is_field_pic &&
1126 gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1128 vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1129 hpos0 = slice_param->slice_horizontal_position;
1131 if (next_slice_param == NULL) {
/* Last slice: extend to the bottom edge of the (field) picture. */
1132 vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1135 vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1136 hpos1 = next_slice_param->slice_horizontal_position;
/* Number of macroblocks covered by this slice, in raster order. */
1139 mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1141 BEGIN_BCS_BATCH(batch, 5);
1142 OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
/* Bitstream length/offset in bytes; macroblock_offset's low 3 bits are
 * the sub-byte bit offset passed separately below. */
1143 OUT_BCS_BATCH(batch,
1144 slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1145 OUT_BCS_BATCH(batch,
1146 slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1147 OUT_BCS_BATCH(batch,
1151 (next_slice_param == NULL) << 5 |
1152 (next_slice_param == NULL) << 3 |
1153 (slice_param->macroblock_offset & 0x7));
1154 OUT_BCS_BATCH(batch,
1155 (slice_param->quantiser_scale_code << 24) |
1156 (vpos1 << 8 | hpos1));
1157 ADVANCE_BCS_BATCH(batch);
/* Top-level MPEG-2 picture decode: initialises per-picture state, then
 * emits the full MFX command sequence (pipe mode, surface, buffer
 * addresses, picture state, QM state) followed by one BSD object per
 * slice.  The whole sequence is built atomically on the BCS ring and
 * flushed at the end. */
1161 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1162 struct decode_state *decode_state,
1163 struct gen7_mfd_context *gen7_mfd_context)
1165 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1166 VAPictureParameterBufferMPEG2 *pic_param;
1167 VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1168 dri_bo *slice_data_bo;
1171 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1172 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1174 gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1175 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1176 intel_batchbuffer_emit_mi_flush(batch);
1177 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1178 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1179 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1180 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1181 gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1182 gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
/* Lazily detect (once, < 0 == undetermined) whether the app reports
 * frame- or field-relative slice vertical positions. */
1184 if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1185 gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1186 mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1188 for (j = 0; j < decode_state->num_slice_params; j++) {
1189 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1190 slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1191 slice_data_bo = decode_state->slice_datas[j]->bo;
1192 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
/* Peek at the first slice of the next parameter group so the BSD
 * object can compute this slice's macroblock span. */
1194 if (j == decode_state->num_slice_params - 1)
1195 next_slice_group_param = NULL;
1197 next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1199 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1200 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1202 if (i < decode_state->slice_params[j]->num_elements - 1)
1203 next_slice_param = slice_param + 1;
1205 next_slice_param = next_slice_group_param;
1207 gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1212 intel_batchbuffer_end_atomic(batch);
1213 intel_batchbuffer_flush(batch);
/* VA-API -> GEN7 MFX translation tables for VC-1 decoding, indexed by
 * the corresponding VA enum/bit-field values. */
1216 static const int va_to_gen7_vc1_pic_type[5] = {
1220 GEN7_VC1_BI_PICTURE,
/* Unified motion-vector mode, indexed by VAMvMode* (see vc1_pic_state). */
1224 static const int va_to_gen7_vc1_mv[4] = {
1226 2, /* 1-MV half-pel */
1227 3, /* 1-MV half-pef bilinear */
/* B-frame temporal scale factor, indexed by b_picture_fraction (the
 * BFRACTION element of the VC-1 bitstream); used to derive BRFD. */
1231 static const int b_picture_scale_factor[21] = {
1232 128, 85, 170, 64, 192,
1233 51, 102, 153, 204, 43,
1234 215, 37, 74, 111, 148,
1235 185, 222, 32, 96, 160,
/* CONDOVER (conditional overlap smoothing) translation. */
1239 static const int va_to_gen7_vc1_condover[3] = {
/* Profile translation: VA profile index -> GEN7 VC-1 profile code. */
1245 static const int va_to_gen7_vc1_profile[4] = {
1246 GEN7_VC1_SIMPLE_PROFILE,
1247 GEN7_VC1_MAIN_PROFILE,
1248 GEN7_VC1_RESERVED_PROFILE,
1249 GEN7_VC1_ADVANCED_PROFILE
/* Destructor for the per-surface VC-1 private data (installed as
 * obj_surface->free_private_data): releases the direct-MV buffer and
 * the container itself.  Safe to call when *data is NULL. */
1253 gen8_mfd_free_vc1_surface(void **data)
1255 struct gen7_vc1_surface *gen7_vc1_surface = *data;
1257 if (!gen7_vc1_surface)
1260 dri_bo_unreference(gen7_vc1_surface->dmv);
1261 free(gen7_vc1_surface);
/* Lazily attach VC-1 private data to the render surface: records the
 * picture type and allocates the direct-MV read/write buffer (64 bytes
 * per macroblock) used by B-picture direct-mode prediction. */
1266 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1267 VAPictureParameterBufferVC1 *pic_param,
1268 struct object_surface *obj_surface)
1270 struct i965_driver_data *i965 = i965_driver_data(ctx);
1271 struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1272 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1273 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1275 obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1277 if (!gen7_vc1_surface) {
/* calloc so all fields (including dmv) start out zeroed. */
1278 gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1279 assert((obj_surface->size & 0x3f) == 0);
1280 obj_surface->private_data = gen7_vc1_surface;
1283 gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1285 if (gen7_vc1_surface->dmv == NULL) {
1286 gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1287 "direct mv w/r buffer",
1288 width_in_mbs * height_in_mbs * 64,
/* Per-picture setup for VC-1 decode: binds reference and render
 * surfaces, selects pre- vs post-deblocking output based on the
 * entry-point loop-filter flag, allocates the row-store scratch
 * buffers, and repacks the VA bitplane buffer into the layout the
 * hardware's bitplane read buffer expects. */
1294 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1295 struct decode_state *decode_state,
1296 struct gen7_mfd_context *gen7_mfd_context)
1298 VAPictureParameterBufferVC1 *pic_param;
1299 struct i965_driver_data *i965 = i965_driver_data(ctx);
1300 struct object_surface *obj_surface;
1305 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1306 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1307 width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1308 picture_type = pic_param->picture_fields.bits.picture_type;
1310 intel_update_vc1_frame_store_index(ctx,
1313 gen7_mfd_context->reference_surface);
1315 /* Current decoded picture */
1316 obj_surface = decode_state->render_object;
1317 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1318 gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
/* Exactly one of pre-/post-deblocking output is valid, chosen by the
 * sequence-layer LOOPFILTER flag; both alias the render surface BO. */
1320 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1321 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1322 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1323 gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1325 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1326 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1327 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1328 gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1330 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1331 bo = dri_bo_alloc(i965->intel.bufmgr,
1336 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1337 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1339 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1340 bo = dri_bo_alloc(i965->intel.bufmgr,
1341 "deblocking filter row store",
1342 width_in_mbs * 7 * 64,
1345 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1346 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1348 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1349 bo = dri_bo_alloc(i965->intel.bufmgr,
1350 "bsd mpc row store",
1354 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1355 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1357 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1359 gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1360 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1362 if (gen7_mfd_context->bitplane_read_buffer.valid) {
1363 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1364 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
/* Two macroblocks are packed per byte (4 bits each). */
1365 int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1367 uint8_t *src = NULL, *dst = NULL;
1369 assert(decode_state->bit_plane->buffer);
1370 src = decode_state->bit_plane->buffer;
1372 bo = dri_bo_alloc(i965->intel.bufmgr,
1374 bitplane_width * height_in_mbs,
1377 gen7_mfd_context->bitplane_read_buffer.bo = bo;
1379 dri_bo_map(bo, True);
1380 assert(bo->virtual);
/* Repack nibbles row by row: the VA buffer stores even-indexed MBs in
 * the high nibble (src_shift is 4 when the linear MB index is even). */
1383 for (src_h = 0; src_h < height_in_mbs; src_h++) {
1384 for(src_w = 0; src_w < width_in_mbs; src_w++) {
1385 int src_index, dst_index;
1389 src_index = (src_h * width_in_mbs + src_w) / 2;
1390 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1391 src_value = ((src[src_index] >> src_shift) & 0xf);
/* NOTE(review): the skipped-picture branch body is partially elided
 * here — presumably it forces a fixed nibble value; confirm upstream. */
1393 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1397 dst_index = src_w / 2;
1398 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
/* Odd trailing macroblock: shift its nibble into place. */
1402 dst[src_w / 2] >>= 4;
1404 dst += bitplane_width;
1409 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
/* Build and emit the 6-dword MFD_VC1_LONG_PIC_STATE command.  This is
 * the core per-picture translation from VA-API VC-1 parameters to the
 * hardware's packed representation: alternate-PQUANT configuration,
 * unified MV mode, B-frame scale factor / BRFD, picture type fix-ups,
 * AC coding-set selection, transform-type constraints, direct-MV
 * surface validity, and the sub-pel interpolation mode. */
1413 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1414 struct decode_state *decode_state,
1415 struct gen7_mfd_context *gen7_mfd_context)
1417 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1418 VAPictureParameterBufferVC1 *pic_param;
1419 struct object_surface *obj_surface;
1420 int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1421 int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1422 int unified_mv_mode;
1423 int ref_field_pic_polarity = 0;
1424 int scale_factor = 0;
1426 int dmv_surface_valid = 0;
1432 int interpolation_mode = 0;
1434 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1435 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1437 profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1438 dquant = pic_param->pic_quantizer_fields.bits.dquant;
1439 dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1440 dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1441 dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1442 dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1443 dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1444 alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
/* Derive the alternate-PQUANT config and per-edge mask from the DQUANT
 * syntax elements (VC-1 VOPDQUANT decoding). */
1447 alt_pquant_config = 0;
1448 alt_pquant_edge_mask = 0;
1449 } else if (dquant == 2) {
1450 alt_pquant_config = 1;
1451 alt_pquant_edge_mask = 0xf;
1453 assert(dquant == 1);
1454 if (dquantfrm == 0) {
1455 alt_pquant_config = 0;
1456 alt_pquant_edge_mask = 0;
1459 assert(dquantfrm == 1);
1460 alt_pquant_config = 1;
1462 switch (dqprofile) {
1464 if (dqbilevel == 0) {
1465 alt_pquant_config = 2;
1466 alt_pquant_edge_mask = 0;
1468 assert(dqbilevel == 1);
1469 alt_pquant_config = 3;
1470 alt_pquant_edge_mask = 0;
1475 alt_pquant_edge_mask = 0xf;
1480 alt_pquant_edge_mask = 0x9;
1482 alt_pquant_edge_mask = (0x3 << dqdbedge);
1487 alt_pquant_edge_mask = (0x1 << dqsbedge);
/* Intensity compensation stores the real MV mode in mv_mode2. */
1496 if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1497 assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1498 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1500 assert(pic_param->mv_fields.bits.mv_mode < 4);
1501 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1504 if (pic_param->sequence_fields.bits.interlace == 1 &&
1505 pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1506 /* FIXME: calculate reference field picture polarity */
1508 ref_field_pic_polarity = 0;
/* BFRACTION -> temporal scale factor (guarded against out-of-range). */
1511 if (pic_param->b_picture_fraction < 21)
1512 scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1514 picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
/* Advanced-profile I pictures are handled as BI by the hardware. */
1516 if (profile == GEN7_VC1_ADVANCED_PROFILE &&
1517 picture_type == GEN7_VC1_I_PICTURE)
1518 picture_type = GEN7_VC1_BI_PICTURE;
1520 if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1521 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1523 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1526 * 8.3.6.2.1 Transform Type Selection
1527 * If variable-sized transform coding is not enabled,
1528 * then the 8x8 transform shall be used for all blocks.
1529 * it is also MFX_VC1_PIC_STATE requirement.
1531 if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1532 pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
1533 pic_param->transform_fields.bits.frame_level_transform_type = 0;
/* Direct-mode MVs for B pictures come from the backward reference's
 * DMV buffer — invalid if that reference was an I/BI picture. */
1537 if (picture_type == GEN7_VC1_B_PICTURE) {
1538 struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1540 obj_surface = decode_state->reference_objects[1];
1543 gen7_vc1_surface = obj_surface->private_data;
1545 if (!gen7_vc1_surface ||
1546 (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1547 va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1548 dmv_surface_valid = 0;
1550 dmv_surface_valid = 1;
1553 assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1555 if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1556 fcm = pic_param->picture_fields.bits.frame_coding_mode;
1558 if (pic_param->picture_fields.bits.top_field_first)
/* B-picture reference frame distance (BRFD) derivation. */
1564 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1565 brfd = pic_param->reference_fields.bits.reference_distance;
1566 brfd = (scale_factor * brfd) >> 8;
1567 brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
/* Overlap-smoothing decision for simple/main vs advanced profile. */
1574 if (profile != GEN7_VC1_ADVANCED_PROFILE){
1575 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1576 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1580 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1581 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1584 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1585 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
1586 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1588 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1589 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1595 assert(pic_param->conditional_overlap_flag < 3);
1596 assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
/* Sub-pel interpolation mode from the (possibly indirected) MV mode. */
1598 if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1599 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1600 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1601 interpolation_mode = 9; /* Half-pel bilinear */
1602 else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1603 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1604 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1605 interpolation_mode = 1; /* Half-pel bicubic */
1607 interpolation_mode = 0; /* Quarter-pel bicubic */
1609 BEGIN_BCS_BATCH(batch, 6);
1610 OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1611 OUT_BCS_BATCH(batch,
1612 (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1613 ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1614 OUT_BCS_BATCH(batch,
1615 ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1616 dmv_surface_valid << 15 |
1617 (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1618 pic_param->rounding_control << 13 |
1619 pic_param->sequence_fields.bits.syncmarker << 12 |
1620 interpolation_mode << 8 |
1621 0 << 7 | /* FIXME: scale up or down ??? */
1622 pic_param->range_reduction_frame << 6 |
1623 pic_param->entrypoint_fields.bits.loopfilter << 5 |
1625 !pic_param->picture_fields.bits.is_first_field << 3 |
1626 (pic_param->sequence_fields.bits.profile == 3) << 0);
1627 OUT_BCS_BATCH(batch,
1628 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1629 picture_type << 26 |
1632 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1634 OUT_BCS_BATCH(batch,
1635 unified_mv_mode << 28 |
1636 pic_param->mv_fields.bits.four_mv_switch << 27 |
1637 pic_param->fast_uvmc_flag << 26 |
1638 ref_field_pic_polarity << 25 |
1639 pic_param->reference_fields.bits.num_reference_pictures << 24 |
1640 pic_param->reference_fields.bits.reference_distance << 20 |
1641 pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1642 pic_param->mv_fields.bits.extended_dmv_range << 10 |
1643 pic_param->mv_fields.bits.extended_mv_range << 8 |
1644 alt_pquant_edge_mask << 4 |
1645 alt_pquant_config << 2 |
1646 pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1647 pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
/* DW5: bitplane presence (note: raw-mode flags are inverted — a set VA
 * flag means the bitplane is coded, HW wants "raw" = not coded). */
1648 OUT_BCS_BATCH(batch,
1649 !!pic_param->bitplane_present.value << 31 |
1650 !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1651 !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1652 !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1653 !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1654 !pic_param->bitplane_present.flags.bp_overflags << 26 |
1655 !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1656 !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1657 pic_param->mv_fields.bits.mv_table << 20 |
1658 pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1659 pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1660 pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1661 pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1662 pic_param->mb_mode_table << 8 |
1664 pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1665 pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1666 pic_param->cbp_table << 0);
1667 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_VC1_PRED_PIPE_STATE (6 dwords): programs whether intensity
 * compensation is active for the reference picture and the luma
 * scale/shift values to apply when it is.
 *
 * Fix: the original repeated the assert + pic_param fetch twice back to
 * back (original lines 1679-1680 and 1682-1683 were byte-identical);
 * the redundant second pair is removed. */
1671 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1672 struct decode_state *decode_state,
1673 struct gen7_mfd_context *gen7_mfd_context)
1675 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1676 VAPictureParameterBufferVC1 *pic_param;
1677 int intensitycomp_single;
1679 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1680 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
/* Intensity compensation is signalled through the unified MV mode. */
1684 intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1686 BEGIN_BCS_BATCH(batch, 6);
1687 OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1688 OUT_BCS_BATCH(batch,
1689 0 << 14 | /* FIXME: double ??? */
1691 intensitycomp_single << 10 |
1692 intensitycomp_single << 8 |
1693 0 << 4 | /* FIXME: interlace mode */
1695 OUT_BCS_BATCH(batch,
1696 pic_param->luma_shift << 16 |
1697 pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1698 OUT_BCS_BATCH(batch, 0);
1699 OUT_BCS_BATCH(batch, 0);
1700 OUT_BCS_BATCH(batch, 0);
1701 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_VC1_DIRECTMODE_STATE (7 dwords): points the hardware at the
 * direct-MV write buffer of the current picture and the direct-MV read
 * buffer of the backward reference (reference_objects[1]).  Either
 * relocation is replaced by zero padding when the buffer is absent. */
1705 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1706 struct decode_state *decode_state,
1707 struct gen7_mfd_context *gen7_mfd_context)
1709 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1710 struct object_surface *obj_surface;
1711 dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
/* Current picture: MVs are written here for future direct mode use. */
1713 obj_surface = decode_state->render_object;
1715 if (obj_surface && obj_surface->private_data) {
1716 dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
/* Backward reference: MVs are read from its DMV buffer. */
1719 obj_surface = decode_state->reference_objects[1];
1721 if (obj_surface && obj_surface->private_data) {
1722 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1725 BEGIN_BCS_BATCH(batch, 7);
1726 OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1728 if (dmv_write_buffer)
1729 OUT_BCS_RELOC(batch, dmv_write_buffer,
1730 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1733 OUT_BCS_BATCH(batch, 0);
1735 OUT_BCS_BATCH(batch, 0);
1736 OUT_BCS_BATCH(batch, 0);
1738 if (dmv_read_buffer)
1739 OUT_BCS_RELOC(batch, dmv_read_buffer,
1740 I915_GEM_DOMAIN_INSTRUCTION, 0,
1743 OUT_BCS_BATCH(batch, 0);
1745 OUT_BCS_BATCH(batch, 0);
1746 OUT_BCS_BATCH(batch, 0);
1748 ADVANCE_BCS_BATCH(batch);
/* Recompute the macroblock-layer bit offset of a VC-1 slice after
 * accounting for start-code emulation-prevention bytes (0x00 0x00 0x03
 * followed by a value < 4) in the slice header.  The scan pattern
 * matches SMPTE 421M encapsulation.
 * NOTE(review): the profile check and the inner adjustment branches are
 * elided in this listing — presumably only the advanced profile takes
 * the scanning path; confirm against the full source. */
1752 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1754 int out_slice_data_bit_offset;
/* Number of whole header bytes preceding the macroblock data. */
1755 int slice_header_size = in_slice_data_bit_offset / 8;
1759 out_slice_data_bit_offset = in_slice_data_bit_offset;
1761 for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1762 if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
/* j has skipped past any emulation-prevention bytes; keep the
 * original sub-byte bit position. */
1767 out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1770 return out_slice_data_bit_offset;
/* Emit one MFD_VC1_BSD_OBJECT command for a slice.  The slice data BO
 * is mapped temporarily so the macroblock bit offset can be corrected
 * for emulation-prevention bytes before programming the command. */
1774 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1775 VAPictureParameterBufferVC1 *pic_param,
1776 VASliceParameterBufferVC1 *slice_param,
1777 VASliceParameterBufferVC1 *next_slice_param,
1778 dri_bo *slice_data_bo,
1779 struct gen7_mfd_context *gen7_mfd_context)
1781 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1782 int next_slice_start_vert_pos;
1783 int macroblock_offset;
1784 uint8_t *slice_data = NULL;
/* Map read-only just long enough to inspect the slice header bytes. */
1786 dri_bo_map(slice_data_bo, 0);
1787 slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1788 macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1789 slice_param->macroblock_offset,
1790 pic_param->sequence_fields.bits.profile);
1791 dri_bo_unmap(slice_data_bo);
/* Last slice extends to the bottom MB row of the picture. */
1793 if (next_slice_param)
1794 next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1796 next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1798 BEGIN_BCS_BATCH(batch, 5);
1799 OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
/* Byte-aligned data length/offset; the residual bit offset (low 3
 * bits of macroblock_offset) is programmed in the last dword. */
1800 OUT_BCS_BATCH(batch,
1801 slice_param->slice_data_size - (macroblock_offset >> 3));
1802 OUT_BCS_BATCH(batch,
1803 slice_param->slice_data_offset + (macroblock_offset >> 3));
1804 OUT_BCS_BATCH(batch,
1805 slice_param->slice_vertical_position << 16 |
1806 next_slice_start_vert_pos << 0);
1807 OUT_BCS_BATCH(batch,
1808 (macroblock_offset & 0x7));
1809 ADVANCE_BCS_BATCH(batch);
/* Top-level VC-1 picture decode: per-picture init, then the MFX command
 * sequence (pipe mode, surface, buffer addresses, long pic state, pred
 * pipe, direct mode) followed by one BSD object per slice, all emitted
 * atomically on the BCS ring and flushed. */
1813 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1814 struct decode_state *decode_state,
1815 struct gen7_mfd_context *gen7_mfd_context)
1817 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1818 VAPictureParameterBufferVC1 *pic_param;
1819 VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1820 dri_bo *slice_data_bo;
1823 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1824 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1826 gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1827 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1828 intel_batchbuffer_emit_mi_flush(batch);
1829 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1830 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1831 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1832 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1833 gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1834 gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1835 gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1837 for (j = 0; j < decode_state->num_slice_params; j++) {
1838 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1839 slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1840 slice_data_bo = decode_state->slice_datas[j]->bo;
1841 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
/* The first slice of the next group supplies this slice's end row. */
1843 if (j == decode_state->num_slice_params - 1)
1844 next_slice_group_param = NULL;
1846 next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1848 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1849 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1851 if (i < decode_state->slice_params[j]->num_elements - 1)
1852 next_slice_param = slice_param + 1;
1854 next_slice_param = next_slice_group_param;
1856 gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1861 intel_batchbuffer_end_atomic(batch);
1862 intel_batchbuffer_flush(batch);
/* Per-picture setup for JPEG decode: derives the chroma subsampling and
 * render fourcc from the per-component sampling factors, allocates the
 * render surface accordingly, and marks every scratch buffer except the
 * pre-deblocking output invalid (the JPEG path uses none of them).
 *
 * Fix: the 4:2:2V detection compared "h2 == 2 && h2 == 2 && h3 == 2"
 * — the first operand was a typo for h1, so the h=(2,2,2)/v=(2,1,1)
 * sampling layout could match for streams with h1 != 2. */
1866 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1867 struct decode_state *decode_state,
1868 struct gen7_mfd_context *gen7_mfd_context)
1870 struct object_surface *obj_surface;
1871 VAPictureParameterBufferJPEGBaseline *pic_param;
1872 int subsampling = SUBSAMPLE_YUV420;
1873 int fourcc = VA_FOURCC_IMC3;
1875 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1877 if (pic_param->num_components == 1)
1878 subsampling = SUBSAMPLE_YUV400;
1879 else if (pic_param->num_components == 3) {
/* JPEG sampling factors per component (1 = Y, 2 = Cb, 3 = Cr). */
1880 int h1 = pic_param->components[0].h_sampling_factor;
1881 int h2 = pic_param->components[1].h_sampling_factor;
1882 int h3 = pic_param->components[2].h_sampling_factor;
1883 int v1 = pic_param->components[0].v_sampling_factor;
1884 int v2 = pic_param->components[1].v_sampling_factor;
1885 int v3 = pic_param->components[2].v_sampling_factor;
1887 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1888 v1 == 2 && v2 == 1 && v3 == 1) {
1889 subsampling = SUBSAMPLE_YUV420;
1890 fourcc = VA_FOURCC_IMC3;
1891 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1892 v1 == 1 && v2 == 1 && v3 == 1) {
1893 subsampling = SUBSAMPLE_YUV422H;
1894 fourcc = VA_FOURCC_422H;
1895 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1896 v1 == 1 && v2 == 1 && v3 == 1) {
1897 subsampling = SUBSAMPLE_YUV444;
1898 fourcc = VA_FOURCC_444P;
1899 } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1900 v1 == 1 && v2 == 1 && v3 == 1) {
1901 subsampling = SUBSAMPLE_YUV411;
1902 fourcc = VA_FOURCC_411P;
1903 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1904 v1 == 2 && v2 == 1 && v3 == 1) {
1905 subsampling = SUBSAMPLE_YUV422V;
1906 fourcc = VA_FOURCC_422V;
1907 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1908 v1 == 2 && v2 == 2 && v3 == 2) {
1909 subsampling = SUBSAMPLE_YUV422H;
1910 fourcc = VA_FOURCC_422H;
1911 } else if (h1 == 2 && h2 == 2 && h3 == 2 && /* was "h2 == 2 && h2 == 2" — typo for h1 */
1912 v1 == 2 && v2 == 1 && v3 == 1) {
1913 subsampling = SUBSAMPLE_YUV422V;
1914 fourcc = VA_FOURCC_422V;
1922 /* Current decoded picture */
1923 obj_surface = decode_state->render_object;
1924 i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
/* JPEG output bypasses deblocking: render surface is the raw output. */
1926 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1927 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1928 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1929 gen7_mfd_context->pre_deblocking_output.valid = 1;
/* No other pipeline buffer is used for JPEG decode. */
1931 gen7_mfd_context->post_deblocking_output.bo = NULL;
1932 gen7_mfd_context->post_deblocking_output.valid = 0;
1934 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1935 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1937 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1938 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1940 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1941 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1943 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1944 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1946 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1947 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Map the VA rotation index (0 / 90 / 180 / 270 degrees) to the GEN7 MFX
 * JPEG rotation code programmed into MFX_JPEG_PIC_STATE.  Only entry 0
 * (no rotation) is used by gen8_mfd_jpeg_pic_state(). */
1950 static const int va_to_gen7_jpeg_rotation[4] = {
1951 GEN7_JPEG_ROTATION_0,
1952 GEN7_JPEG_ROTATION_90,
1953 GEN7_JPEG_ROTATION_180,
1954 GEN7_JPEG_ROTATION_270
1958 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1959 struct decode_state *decode_state,
1960 struct gen7_mfd_context *gen7_mfd_context)
1962 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1963 VAPictureParameterBufferJPEGBaseline *pic_param;
1964 int chroma_type = GEN7_YUV420;
1965 int frame_width_in_blks;
1966 int frame_height_in_blks;
1968 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1969 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1971 if (pic_param->num_components == 1)
1972 chroma_type = GEN7_YUV400;
1973 else if (pic_param->num_components == 3) {
1974 int h1 = pic_param->components[0].h_sampling_factor;
1975 int h2 = pic_param->components[1].h_sampling_factor;
1976 int h3 = pic_param->components[2].h_sampling_factor;
1977 int v1 = pic_param->components[0].v_sampling_factor;
1978 int v2 = pic_param->components[1].v_sampling_factor;
1979 int v3 = pic_param->components[2].v_sampling_factor;
1981 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1982 v1 == 2 && v2 == 1 && v3 == 1)
1983 chroma_type = GEN7_YUV420;
1984 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1985 v1 == 1 && v2 == 1 && v3 == 1)
1986 chroma_type = GEN7_YUV422H_2Y;
1987 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1988 v1 == 1 && v2 == 1 && v3 == 1)
1989 chroma_type = GEN7_YUV444;
1990 else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1991 v1 == 1 && v2 == 1 && v3 == 1)
1992 chroma_type = GEN7_YUV411;
1993 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1994 v1 == 2 && v2 == 1 && v3 == 1)
1995 chroma_type = GEN7_YUV422V_2Y;
1996 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1997 v1 == 2 && v2 == 2 && v3 == 2)
1998 chroma_type = GEN7_YUV422H_4Y;
1999 else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2000 v1 == 2 && v2 == 1 && v3 == 1)
2001 chroma_type = GEN7_YUV422V_4Y;
2006 if (chroma_type == GEN7_YUV400 ||
2007 chroma_type == GEN7_YUV444 ||
2008 chroma_type == GEN7_YUV422V_2Y) {
2009 frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2010 frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2011 } else if (chroma_type == GEN7_YUV411) {
2012 frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2013 frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2015 frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2016 frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2019 BEGIN_BCS_BATCH(batch, 3);
2020 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2021 OUT_BCS_BATCH(batch,
2022 (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
2023 (chroma_type << 0));
2024 OUT_BCS_BATCH(batch,
2025 ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
2026 ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
2027 ADVANCE_BCS_BATCH(batch);
2030 static const int va_to_gen7_jpeg_hufftable[2] = {
/* Program MFX_JPEG_HUFF_TABLE_STATE for every Huffman table flagged as loaded
 * in the VA Huffman table buffer.  Silently does nothing if no Huffman table
 * buffer was supplied.  The DC code-length counts (12), DC values (12),
 * AC code-length counts (16) and AC values are copied verbatim into the batch;
 * the 164-byte AC payload presumably covers the 162 baseline AC values plus
 * padding — TODO confirm against the PRM. */
2036 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2037 struct decode_state *decode_state,
2038 struct gen7_mfd_context *gen7_mfd_context,
2041 VAHuffmanTableBufferJPEGBaseline *huffman_table;
2042 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2045 if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2048 huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2050 for (index = 0; index < num_tables; index++) {
2051 int id = va_to_gen7_jpeg_hufftable[index];
2052 if (!huffman_table->load_huffman_table[index])
2054 BEGIN_BCS_BATCH(batch, 53);
2055 OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2056 OUT_BCS_BATCH(batch, id);
2057 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2058 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2059 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2060 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2061 ADVANCE_BCS_BATCH(batch);
/* Map the 1-based JPEG component id (Y=1, Cb=2, Cr=3, alpha=4) to the MFX
 * quantizer-matrix type for MFX_QM_STATE.  NOTE(review): the entry for
 * index 0 is elided in this excerpt; indices used are always 1..4
 * (see the id range check in gen8_mfd_jpeg_qm_state). */
2065 static const int va_to_gen7_jpeg_qm[5] = {
2067 MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2068 MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2069 MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2070 MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
/* Program the per-component JPEG quantizer matrices.
 * The VA quantiser tables arrive in zig-zag scan order; the hardware wants
 * raster order, so each table is de-zigzagged via zigzag_direct[] before
 * being handed to gen8_mfd_qm_state().  Components whose derived id falls
 * outside 1..4, or whose table is not flagged as loaded, are skipped. */
2074 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2075 struct decode_state *decode_state,
2076 struct gen7_mfd_context *gen7_mfd_context)
2078 VAPictureParameterBufferJPEGBaseline *pic_param;
2079 VAIQMatrixBufferJPEGBaseline *iq_matrix;
2082 if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2085 iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2086 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2088 assert(pic_param->num_components <= 3);
2090 for (index = 0; index < pic_param->num_components; index++) {
/* Normalize the stream's component_id to a 1-based index (Y=1, Cb=2, Cr=3). */
2091 int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2093 unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2094 unsigned char raster_qm[64];
2097 if (id > 4 || id < 1)
2100 if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2103 qm_type = va_to_gen7_jpeg_qm[id];
/* Convert zig-zag order to raster order. */
2105 for (j = 0; j < 64; j++)
2106 raster_qm[zigzag_direct[j]] = qm[j];
2108 gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
/* Emit one MFD_JPEG_BSD_OBJECT for a scan (slice).
 * Builds the scan-component bitmask from the per-scan component selectors,
 * normalized against the first picture component id, then programs the
 * slice data size/offset, the scan start position, interleave flag,
 * MCU count and restart interval. */
2113 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2114 VAPictureParameterBufferJPEGBaseline *pic_param,
2115 VASliceParameterBufferJPEGBaseline *slice_param,
2116 VASliceParameterBufferJPEGBaseline *next_slice_param,
2117 dri_bo *slice_data_bo,
2118 struct gen7_mfd_context *gen7_mfd_context)
2120 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2121 int scan_component_mask = 0;
2124 assert(slice_param->num_components > 0);
2125 assert(slice_param->num_components < 4);
2126 assert(slice_param->num_components <= pic_param->num_components);
2128 for (i = 0; i < slice_param->num_components; i++) {
2129 switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2131 scan_component_mask |= (1 << 0);
2134 scan_component_mask |= (1 << 1);
2137 scan_component_mask |= (1 << 2);
2145 BEGIN_BCS_BATCH(batch, 6);
2146 OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2147 OUT_BCS_BATCH(batch,
2148 slice_param->slice_data_size);
2149 OUT_BCS_BATCH(batch,
2150 slice_param->slice_data_offset);
2151 OUT_BCS_BATCH(batch,
2152 slice_param->slice_horizontal_position << 16 |
2153 slice_param->slice_vertical_position << 0);
2154 OUT_BCS_BATCH(batch,
2155 ((slice_param->num_components != 1) << 30) | /* interleaved */
2156 (scan_component_mask << 27) | /* scan components */
2157 (0 << 26) | /* disable interrupt allowed */
2158 (slice_param->num_mcus << 0)); /* MCU count */
2159 OUT_BCS_BATCH(batch,
2160 (slice_param->restart_interval << 0)); /* RestartInterval */
2161 ADVANCE_BCS_BATCH(batch);
2164 /* Workaround for JPEG decoding on Ivybridge */
/* Forward declaration so the workaround can create its scratch surface
 * without pulling in the full driver header. */
2168 i965_CreateSurfaces(VADriverContextP ctx,
2173 VASurfaceID *surfaces);
/* Canned bitstream of a tiny AVC clip decoded before every JPEG frame to put
 * the MFX engine into a known state.  NOTE(review): the struct's leading
 * members (width/height/data_size/qp) are elided in this excerpt — confirm
 * against the full file. */
2178 unsigned char data[32];
2180 int data_bit_offset;
2182 } gen7_jpeg_wa_clip = {
2186 0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2187 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
/* Set up the resources for the JPEG workaround: (re)create a small NV12
 * surface sized for the canned AVC clip, and lazily allocate + upload the
 * clip's slice data into a persistent bo.  Any previously created workaround
 * surface is destroyed first. */
2195 gen8_jpeg_wa_init(VADriverContextP ctx,
2196 struct gen7_mfd_context *gen7_mfd_context)
2198 struct i965_driver_data *i965 = i965_driver_data(ctx);
2200 struct object_surface *obj_surface;
2202 if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2203 i965_DestroySurfaces(ctx,
2204 &gen7_mfd_context->jpeg_wa_surface_id,
2207 status = i965_CreateSurfaces(ctx,
2208 gen7_jpeg_wa_clip.width,
2209 gen7_jpeg_wa_clip.height,
2210 VA_RT_FORMAT_YUV420,
2212 &gen7_mfd_context->jpeg_wa_surface_id);
2213 assert(status == VA_STATUS_SUCCESS);
2215 obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2216 assert(obj_surface);
2217 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2218 gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
/* The slice data never changes, so upload it only once per context. */
2220 if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2221 gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2225 dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2227 gen7_jpeg_wa_clip.data_size,
2228 gen7_jpeg_wa_clip.data);
/* Emit MFX_PIPE_MODE_SELECT for the workaround pass: AVC VLD decode,
 * long format, stream-out disabled, pre-deblocking output only. */
2233 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2234 struct gen7_mfd_context *gen7_mfd_context)
2236 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2238 BEGIN_BCS_BATCH(batch, 5);
2239 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2240 OUT_BCS_BATCH(batch,
2241 (MFX_LONG_MODE << 17) | /* Currently only support long format */
2242 (MFD_MODE_VLD << 15) | /* VLD mode */
2243 (0 << 10) | /* disable Stream-Out */
2244 (0 << 9) | /* Post Deblocking Output */
2245 (1 << 8) | /* Pre Deblocking Output */
2246 (0 << 5) | /* not in stitch mode */
2247 (MFX_CODEC_DECODE << 4) | /* decoding mode */
2248 (MFX_FORMAT_AVC << 0));
2249 OUT_BCS_BATCH(batch,
2250 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
2251 (0 << 3) | /* terminate if AVC mbdata error occurs */
2252 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
2255 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2256 OUT_BCS_BATCH(batch, 0); /* reserved */
2257 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_SURFACE_STATE describing the workaround's NV12 scratch surface
 * (planar 4:2:0, interleaved chroma, Y-major tiled). */
2261 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2262 struct gen7_mfd_context *gen7_mfd_context)
2264 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2265 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2267 BEGIN_BCS_BATCH(batch, 6);
2268 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2269 OUT_BCS_BATCH(batch, 0);
2270 OUT_BCS_BATCH(batch,
2271 ((obj_surface->orig_width - 1) << 18) |
2272 ((obj_surface->orig_height - 1) << 4));
2273 OUT_BCS_BATCH(batch,
2274 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2275 (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2276 (0 << 22) | /* surface object control state, ignored */
2277 ((obj_surface->width - 1) << 3) | /* pitch */
2278 (0 << 2) | /* must be 0 */
2279 (1 << 1) | /* must be tiled */
2280 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
2281 OUT_BCS_BATCH(batch,
2282 (0 << 16) | /* X offset for U(Cb), must be 0 */
2283 (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2284 OUT_BCS_BATCH(batch,
2285 (0 << 16) | /* X offset for V(Cr), must be 0 */
2286 (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
2287 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_PIPE_BUF_ADDR_STATE for the workaround pass.  Only the
 * pre-deblocking output (the scratch surface bo) and a temporary intra
 * row-store buffer are bound; all other buffers/reference slots are zeroed.
 * The temporary bo is unreferenced at the end — the kernel keeps it alive
 * until the batch completes. */
2291 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2292 struct gen7_mfd_context *gen7_mfd_context)
2294 struct i965_driver_data *i965 = i965_driver_data(ctx);
2295 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2296 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2300 intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2305 BEGIN_BCS_BATCH(batch, 61);
2306 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2307 OUT_BCS_RELOC(batch,
2309 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2311 OUT_BCS_BATCH(batch, 0);
2312 OUT_BCS_BATCH(batch, 0);
2315 OUT_BCS_BATCH(batch, 0); /* post deblocking */
2316 OUT_BCS_BATCH(batch, 0);
2317 OUT_BCS_BATCH(batch, 0);
2319 /* uncompressed-video & stream out 7-12 */
2320 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2321 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2322 OUT_BCS_BATCH(batch, 0);
2323 OUT_BCS_BATCH(batch, 0);
2324 OUT_BCS_BATCH(batch, 0);
2325 OUT_BCS_BATCH(batch, 0);
2327 /* the DW 13-15 is for intra row store scratch */
2328 OUT_BCS_RELOC(batch,
2330 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2332 OUT_BCS_BATCH(batch, 0);
2333 OUT_BCS_BATCH(batch, 0);
2335 /* the DW 16-18 is for deblocking filter */
2336 OUT_BCS_BATCH(batch, 0);
2337 OUT_BCS_BATCH(batch, 0);
2338 OUT_BCS_BATCH(batch, 0);
2341 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2342 OUT_BCS_BATCH(batch, 0);
2343 OUT_BCS_BATCH(batch, 0);
2345 OUT_BCS_BATCH(batch, 0);
2347 /* the DW52-54 is for mb status address */
2348 OUT_BCS_BATCH(batch, 0);
2349 OUT_BCS_BATCH(batch, 0);
2350 OUT_BCS_BATCH(batch, 0);
2351 /* the DW56-60 is for ILDB & second ILDB address */
2352 OUT_BCS_BATCH(batch, 0);
2353 OUT_BCS_BATCH(batch, 0);
2354 OUT_BCS_BATCH(batch, 0);
2355 OUT_BCS_BATCH(batch, 0);
2356 OUT_BCS_BATCH(batch, 0);
2357 OUT_BCS_BATCH(batch, 0);
2359 ADVANCE_BCS_BATCH(batch);
2361 dri_bo_unreference(intra_bo);
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE for the workaround pass with freshly
 * allocated BSD/MPC and MPR row-store buffers; both bos are unreferenced
 * immediately after emission (kept alive by the batch). */
2365 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2366 struct gen7_mfd_context *gen7_mfd_context)
2368 struct i965_driver_data *i965 = i965_driver_data(ctx);
2369 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2370 dri_bo *bsd_mpc_bo, *mpr_bo;
2372 bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2373 "bsd mpc row store",
2374 11520, /* 1.5 * 120 * 64 */
2377 mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2379 7680, /* 1. 0 * 120 * 64 */
2382 BEGIN_BCS_BATCH(batch, 10);
2383 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2385 OUT_BCS_RELOC(batch,
2387 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2390 OUT_BCS_BATCH(batch, 0);
2391 OUT_BCS_BATCH(batch, 0);
2393 OUT_BCS_RELOC(batch,
2395 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2397 OUT_BCS_BATCH(batch, 0);
2398 OUT_BCS_BATCH(batch, 0);
2400 OUT_BCS_BATCH(batch, 0);
2401 OUT_BCS_BATCH(batch, 0);
2402 OUT_BCS_BATCH(batch, 0);
2404 ADVANCE_BCS_BATCH(batch);
2406 dri_bo_unreference(bsd_mpc_bo);
2407 dri_bo_unreference(mpr_bo);
/* Intentionally a no-op: the workaround clip uses the hardware default
 * (flat) quantizer matrices, so no MFX_QM_STATE needs to be emitted. */
2411 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2412 struct gen7_mfd_context *gen7_mfd_context)
/* Emit MFX_AVC_IMG_STATE for the 1x1-macroblock workaround clip
 * (CABAC, 4:2:0, no MBAFF); all remaining DWs are zero. */
2418 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2419 struct gen7_mfd_context *gen7_mfd_context)
2421 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2423 int mbaff_frame_flag = 0;
2424 unsigned int width_in_mbs = 1, height_in_mbs = 1;
2426 BEGIN_BCS_BATCH(batch, 16);
2427 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2428 OUT_BCS_BATCH(batch,
2429 width_in_mbs * height_in_mbs);
2430 OUT_BCS_BATCH(batch,
2431 ((height_in_mbs - 1) << 16) |
2432 ((width_in_mbs - 1) << 0));
2433 OUT_BCS_BATCH(batch,
2438 (0 << 12) | /* differ from GEN6 */
2441 OUT_BCS_BATCH(batch,
2442 (1 << 10) | /* 4:2:0 */
2443 (1 << 7) | /* CABAC */
2449 (mbaff_frame_flag << 1) |
2451 OUT_BCS_BATCH(batch, 0);
2452 OUT_BCS_BATCH(batch, 0);
2453 OUT_BCS_BATCH(batch, 0);
2454 OUT_BCS_BATCH(batch, 0);
2455 OUT_BCS_BATCH(batch, 0);
2456 OUT_BCS_BATCH(batch, 0);
2457 OUT_BCS_BATCH(batch, 0);
2458 OUT_BCS_BATCH(batch, 0);
2459 OUT_BCS_BATCH(batch, 0);
2460 OUT_BCS_BATCH(batch, 0);
2461 OUT_BCS_BATCH(batch, 0);
2462 ADVANCE_BCS_BATCH(batch);
/* Emit an all-zero MFX_AVC_DIRECTMODE_STATE: the workaround clip is a
 * single I slice, so no direct-mode reference/POC data is needed. */
2466 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2467 struct gen7_mfd_context *gen7_mfd_context)
2469 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2472 BEGIN_BCS_BATCH(batch, 71);
2473 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2475 /* reference surfaces 0..15 */
2476 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2477 OUT_BCS_BATCH(batch, 0); /* top */
2478 OUT_BCS_BATCH(batch, 0); /* bottom */
2481 OUT_BCS_BATCH(batch, 0);
2483 /* the current decoding frame/field */
2484 OUT_BCS_BATCH(batch, 0); /* top */
2485 OUT_BCS_BATCH(batch, 0);
2486 OUT_BCS_BATCH(batch, 0);
2489 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2490 OUT_BCS_BATCH(batch, 0);
2491 OUT_BCS_BATCH(batch, 0);
2494 OUT_BCS_BATCH(batch, 0);
2495 OUT_BCS_BATCH(batch, 0);
2497 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE pointing the bitstream fetch at the
 * workaround clip's slice data bo; all other object bases are unused in
 * VLD mode. */
2501 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2502 struct gen7_mfd_context *gen7_mfd_context)
2504 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2506 BEGIN_BCS_BATCH(batch, 11);
2507 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2508 OUT_BCS_RELOC(batch,
2509 gen7_mfd_context->jpeg_wa_slice_data_bo,
2510 I915_GEM_DOMAIN_INSTRUCTION, 0,
2512 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2513 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2514 OUT_BCS_BATCH(batch, 0);
2515 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2516 OUT_BCS_BATCH(batch, 0);
2517 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2518 OUT_BCS_BATCH(batch, 0);
2519 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2520 OUT_BCS_BATCH(batch, 0);
2521 ADVANCE_BCS_BATCH(batch);
/* Emit MFD_AVC_BSD_OBJECT for the workaround clip: the whole canned
 * bitstream as a single, last slice, with the first-macroblock bit offset
 * split into its byte and bit components. */
2525 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2526 struct gen7_mfd_context *gen7_mfd_context)
2528 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2530 /* the input bitstream format on GEN7 differs from GEN6 */
2531 BEGIN_BCS_BATCH(batch, 6);
2532 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2533 OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2534 OUT_BCS_BATCH(batch, 0);
2535 OUT_BCS_BATCH(batch,
2541 OUT_BCS_BATCH(batch,
2542 ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2545 (1 << 3) | /* LastSlice Flag */
2546 (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2547 OUT_BCS_BATCH(batch, 0);
2548 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_SLICE_STATE for the workaround clip: one I slice covering
 * the whole (1x1 MB) frame, deblocking disabled, QP from the canned clip. */
2552 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2553 struct gen7_mfd_context *gen7_mfd_context)
2555 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2556 int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2557 int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2558 int first_mb_in_slice = 0;
2559 int slice_type = SLICE_TYPE_I;
2561 BEGIN_BCS_BATCH(batch, 11);
2562 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2563 OUT_BCS_BATCH(batch, slice_type);
2564 OUT_BCS_BATCH(batch,
2565 (num_ref_idx_l1 << 24) |
2566 (num_ref_idx_l0 << 16) |
2569 OUT_BCS_BATCH(batch,
2571 (1 << 27) | /* disable Deblocking */
2573 (gen7_jpeg_wa_clip.qp << 16) |
2576 OUT_BCS_BATCH(batch,
2577 (slice_ver_pos << 24) |
2578 (slice_hor_pos << 16) |
2579 (first_mb_in_slice << 0));
2580 OUT_BCS_BATCH(batch,
2581 (next_slice_ver_pos << 16) |
2582 (next_slice_hor_pos << 0));
2583 OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2584 OUT_BCS_BATCH(batch, 0);
2585 OUT_BCS_BATCH(batch, 0);
2586 OUT_BCS_BATCH(batch, 0);
2587 OUT_BCS_BATCH(batch, 0);
2588 ADVANCE_BCS_BATCH(batch);
/* Run the full JPEG workaround sequence: decode the canned AVC clip into a
 * scratch surface before the real JPEG frame to put the MFX engine into a
 * known-good state (Ivybridge-era JPEG decoding erratum). */
2592 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2593 struct gen7_mfd_context *gen7_mfd_context)
2595 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2596 gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2597 intel_batchbuffer_emit_mi_flush(batch);
2598 gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2599 gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2600 gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2601 gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2602 gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2603 gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2604 gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2606 gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2607 gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2608 gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
/* Top-level JPEG baseline decode entry point.
 * After init and the hardware workaround, the slice parameters are walked
 * twice: a first pass finds the highest Huffman table selector actually
 * referenced (so only the needed tables are programmed), then a second pass
 * emits one MFD_JPEG_BSD_OBJECT per scan. */
2614 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2615 struct decode_state *decode_state,
2616 struct gen7_mfd_context *gen7_mfd_context)
2618 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2619 VAPictureParameterBufferJPEGBaseline *pic_param;
2620 VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2621 dri_bo *slice_data_bo;
2622 int i, j, max_selector = 0;
2624 assert(decode_state->pic_param && decode_state->pic_param->buffer);
2625 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2627 /* Currently only support Baseline DCT */
2628 gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2629 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2631 gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2633 intel_batchbuffer_emit_mi_flush(batch);
2634 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2635 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2636 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2637 gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2638 gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
/* Pass 1: scan all slices to find the largest DC/AC table selector used. */
2640 for (j = 0; j < decode_state->num_slice_params; j++) {
2641 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2642 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2643 slice_data_bo = decode_state->slice_datas[j]->bo;
2644 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2646 if (j == decode_state->num_slice_params - 1)
2647 next_slice_group_param = NULL;
2649 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2651 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2654 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2656 if (i < decode_state->slice_params[j]->num_elements - 1)
2657 next_slice_param = slice_param + 1;
2659 next_slice_param = next_slice_group_param;
2661 for (component = 0; component < slice_param->num_components; component++) {
2662 if (max_selector < slice_param->components[component].dc_table_selector)
2663 max_selector = slice_param->components[component].dc_table_selector;
2665 if (max_selector < slice_param->components[component].ac_table_selector)
2666 max_selector = slice_param->components[component].ac_table_selector;
2673 assert(max_selector < 2);
2674 gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
/* Pass 2: program the bitstream base per slice group and emit one BSD
 * object per scan. */
2676 for (j = 0; j < decode_state->num_slice_params; j++) {
2677 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2678 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2679 slice_data_bo = decode_state->slice_datas[j]->bo;
2680 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2682 if (j == decode_state->num_slice_params - 1)
2683 next_slice_group_param = NULL;
2685 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2687 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2688 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2690 if (i < decode_state->slice_params[j]->num_elements - 1)
2691 next_slice_param = slice_param + 1;
2693 next_slice_param = next_slice_group_param;
2695 gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2700 intel_batchbuffer_end_atomic(batch);
2701 intel_batchbuffer_flush(batch);
/* VP8 DC quantizer lookup table, indexed by the clipped quantization index
 * (0..127).  Values follow the dc_qlookup table from the VP8 specification
 * (RFC 6386, section 14.1). */
2704 static const int vp8_dc_qlookup[128] =
2706 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
2707 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
2708 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
2709 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
2710 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
2711 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
2712 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2713 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
/* VP8 AC quantizer lookup table, indexed by the clipped quantization index
 * (0..127).  Values follow the ac_qlookup table from the VP8 specification
 * (RFC 6386, section 14.1). */
2716 static const int vp8_ac_qlookup[128] =
2718 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
2719 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
2720 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
2721 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
2722 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
2723 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2724 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2725 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2728 static inline unsigned int vp8_clip_quantization_index(int index)
/* Prepare the decoder context for a VP8 frame: validate frame dimensions,
 * refresh the reference frame store, bind the render surface as both
 * pre- and post-deblocking output (exactly one is valid, depending on
 * loop_filter_disable), ensure the segmentation buffer exists, and
 * (re)allocate the AVC-style row-store scratch buffers. */
2739 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2740 struct decode_state *decode_state,
2741 struct gen7_mfd_context *gen7_mfd_context)
2743 struct object_surface *obj_surface;
2744 struct i965_driver_data *i965 = i965_driver_data(ctx);
2746 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2747 int width_in_mbs = (pic_param->frame_width + 15) / 16;
2748 int height_in_mbs = (pic_param->frame_height + 15) / 16;
2750 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2751 assert(height_in_mbs > 0 && height_in_mbs <= 256);
2753 intel_update_vp8_frame_store_index(ctx,
2756 gen7_mfd_context->reference_surface);
2758 /* Current decoded picture */
2759 obj_surface = decode_state->render_object;
2760 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Post-deblocking output is valid only when the loop filter is enabled;
 * pre-deblocking output is valid in the opposite case. */
2762 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2763 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2764 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2765 gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2767 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2768 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2769 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2770 gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2772 intel_ensure_vp8_segmentation_buffer(ctx,
2773 &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2775 /* The same as AVC */
2776 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2777 bo = dri_bo_alloc(i965->intel.bufmgr,
2782 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2783 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2785 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2786 bo = dri_bo_alloc(i965->intel.bufmgr,
2787 "deblocking filter row store",
2788 width_in_mbs * 64 * 4,
2791 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2792 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2794 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2795 bo = dri_bo_alloc(i965->intel.bufmgr,
2796 "bsd mpc row store",
2797 width_in_mbs * 64 * 2,
2800 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2801 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2803 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2804 bo = dri_bo_alloc(i965->intel.bufmgr,
2806 width_in_mbs * 64 * 2,
2809 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2810 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2812 gen7_mfd_context->bitplane_read_buffer.valid = 0;
2816 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2817 struct decode_state *decode_state,
2818 struct gen7_mfd_context *gen7_mfd_context)
2820 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2821 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2822 VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2823 VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2824 dri_bo *probs_bo = decode_state->probability_data->bo;
2826 unsigned int quantization_value[4][6];
2828 /* There is no safe way to error out if the segmentation buffer
2829 could not be allocated. So, instead of aborting, simply decode
2830 something even if the result may look totally inacurate */
2831 const unsigned int enable_segmentation =
2832 pic_param->pic_fields.bits.segmentation_enabled &&
2833 gen7_mfd_context->segmentation_buffer.valid;
2835 log2num = (int)log2(slice_param->num_of_partitions - 1);
2837 BEGIN_BCS_BATCH(batch, 38);
2838 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2839 OUT_BCS_BATCH(batch,
2840 (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2841 (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2842 OUT_BCS_BATCH(batch,
2844 pic_param->pic_fields.bits.sharpness_level << 16 |
2845 pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2846 pic_param->pic_fields.bits.sign_bias_golden << 12 |
2847 pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2848 pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2849 pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2850 pic_param->pic_fields.bits.segmentation_enabled << 8 |
2851 (enable_segmentation &&
2852 !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2853 (enable_segmentation &&
2854 pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2855 (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2856 pic_param->pic_fields.bits.filter_type << 4 |
2857 (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2858 !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2860 OUT_BCS_BATCH(batch,
2861 pic_param->loop_filter_level[3] << 24 |
2862 pic_param->loop_filter_level[2] << 16 |
2863 pic_param->loop_filter_level[1] << 8 |
2864 pic_param->loop_filter_level[0] << 0);
2866 /* Quantizer Value for 4 segmetns, DW4-DW15 */
2867 for (i = 0; i < 4; i++) {
2868 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2869 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2870 quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
2871 /* 101581>>16 is equivalent to 155/100 */
2872 quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
2873 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2874 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2876 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2877 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2879 OUT_BCS_BATCH(batch,
2880 quantization_value[i][0] << 16 | /* Y1AC */
2881 quantization_value[i][1] << 0); /* Y1DC */
2882 OUT_BCS_BATCH(batch,
2883 quantization_value[i][5] << 16 | /* UVAC */
2884 quantization_value[i][4] << 0); /* UVDC */
2885 OUT_BCS_BATCH(batch,
2886 quantization_value[i][3] << 16 | /* Y2AC */
2887 quantization_value[i][2] << 0); /* Y2DC */
2890 /* CoeffProbability table for non-key frame, DW16-DW18 */
2892 OUT_BCS_RELOC(batch, probs_bo,
2893 0, I915_GEM_DOMAIN_INSTRUCTION,
2895 OUT_BCS_BATCH(batch, 0);
2896 OUT_BCS_BATCH(batch, 0);
2898 OUT_BCS_BATCH(batch, 0);
2899 OUT_BCS_BATCH(batch, 0);
2900 OUT_BCS_BATCH(batch, 0);
2903 OUT_BCS_BATCH(batch,
2904 pic_param->mb_segment_tree_probs[2] << 16 |
2905 pic_param->mb_segment_tree_probs[1] << 8 |
2906 pic_param->mb_segment_tree_probs[0] << 0);
2908 OUT_BCS_BATCH(batch,
2909 pic_param->prob_skip_false << 24 |
2910 pic_param->prob_intra << 16 |
2911 pic_param->prob_last << 8 |
2912 pic_param->prob_gf << 0);
2914 OUT_BCS_BATCH(batch,
2915 pic_param->y_mode_probs[3] << 24 |
2916 pic_param->y_mode_probs[2] << 16 |
2917 pic_param->y_mode_probs[1] << 8 |
2918 pic_param->y_mode_probs[0] << 0);
2920 OUT_BCS_BATCH(batch,
2921 pic_param->uv_mode_probs[2] << 16 |
2922 pic_param->uv_mode_probs[1] << 8 |
2923 pic_param->uv_mode_probs[0] << 0);
2925 /* MV update value, DW23-DW32 */
2926 for (i = 0; i < 2; i++) {
2927 for (j = 0; j < 20; j += 4) {
2928 OUT_BCS_BATCH(batch,
2929 (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2930 pic_param->mv_probs[i][j + 2] << 16 |
2931 pic_param->mv_probs[i][j + 1] << 8 |
2932 pic_param->mv_probs[i][j + 0] << 0);
2936 OUT_BCS_BATCH(batch,
2937 (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2938 (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2939 (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) << 8 |
2940 (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) << 0);
2942 OUT_BCS_BATCH(batch,
2943 (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2944 (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2945 (pic_param->loop_filter_deltas_mode[1] & 0x7f) << 8 |
2946 (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0);
2948 /* segmentation id stream base address, DW35-DW37 */
2949 if (enable_segmentation) {
2950 OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2951 0, I915_GEM_DOMAIN_INSTRUCTION,
2953 OUT_BCS_BATCH(batch, 0);
2954 OUT_BCS_BATCH(batch, 0);
2957 OUT_BCS_BATCH(batch, 0);
2958 OUT_BCS_BATCH(batch, 0);
2959 OUT_BCS_BATCH(batch, 0);
2961 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the MFD_VP8_BSD_OBJECT command for one VP8 frame: hands the
 * bitstream decoder (BSD) the saved boolean-coder state for partition 0
 * and the byte offset/size of every token partition inside the slice
 * data buffer.
 *
 * NOTE(review): this extract is missing interleaved source lines (the
 * `static void` return-type line, some braces and the declarations of
 * `i`/`log2num`); the comments below annotate only the visible code.
 */
2965 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2966 VAPictureParameterBufferVP8 *pic_param,
2967 VASliceParameterBufferVP8 *slice_param,
2968 dri_bo *slice_data_bo,
2969 struct gen7_mfd_context *gen7_mfd_context)
2971 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Byte offset of the first token partition: partition 0 starts right
 * after the (bit-granular) frame header, rounded up to a whole byte. */
2973 unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
/* Bits of the current byte already consumed by the boolean decoder. */
2974 unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
2975 unsigned int partition_size_0 = slice_param->partition_size[0];
/* bool_coder_ctx.count must leave used_bits in [1, 8]. */
2977 assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
2978 if (used_bits == 8) {
/* The whole byte was consumed, so partition 0 effectively starts one
 * byte later; shrink its size accordingly. */
2981 partition_size_0 -= 1;
/* VP8 allows 1 control partition + 1..8 token partitions. */
2984 assert(slice_param->num_of_partitions >= 2);
2985 assert(slice_param->num_of_partitions <= 9);
/* log2 of the token-partition count (num_of_partitions includes the
 * control partition, hence the -1). */
2987 log2num = (int)log2(slice_param->num_of_partitions - 1);
2989 BEGIN_BCS_BATCH(batch, 22);
2990 OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2991 OUT_BCS_BATCH(batch,
2992 used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2993 pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
2995 (slice_param->macroblock_offset & 0x7));
2996 OUT_BCS_BATCH(batch,
2997 pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
3000 OUT_BCS_BATCH(batch, partition_size_0);
3001 OUT_BCS_BATCH(batch, offset);
3002 //partition sizes in bytes are present after the above first partition when there is more than one token partition
/* Skip past partition 0 plus the 3-byte size fields that precede the
 * remaining token partitions. */
3003 offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
/* Emit size/offset pairs for up to 8 token partitions; unused slots
 * are zero-filled so the command length stays constant. */
3004 for (i = 1; i < 9; i++) {
3005 if (i < slice_param->num_of_partitions) {
3006 OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
3007 OUT_BCS_BATCH(batch, offset);
3009 OUT_BCS_BATCH(batch, 0);
3010 OUT_BCS_BATCH(batch, 0);
3013 offset += slice_param->partition_size[i];
3016 OUT_BCS_BATCH(batch,
3017 1 << 31 | /* concealment method */
3020 ADVANCE_BCS_BATCH(batch);
/*
 * Decode one VP8 frame: validate the buffers attached to decode_state,
 * then emit the full MFX command sequence (pipe mode select, surface and
 * buffer-address state, VP8 pic state, BSD object) into the BCS batch
 * and flush it to the GPU.
 *
 * NOTE(review): this extract is missing interleaved source lines (the
 * return-type line, the early-return after WARN_ONCE, the final return
 * and closing brace); comments cover only the visible statements.
 */
3024 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3025 struct decode_state *decode_state,
3026 struct gen7_mfd_context *gen7_mfd_context)
3028 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3029 VAPictureParameterBufferVP8 *pic_param;
3030 VASliceParameterBufferVP8 *slice_param;
3031 dri_bo *slice_data_bo;
3033 assert(decode_state->pic_param && decode_state->pic_param->buffer);
3034 pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3036 /* one slice per frame */
/* Reject anything other than exactly one slice-parameter buffer with
 * one element, one slice-data bo, and a probability-data buffer. */
3037 if (decode_state->num_slice_params != 1 ||
3038 (!decode_state->slice_params ||
3039 !decode_state->slice_params[0] ||
3040 (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3041 (!decode_state->slice_datas ||
3042 !decode_state->slice_datas[0] ||
3043 !decode_state->slice_datas[0]->bo) ||
3044 !decode_state->probability_data) {
3045 WARN_ONCE("Wrong parameters for VP8 decoding\n");
3050 slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3051 slice_data_bo = decode_state->slice_datas[0]->bo;
/* Allocate/refresh per-frame decode resources before building commands. */
3053 gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3054 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3055 intel_batchbuffer_emit_mi_flush(batch);
/* Standard MFX state setup sequence, all in VP8 mode. */
3056 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3057 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3058 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3059 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3060 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3061 gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3062 gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3063 intel_batchbuffer_end_atomic(batch);
3064 intel_batchbuffer_flush(batch);
/*
 * Top-level decode entry point installed as hw_context->run: sanity-check
 * the input, then dispatch to the per-codec decode routine based on the
 * VA profile.
 *
 * NOTE(review): this extract is missing interleaved source lines (the
 * return-type line, the `VAProfile profile` parameter, the `switch`
 * header, per-case `break`s, the default case and the final return of
 * vaStatus); comments cover only the visible statements.
 */
3068 gen8_mfd_decode_picture(VADriverContextP ctx,
3070 union codec_state *codec_state,
3071 struct hw_context *hw_context)
3074 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3075 struct decode_state *decode_state = &codec_state->decode;
3078 assert(gen7_mfd_context);
/* Validate buffers/surfaces for the requested profile before touching HW. */
3080 vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3082 if (vaStatus != VA_STATUS_SUCCESS)
/* Reset the MPEG-2 slice-position workaround state for each new frame. */
3085 gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3088 case VAProfileMPEG2Simple:
3089 case VAProfileMPEG2Main:
3090 gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3093 case VAProfileH264ConstrainedBaseline:
3094 case VAProfileH264Main:
3095 case VAProfileH264High:
3096 gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3099 case VAProfileVC1Simple:
3100 case VAProfileVC1Main:
3101 case VAProfileVC1Advanced:
3102 gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3105 case VAProfileJPEGBaseline:
3106 gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3109 case VAProfileVP8Version0_3:
3110 gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3118 vaStatus = VA_STATUS_SUCCESS;
/*
 * Destructor installed as hw_context->destroy: drop every buffer object
 * owned by the decode context, free its batch buffer and finally the
 * context itself. dri_bo_unreference(NULL) is a no-op, so unallocated
 * buffers are safe to pass; pointers are NULLed defensively afterwards.
 *
 * NOTE(review): this extract is missing the return-type line and the
 * closing brace.
 */
3125 gen8_mfd_context_destroy(void *hw_context)
3127 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3129 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3130 gen7_mfd_context->post_deblocking_output.bo = NULL;
3132 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3133 gen7_mfd_context->pre_deblocking_output.bo = NULL;
3135 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3136 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3138 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3139 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3141 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3142 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3144 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3145 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3147 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3148 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3150 dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3151 gen7_mfd_context->segmentation_buffer.bo = NULL;
/* JPEG workaround slice data; pointer not cleared since the context is
 * freed immediately below. */
3153 dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3155 intel_batchbuffer_free(gen7_mfd_context->base.batch);
3156 free(gen7_mfd_context);
/*
 * MPEG-2-specific context init: mark all four cached IQ-matrix "load"
 * flags as -1 (i.e. no matrix received yet), so the first IQ matrix
 * buffer from the app is always picked up.
 *
 * NOTE(review): the surrounding braces are elided in this extract.
 */
static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3160 struct gen7_mfd_context *gen7_mfd_context)
3162 gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3163 gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3164 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3165 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3169 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3171 struct intel_driver_data *intel = intel_driver_data(ctx);
3172 struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3175 gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3176 gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3177 gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3179 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3180 gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3181 gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3184 gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3185 gen7_mfd_context->segmentation_buffer.valid = 0;
3187 switch (obj_config->profile) {
3188 case VAProfileMPEG2Simple:
3189 case VAProfileMPEG2Main:
3190 gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3193 case VAProfileH264ConstrainedBaseline:
3194 case VAProfileH264Main:
3195 case VAProfileH264High:
3196 gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3201 return (struct hw_context *)gen7_mfd_context;