2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
/* True when the Intel device revision is at or above the B0 stepping. */
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/*
 * 8x8 zig-zag scan order: maps scan position -> raster index.  Used to
 * de-zigzag quantization matrices before uploading them to the MFX unit.
 * NOTE(review): the closing "};" of this initializer is not visible in
 * this chunk of the file.
 */
51 static const uint32_t zigzag_direct[64] = {
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
54 12, 19, 26, 33, 40, 48, 41, 34,
55 27, 20, 13, 6, 7, 14, 21, 28,
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
/*
 * Lazily attach a GenAvcSurface to the decode surface and allocate its
 * direct-MV (DMV) scratch buffer, sized from the picture dimensions in
 * the H.264 picture parameters.  Idempotent: allocation is skipped when
 * the private data / dmv_top buffer already exist.
 * NOTE(review): several interior lines (calloc NULL-check, reference
 * counting, dmv_bottom handling, closing braces) are elided in this view.
 */
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64 VAPictureParameterBufferH264 *pic_param,
65 struct object_surface *obj_surface)
67 struct i965_driver_data *i965 = i965_driver_data(ctx);
68 GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69 int width_in_mbs, height_in_mbs;
/* Destructor hook so the surface code can free the private data. */
71 obj_surface->free_private_data = gen_free_avc_surface;
72 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
75 if (!gen7_avc_surface) {
76 gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77 assert((obj_surface->size & 0x3f) == 0);
78 obj_surface->private_data = gen7_avc_surface;
81 /* DMV buffers now relate to the whole frame, irrespective of
83 if (gen7_avc_surface->dmv_top == NULL) {
/* 128 bytes of motion-vector scratch per macroblock. */
84 gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
85 "direct mv w/r buffer",
86 width_in_mbs * height_in_mbs * 128,
88 assert(gen7_avc_surface->dmv_top);
/*
 * Emit MFX_PIPE_MODE_SELECT: puts the MFX engine in VLD decode mode for
 * the selected codec (standard_select must be one of the formats asserted
 * below) and enables pre/post-deblocking outputs per the context flags.
 */
93 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
94 struct decode_state *decode_state,
96 struct gen7_mfd_context *gen7_mfd_context)
98 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
100 assert(standard_select == MFX_FORMAT_MPEG2 ||
101 standard_select == MFX_FORMAT_AVC ||
102 standard_select == MFX_FORMAT_VC1 ||
103 standard_select == MFX_FORMAT_JPEG ||
104 standard_select == MFX_FORMAT_VP8);
/* 5-dword command; length field encodes total dwords minus 2. */
106 BEGIN_BCS_BATCH(batch, 5);
107 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
109 (MFX_LONG_MODE << 17) | /* Currently only support long format */
110 (MFD_MODE_VLD << 15) | /* VLD mode */
111 (0 << 10) | /* disable Stream-Out */
112 (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
113 (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
114 (0 << 5) | /* not in stitch mode */
115 (MFX_CODEC_DECODE << 4) | /* decoding mode */
116 (standard_select << 0));
118 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
119 (0 << 3) | /* terminate if AVC mbdata error occurs */
120 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
123 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
124 OUT_BCS_BATCH(batch, 0); /* reserved */
125 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE for the render target: geometry, pitch, tiling
 * and the Cb/Cr plane offsets of the decode destination surface.
 * Y800 surfaces are programmed as monochrome, everything else as planar
 * 4:2:0 with interleaved chroma (except JPEG, which disables interleave).
 */
129 gen8_mfd_surface_state(VADriverContextP ctx,
130 struct decode_state *decode_state,
132 struct gen7_mfd_context *gen7_mfd_context)
134 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
135 struct object_surface *obj_surface = decode_state->render_object;
136 unsigned int y_cb_offset;
137 unsigned int y_cr_offset;
138 unsigned int surface_format;
142 y_cb_offset = obj_surface->y_cb_offset;
143 y_cr_offset = obj_surface->y_cr_offset;
145 surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
146 MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
148 BEGIN_BCS_BATCH(batch, 6);
149 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
150 OUT_BCS_BATCH(batch, 0);
/* Width/height fields are programmed minus one, per hardware spec. */
152 ((obj_surface->orig_height - 1) << 18) |
153 ((obj_surface->orig_width - 1) << 4));
155 (surface_format << 28) | /* 420 planar YUV surface */
156 ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
157 (0 << 22) | /* surface object control state, ignored */
158 ((obj_surface->width - 1) << 3) | /* pitch */
159 (0 << 2) | /* must be 0 */
160 (1 << 1) | /* must be tiled */
161 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
163 (0 << 16) | /* X offset for U(Cb), must be 0 */
164 (y_cb_offset << 0)); /* Y offset for U(Cb) */
166 (0 << 16) | /* X offset for V(Cr), must be 0 */
167 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
168 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): programs every buffer address
 * the MFX decode pipe needs — pre/post-deblocking outputs, row-store
 * scratch buffers, and the 16 reference picture surfaces.  Each optional
 * buffer is emitted as a relocation when valid, or zero dwords otherwise.
 */
172 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
173 struct decode_state *decode_state,
175 struct gen7_mfd_context *gen7_mfd_context)
177 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
180 BEGIN_BCS_BATCH(batch, 61);
181 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
182 /* Pre-deblock 1-3 */
183 if (gen7_mfd_context->pre_deblocking_output.valid)
184 OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
185 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
188 OUT_BCS_BATCH(batch, 0);
190 OUT_BCS_BATCH(batch, 0);
191 OUT_BCS_BATCH(batch, 0);
192 /* Post-debloing 4-6 */
193 if (gen7_mfd_context->post_deblocking_output.valid)
194 OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
195 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
198 OUT_BCS_BATCH(batch, 0);
200 OUT_BCS_BATCH(batch, 0);
201 OUT_BCS_BATCH(batch, 0);
203 /* uncompressed-video & stream out 7-12 */
204 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
205 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
206 OUT_BCS_BATCH(batch, 0);
207 OUT_BCS_BATCH(batch, 0);
208 OUT_BCS_BATCH(batch, 0);
209 OUT_BCS_BATCH(batch, 0);
211 /* intra row-store scratch 13-15 */
212 if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
213 OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
214 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
217 OUT_BCS_BATCH(batch, 0);
219 OUT_BCS_BATCH(batch, 0);
220 OUT_BCS_BATCH(batch, 0);
221 /* deblocking-filter-row-store 16-18 */
222 if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
223 OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
224 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
227 OUT_BCS_BATCH(batch, 0);
228 OUT_BCS_BATCH(batch, 0);
229 OUT_BCS_BATCH(batch, 0);
/* Reference pictures: one address slot per possible reference surface;
 * empty slots get zero dwords. */
232 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
233 struct object_surface *obj_surface;
235 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
236 gen7_mfd_context->reference_surface[i].obj_surface &&
237 gen7_mfd_context->reference_surface[i].obj_surface->bo) {
238 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
240 OUT_BCS_RELOC(batch, obj_surface->bo,
241 I915_GEM_DOMAIN_INSTRUCTION, 0,
244 OUT_BCS_BATCH(batch, 0);
247 OUT_BCS_BATCH(batch, 0);
250 /* reference property 51 */
251 OUT_BCS_BATCH(batch, 0);
253 /* Macroblock status & ILDB 52-57 */
254 OUT_BCS_BATCH(batch, 0);
255 OUT_BCS_BATCH(batch, 0);
256 OUT_BCS_BATCH(batch, 0);
257 OUT_BCS_BATCH(batch, 0);
258 OUT_BCS_BATCH(batch, 0);
259 OUT_BCS_BATCH(batch, 0);
261 /* the second Macroblock status 58-60 */
262 OUT_BCS_BATCH(batch, 0);
263 OUT_BCS_BATCH(batch, 0);
264 OUT_BCS_BATCH(batch, 0);
266 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): points the MFX unit at
 * the slice-data buffer (indirect bitstream object) and zero-fills the
 * unused MV / IT_COFF / IT_DBLK / PAK_BSE indirect object slots, which
 * are not needed for VLD decoding.
 */
270 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
271 dri_bo *slice_data_bo,
273 struct gen7_mfd_context *gen7_mfd_context)
275 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
277 BEGIN_BCS_BATCH(batch, 26);
278 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
280 OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
281 OUT_BCS_BATCH(batch, 0);
282 OUT_BCS_BATCH(batch, 0);
283 /* Upper bound 4-5 */
284 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
285 OUT_BCS_BATCH(batch, 0);
287 /* MFX indirect MV 6-10 */
288 OUT_BCS_BATCH(batch, 0);
289 OUT_BCS_BATCH(batch, 0);
290 OUT_BCS_BATCH(batch, 0);
291 OUT_BCS_BATCH(batch, 0);
292 OUT_BCS_BATCH(batch, 0);
294 /* MFX IT_COFF 11-15 */
295 OUT_BCS_BATCH(batch, 0);
296 OUT_BCS_BATCH(batch, 0);
297 OUT_BCS_BATCH(batch, 0);
298 OUT_BCS_BATCH(batch, 0);
299 OUT_BCS_BATCH(batch, 0);
301 /* MFX IT_DBLK 16-20 */
302 OUT_BCS_BATCH(batch, 0);
303 OUT_BCS_BATCH(batch, 0);
304 OUT_BCS_BATCH(batch, 0);
305 OUT_BCS_BATCH(batch, 0);
306 OUT_BCS_BATCH(batch, 0);
308 /* MFX PAK_BSE object for encoder 21-25 */
309 OUT_BCS_BATCH(batch, 0);
310 OUT_BCS_BATCH(batch, 0);
311 OUT_BCS_BATCH(batch, 0);
312 OUT_BCS_BATCH(batch, 0);
313 OUT_BCS_BATCH(batch, 0);
315 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): programs the BSD/MPC and
 * MPR row-store scratch buffers and the bitplane read buffer (VC-1),
 * emitting a relocation for each valid buffer and zeros otherwise.
 */
319 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
320 struct decode_state *decode_state,
322 struct gen7_mfd_context *gen7_mfd_context)
324 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
326 BEGIN_BCS_BATCH(batch, 10);
327 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
329 if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
330 OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
331 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
334 OUT_BCS_BATCH(batch, 0);
336 OUT_BCS_BATCH(batch, 0);
337 OUT_BCS_BATCH(batch, 0);
338 /* MPR Row Store Scratch buffer 4-6 */
339 if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
340 OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
341 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
344 OUT_BCS_BATCH(batch, 0);
346 OUT_BCS_BATCH(batch, 0);
347 OUT_BCS_BATCH(batch, 0);
/* Bitplane read buffer (used by VC-1 decode). */
350 if (gen7_mfd_context->bitplane_read_buffer.valid)
351 OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
352 I915_GEM_DOMAIN_INSTRUCTION, 0,
355 OUT_BCS_BATCH(batch, 0);
356 OUT_BCS_BATCH(batch, 0);
357 OUT_BCS_BATCH(batch, 0);
358 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one MFX_QM_STATE command uploading a quantization matrix of up to
 * 64 bytes (16 dwords).  The matrix is staged through a local buffer so
 * a short qm_length still emits a full 16-dword payload.
 * NOTE(review): qm_buffer is not zero-initialized in the visible code, so
 * trailing bytes are indeterminate when qm_length < 64 — confirm callers
 * always pass full-size matrices or that padding is don't-care.
 */
362 gen8_mfd_qm_state(VADriverContextP ctx,
366 struct gen7_mfd_context *gen7_mfd_context)
368 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
369 unsigned int qm_buffer[16];
371 assert(qm_length <= 16 * 4);
372 memcpy(qm_buffer, qm, qm_length);
374 BEGIN_BCS_BATCH(batch, 18);
375 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
376 OUT_BCS_BATCH(batch, qm_type << 0);
377 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
378 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_IMG_STATE (17 dwords) from the H.264 picture parameters:
 * frame geometry in macroblocks, QP index offsets, prediction/coding
 * flags and the MBAFF frame flag.  Asserts the stream is 4:2:0 or
 * monochrome, since the MFX unit does not decode 4:2:2/4:4:4.
 */
382 gen8_mfd_avc_img_state(VADriverContextP ctx,
383 struct decode_state *decode_state,
384 struct gen7_mfd_context *gen7_mfd_context)
386 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
388 int mbaff_frame_flag;
389 unsigned int width_in_mbs, height_in_mbs;
390 VAPictureParameterBufferH264 *pic_param;
392 assert(decode_state->pic_param && decode_state->pic_param->buffer);
393 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
394 assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
/* Derive the image structure (frame/top/bottom field) from CurrPic flags;
 * the assignments themselves are elided in this view. */
396 if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
398 else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
403 if ((img_struct & 0x1) == 0x1) {
404 assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
406 assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
409 if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
410 assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
411 assert(pic_param->pic_fields.bits.field_pic_flag == 0);
413 assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
/* MBAFF: MB-adaptive frame/field coding in a frame picture. */
416 mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
417 !pic_param->pic_fields.bits.field_pic_flag);
419 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
420 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
422 /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
423 assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
424 pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
425 assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
427 BEGIN_BCS_BATCH(batch, 17);
428 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
430 (width_in_mbs * height_in_mbs - 1));
432 ((height_in_mbs - 1) << 16) |
433 ((width_in_mbs - 1) << 0));
435 ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
436 ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
437 (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
438 (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
439 (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
440 (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
443 (pic_param->seq_fields.bits.chroma_format_idc << 10) |
444 (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
445 ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
446 (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
447 (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
448 (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
449 (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
450 (mbaff_frame_flag << 1) |
451 (pic_param->pic_fields.bits.field_pic_flag << 0));
452 OUT_BCS_BATCH(batch, 0);
453 OUT_BCS_BATCH(batch, 0);
454 OUT_BCS_BATCH(batch, 0);
455 OUT_BCS_BATCH(batch, 0);
456 OUT_BCS_BATCH(batch, 0);
457 OUT_BCS_BATCH(batch, 0);
458 OUT_BCS_BATCH(batch, 0);
459 OUT_BCS_BATCH(batch, 0);
460 OUT_BCS_BATCH(batch, 0);
461 OUT_BCS_BATCH(batch, 0);
462 OUT_BCS_BATCH(batch, 0);
463 OUT_BCS_BATCH(batch, 0);
464 ADVANCE_BCS_BATCH(batch);
/*
 * Upload the H.264 scaling lists: the six 4x4 matrices (intra then inter)
 * always, and the two 8x8 matrices only when 8x8 transform mode is on.
 * Falls back to the context's default (flat) IQ matrix when the app did
 * not supply one.
 */
468 gen8_mfd_avc_qm_state(VADriverContextP ctx,
469 struct decode_state *decode_state,
470 struct gen7_mfd_context *gen7_mfd_context)
472 VAIQMatrixBufferH264 *iq_matrix;
473 VAPictureParameterBufferH264 *pic_param;
475 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
476 iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
478 iq_matrix = &gen7_mfd_context->iq_matrix.h264;
480 assert(decode_state->pic_param && decode_state->pic_param->buffer);
481 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
483 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
484 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
486 if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
487 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
488 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
/*
 * Emit MFD_AVC_PICID_STATE (10 dwords) with Picture ID Remapping disabled
 * (dword 1 = 1); the remaining remap-table dwords are zero.
 */
493 gen8_mfd_avc_picid_state(VADriverContextP ctx,
494 struct decode_state *decode_state,
495 struct gen7_mfd_context *gen7_mfd_context)
497 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
499 BEGIN_BCS_BATCH(batch, 10);
500 OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
501 OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
502 OUT_BCS_BATCH(batch, 0);
503 OUT_BCS_BATCH(batch, 0);
504 OUT_BCS_BATCH(batch, 0);
505 OUT_BCS_BATCH(batch, 0);
506 OUT_BCS_BATCH(batch, 0);
507 OUT_BCS_BATCH(batch, 0);
508 OUT_BCS_BATCH(batch, 0);
509 OUT_BCS_BATCH(batch, 0);
510 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): the direct-MV buffer address
 * of every reference surface and of the current picture, followed by the
 * top/bottom picture-order-count (POC) pairs the hardware needs for
 * direct-mode motion vector derivation.
 */
514 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
515 struct decode_state *decode_state,
516 VAPictureParameterBufferH264 *pic_param,
517 VASliceParameterBufferH264 *slice_param,
518 struct gen7_mfd_context *gen7_mfd_context)
520 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
521 struct object_surface *obj_surface;
522 GenAvcSurface *gen7_avc_surface;
523 VAPictureH264 *va_pic;
526 BEGIN_BCS_BATCH(batch, 71);
527 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
529 /* reference surfaces 0..15 */
530 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
531 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
532 gen7_mfd_context->reference_surface[i].obj_surface &&
533 gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
535 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
536 gen7_avc_surface = obj_surface->private_data;
538 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
539 I915_GEM_DOMAIN_INSTRUCTION, 0,
541 OUT_BCS_BATCH(batch, 0);
543 OUT_BCS_BATCH(batch, 0);
544 OUT_BCS_BATCH(batch, 0);
548 OUT_BCS_BATCH(batch, 0);
550 /* the current decoding frame/field */
551 va_pic = &pic_param->CurrPic;
552 obj_surface = decode_state->render_object;
553 assert(obj_surface->bo && obj_surface->private_data);
554 gen7_avc_surface = obj_surface->private_data;
/* The current frame's DMV buffer is written by the decoder (read+write
 * instruction domain), unlike references which are read-only. */
556 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
557 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
560 OUT_BCS_BATCH(batch, 0);
561 OUT_BCS_BATCH(batch, 0);
/* POC list: match each frame-store entry to its ReferenceFrames record
 * and emit the top/bottom field order counts. */
564 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
565 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
568 assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
570 for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
571 va_pic = &pic_param->ReferenceFrames[j];
573 if (va_pic->flags & VA_PICTURE_H264_INVALID)
576 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
583 assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
585 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
586 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
588 OUT_BCS_BATCH(batch, 0);
589 OUT_BCS_BATCH(batch, 0);
/* Finally, the current picture's POC pair. */
593 va_pic = &pic_param->CurrPic;
594 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
595 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
597 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice: normalized slice
 * type (SI->I, SP->P), active reference counts per list, weighted
 * prediction denominators, QP/deblocking parameters, and the current and
 * next slice start positions in macroblocks.  next_slice_param == NULL
 * marks the last slice of the picture.
 */
601 gen8_mfd_avc_slice_state(VADriverContextP ctx,
602 VAPictureParameterBufferH264 *pic_param,
603 VASliceParameterBufferH264 *slice_param,
604 VASliceParameterBufferH264 *next_slice_param,
605 struct gen7_mfd_context *gen7_mfd_context)
607 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
608 int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
609 int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
610 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
611 int num_ref_idx_l0, num_ref_idx_l1;
612 int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
613 pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
614 int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
/* Collapse SI/SP onto the base I/P types the hardware understands. */
617 if (slice_param->slice_type == SLICE_TYPE_I ||
618 slice_param->slice_type == SLICE_TYPE_SI) {
619 slice_type = SLICE_TYPE_I;
620 } else if (slice_param->slice_type == SLICE_TYPE_P ||
621 slice_param->slice_type == SLICE_TYPE_SP) {
622 slice_type = SLICE_TYPE_P;
624 assert(slice_param->slice_type == SLICE_TYPE_B);
625 slice_type = SLICE_TYPE_B;
628 if (slice_type == SLICE_TYPE_I) {
629 assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
630 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
633 } else if (slice_type == SLICE_TYPE_P) {
634 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
635 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
638 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
639 num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
/* In MBAFF pictures first_mb_in_slice counts MB pairs, so double it. */
642 first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
643 slice_hor_pos = first_mb_in_slice % width_in_mbs;
644 slice_ver_pos = first_mb_in_slice / width_in_mbs;
646 if (next_slice_param) {
647 first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
648 next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
649 next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
651 next_slice_hor_pos = 0;
/* Last slice: "next" position is one row past the picture (per-field
 * height when field_pic_flag is set). */
652 next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
655 BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
656 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
657 OUT_BCS_BATCH(batch, slice_type);
659 (num_ref_idx_l1 << 24) |
660 (num_ref_idx_l0 << 16) |
661 (slice_param->chroma_log2_weight_denom << 8) |
662 (slice_param->luma_log2_weight_denom << 0));
664 (slice_param->direct_spatial_mv_pred_flag << 29) |
665 (slice_param->disable_deblocking_filter_idc << 27) |
666 (slice_param->cabac_init_idc << 24) |
667 ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
668 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
669 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
671 (slice_ver_pos << 24) |
672 (slice_hor_pos << 16) |
673 (first_mb_in_slice << 0));
675 (next_slice_ver_pos << 16) |
676 (next_slice_hor_pos << 0));
678 (next_slice_param == NULL) << 19); /* last slice flag */
679 OUT_BCS_BATCH(batch, 0);
680 OUT_BCS_BATCH(batch, 0);
681 OUT_BCS_BATCH(batch, 0);
682 OUT_BCS_BATCH(batch, 0);
683 ADVANCE_BCS_BATCH(batch);
/*
 * Thin wrapper: delegates emission of MFX_AVC_REF_IDX_STATE to the shared
 * gen6 helper, passing this context's batch and frame-store table.
 */
687 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
688 VAPictureParameterBufferH264 *pic_param,
689 VASliceParameterBufferH264 *slice_param,
690 struct gen7_mfd_context *gen7_mfd_context)
692 gen6_send_avc_ref_idx_state(
693 gen7_mfd_context->base.batch,
695 gen7_mfd_context->reference_surface
/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE when explicit weighted prediction is in
 * use: one 96-dword table for P/SP slices (list 0), two tables for B
 * slices with weighted_bipred_idc == 1 (lists 0 and 1).  Each table packs
 * 32 entries of {luma weight/offset, Cb weight/offset, Cr weight/offset}
 * as 16-bit values.
 */
700 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
701 VAPictureParameterBufferH264 *pic_param,
702 VASliceParameterBufferH264 *slice_param,
703 struct gen7_mfd_context *gen7_mfd_context)
705 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
706 int i, j, num_weight_offset_table = 0;
707 short weightoffsets[32 * 6];
709 if ((slice_param->slice_type == SLICE_TYPE_P ||
710 slice_param->slice_type == SLICE_TYPE_SP) &&
711 (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
712 num_weight_offset_table = 1;
715 if ((slice_param->slice_type == SLICE_TYPE_B) &&
716 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
717 num_weight_offset_table = 2;
720 for (i = 0; i < num_weight_offset_table; i++) {
721 BEGIN_BCS_BATCH(batch, 98);
722 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
723 OUT_BCS_BATCH(batch, i); /* i selects reference list 0 or 1 */
726 for (j = 0; j < 32; j++) {
727 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
728 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
729 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
730 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
731 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
732 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
735 for (j = 0; j < 32; j++) {
736 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
737 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
738 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
739 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
740 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
741 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
745 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
746 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFD_AVC_BSD_OBJECT (6 dwords): kicks decoding of one slice.
 * Programs the slice data size/offset and the bit offset of the first
 * macroblock (byte part in bits 16+, remaining 0-7 bits in the low bits),
 * and sets the LastSlice flag when this is the final slice.
 */
751 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
752 VAPictureParameterBufferH264 *pic_param,
753 VASliceParameterBufferH264 *slice_param,
754 dri_bo *slice_data_bo,
755 VASliceParameterBufferH264 *next_slice_param,
756 struct gen7_mfd_context *gen7_mfd_context)
758 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Locate the first MB's bit offset inside the slice data, which differs
 * for CABAC vs CAVLC entropy coding. */
759 int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
761 pic_param->pic_fields.bits.entropy_coding_mode_flag);
763 /* the input bitsteam format on GEN7 differs from GEN6 */
764 BEGIN_BCS_BATCH(batch, 6);
765 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
767 (slice_param->slice_data_size));
768 OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
776 ((slice_data_bit_offset >> 3) << 16) |
780 ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
781 (slice_data_bit_offset & 0x7));
782 OUT_BCS_BATCH(batch, 0);
783 ADVANCE_BCS_BATCH(batch);
/*
 * One-time AVC context setup: seed the fallback IQ matrix with the
 * default (flat) scaling lists, used when the app supplies none.
 */
787 gen8_mfd_avc_context_init(
788 VADriverContextP ctx,
789 struct gen7_mfd_context *gen7_mfd_context
792 /* Initialize flat scaling lists */
793 avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/*
 * Per-picture AVC decode setup: scans the slices to decide whether
 * in-loop deblocking (ILDB) is enabled, refreshes the frame-store index,
 * binds the render target as post- (ILDB on) or pre-deblocking (ILDB off)
 * output, and (re)allocates the row-store scratch buffers sized by the
 * picture width in macroblocks.
 */
797 gen8_mfd_avc_decode_init(VADriverContextP ctx,
798 struct decode_state *decode_state,
799 struct gen7_mfd_context *gen7_mfd_context)
801 VAPictureParameterBufferH264 *pic_param;
802 VASliceParameterBufferH264 *slice_param;
803 struct i965_driver_data *i965 = i965_driver_data(ctx);
804 struct object_surface *obj_surface;
806 int i, j, enable_avc_ildb = 0;
807 unsigned int width_in_mbs, height_in_mbs;
/* ILDB is needed unless every slice disables the deblocking filter;
 * stop scanning as soon as one slice enables it. */
809 for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
810 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
811 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
813 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
814 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
815 assert((slice_param->slice_type == SLICE_TYPE_I) ||
816 (slice_param->slice_type == SLICE_TYPE_SI) ||
817 (slice_param->slice_type == SLICE_TYPE_P) ||
818 (slice_param->slice_type == SLICE_TYPE_SP) ||
819 (slice_param->slice_type == SLICE_TYPE_B));
821 if (slice_param->disable_deblocking_filter_idc != 1) {
830 assert(decode_state->pic_param && decode_state->pic_param->buffer);
831 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
832 intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
833 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
834 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
835 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
836 assert(height_in_mbs > 0 && height_in_mbs <= 256);
838 /* Current decoded picture */
839 obj_surface = decode_state->render_object;
840 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
841 obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
843 avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
844 gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
/* Exactly one of post/pre deblocking output is valid, matching ILDB. */
846 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
847 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
848 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
849 gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
851 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
852 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
853 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
854 gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
856 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
857 bo = dri_bo_alloc(i965->intel.bufmgr,
862 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
863 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
865 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
866 bo = dri_bo_alloc(i965->intel.bufmgr,
867 "deblocking filter row store",
868 width_in_mbs * 64 * 4,
871 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
872 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
874 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
875 bo = dri_bo_alloc(i965->intel.bufmgr,
877 width_in_mbs * 64 * 2,
880 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
881 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
883 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
884 bo = dri_bo_alloc(i965->intel.bufmgr,
886 width_in_mbs * 64 * 2,
889 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
890 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* No bitplane buffer for AVC (VC-1 only). */
892 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Top-level AVC decode entry point: initializes per-picture state, then
 * builds one atomic BCS batch — common pipe/surface/buffer state followed
 * by per-slice direct-mode, ref-idx, weight-offset, slice-state and BSD
 * object commands — and flushes it to the hardware.
 */
896 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
897 struct decode_state *decode_state,
898 struct gen7_mfd_context *gen7_mfd_context)
900 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
901 VAPictureParameterBufferH264 *pic_param;
902 VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
903 dri_bo *slice_data_bo;
906 assert(decode_state->pic_param && decode_state->pic_param->buffer);
907 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
908 gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
910 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
911 intel_batchbuffer_emit_mi_flush(batch);
912 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
913 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
914 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
915 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
916 gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
917 gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
918 gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
/* Outer loop: slice parameter buffers; inner loop: slices within each
 * buffer.  next_slice_param lets slice-state/BSD mark the last slice. */
920 for (j = 0; j < decode_state->num_slice_params; j++) {
921 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
922 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
923 slice_data_bo = decode_state->slice_datas[j]->bo;
924 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
926 if (j == decode_state->num_slice_params - 1)
927 next_slice_group_param = NULL;
929 next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
931 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
932 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
933 assert((slice_param->slice_type == SLICE_TYPE_I) ||
934 (slice_param->slice_type == SLICE_TYPE_SI) ||
935 (slice_param->slice_type == SLICE_TYPE_P) ||
936 (slice_param->slice_type == SLICE_TYPE_SP) ||
937 (slice_param->slice_type == SLICE_TYPE_B));
939 if (i < decode_state->slice_params[j]->num_elements - 1)
940 next_slice_param = slice_param + 1;
942 next_slice_param = next_slice_group_param;
944 gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
945 gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
946 gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
947 gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
948 gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
953 intel_batchbuffer_end_atomic(batch);
954 intel_batchbuffer_flush(batch);
/*
 * Per-picture MPEG-2 decode setup: refreshes the reference surfaces,
 * ensures the render target has an NV12 buffer, binds it as the
 * pre-deblocking output (MPEG-2 has no in-loop deblocking), allocates the
 * BSD/MPC row-store scratch buffer, and marks all other scratch buffers
 * unused.
 */
958 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
959 struct decode_state *decode_state,
960 struct gen7_mfd_context *gen7_mfd_context)
962 VAPictureParameterBufferMPEG2 *pic_param;
963 struct i965_driver_data *i965 = i965_driver_data(ctx);
964 struct object_surface *obj_surface;
966 unsigned int width_in_mbs;
968 assert(decode_state->pic_param && decode_state->pic_param->buffer);
969 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
970 width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
972 mpeg2_set_reference_surfaces(
974 gen7_mfd_context->reference_surface,
979 /* Current decoded picture */
980 obj_surface = decode_state->render_object;
981 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
983 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
984 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
985 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
986 gen7_mfd_context->pre_deblocking_output.valid = 1;
988 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
989 bo = dri_bo_alloc(i965->intel.bufmgr,
994 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
995 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
/* Remaining buffers are AVC/VC-1 specific and unused for MPEG-2. */
997 gen7_mfd_context->post_deblocking_output.valid = 0;
998 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
999 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1000 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1001 gen7_mfd_context->bitplane_read_buffer.valid = 0;
1005 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1006 struct decode_state *decode_state,
1007 struct gen7_mfd_context *gen7_mfd_context)
1009 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1010 VAPictureParameterBufferMPEG2 *pic_param;
1011 unsigned int slice_concealment_disable_bit = 0;
1013 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1014 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1016 slice_concealment_disable_bit = 1;
1018 BEGIN_BCS_BATCH(batch, 13);
1019 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1020 OUT_BCS_BATCH(batch,
1021 (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1022 ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1023 ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1024 ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1025 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1026 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1027 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1028 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1029 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1030 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1031 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1032 pic_param->picture_coding_extension.bits.alternate_scan << 6);
1033 OUT_BCS_BATCH(batch,
1034 pic_param->picture_coding_type << 9);
1035 OUT_BCS_BATCH(batch,
1036 (slice_concealment_disable_bit << 31) |
1037 ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1038 ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1039 OUT_BCS_BATCH(batch, 0);
1040 OUT_BCS_BATCH(batch, 0);
1041 OUT_BCS_BATCH(batch, 0);
1042 OUT_BCS_BATCH(batch, 0);
1043 OUT_BCS_BATCH(batch, 0);
1044 OUT_BCS_BATCH(batch, 0);
1045 OUT_BCS_BATCH(batch, 0);
1046 OUT_BCS_BATCH(batch, 0);
1047 OUT_BCS_BATCH(batch, 0);
1048 ADVANCE_BCS_BATCH(batch);
1052 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1053 struct decode_state *decode_state,
1054 struct gen7_mfd_context *gen7_mfd_context)
1056 VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1059 /* Update internal QM state */
1060 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1061 VAIQMatrixBufferMPEG2 * const iq_matrix =
1062 (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1064 if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1065 iq_matrix->load_intra_quantiser_matrix) {
1066 gen_iq_matrix->load_intra_quantiser_matrix =
1067 iq_matrix->load_intra_quantiser_matrix;
1068 if (iq_matrix->load_intra_quantiser_matrix) {
1069 for (j = 0; j < 64; j++)
1070 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1071 iq_matrix->intra_quantiser_matrix[j];
1075 if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1076 iq_matrix->load_non_intra_quantiser_matrix) {
1077 gen_iq_matrix->load_non_intra_quantiser_matrix =
1078 iq_matrix->load_non_intra_quantiser_matrix;
1079 if (iq_matrix->load_non_intra_quantiser_matrix) {
1080 for (j = 0; j < 64; j++)
1081 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1082 iq_matrix->non_intra_quantiser_matrix[j];
1087 /* Commit QM state to HW */
1088 for (i = 0; i < 2; i++) {
1089 unsigned char *qm = NULL;
1093 if (gen_iq_matrix->load_intra_quantiser_matrix) {
1094 qm = gen_iq_matrix->intra_quantiser_matrix;
1095 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1098 if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1099 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1100 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1107 gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1112 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1113 VAPictureParameterBufferMPEG2 *pic_param,
1114 VASliceParameterBufferMPEG2 *slice_param,
1115 VASliceParameterBufferMPEG2 *next_slice_param,
1116 struct gen7_mfd_context *gen7_mfd_context)
1118 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1119 unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1120 int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1122 if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1123 pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1125 is_field_pic_wa = is_field_pic &&
1126 gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1128 vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1129 hpos0 = slice_param->slice_horizontal_position;
1131 if (next_slice_param == NULL) {
1132 vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1135 vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1136 hpos1 = next_slice_param->slice_horizontal_position;
1139 mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1141 BEGIN_BCS_BATCH(batch, 5);
1142 OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1143 OUT_BCS_BATCH(batch,
1144 slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1145 OUT_BCS_BATCH(batch,
1146 slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1147 OUT_BCS_BATCH(batch,
1151 (next_slice_param == NULL) << 5 |
1152 (next_slice_param == NULL) << 3 |
1153 (slice_param->macroblock_offset & 0x7));
1154 OUT_BCS_BATCH(batch,
1155 (slice_param->quantiser_scale_code << 24) |
1156 (vpos1 << 8 | hpos1));
1157 ADVANCE_BCS_BATCH(batch);
1161 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1162 struct decode_state *decode_state,
1163 struct gen7_mfd_context *gen7_mfd_context)
1165 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1166 VAPictureParameterBufferMPEG2 *pic_param;
1167 VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1168 dri_bo *slice_data_bo;
1171 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1172 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1174 gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1175 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1176 intel_batchbuffer_emit_mi_flush(batch);
1177 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1178 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1179 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1180 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1181 gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1182 gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1184 if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1185 gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1186 mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1188 for (j = 0; j < decode_state->num_slice_params; j++) {
1189 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1190 slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1191 slice_data_bo = decode_state->slice_datas[j]->bo;
1192 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1194 if (j == decode_state->num_slice_params - 1)
1195 next_slice_group_param = NULL;
1197 next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1199 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1200 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1202 if (i < decode_state->slice_params[j]->num_elements - 1)
1203 next_slice_param = slice_param + 1;
1205 next_slice_param = next_slice_group_param;
1207 gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1212 intel_batchbuffer_end_atomic(batch);
1213 intel_batchbuffer_flush(batch);
1216 static const int va_to_gen7_vc1_pic_type[5] = {
1220 GEN7_VC1_BI_PICTURE,
1224 static const int va_to_gen7_vc1_mv[4] = {
1226 2, /* 1-MV half-pel */
1227 3, /* 1-MV half-pef bilinear */
1231 static const int b_picture_scale_factor[21] = {
1232 128, 85, 170, 64, 192,
1233 51, 102, 153, 204, 43,
1234 215, 37, 74, 111, 148,
1235 185, 222, 32, 96, 160,
1239 static const int va_to_gen7_vc1_condover[3] = {
1245 static const int va_to_gen7_vc1_profile[4] = {
1246 GEN7_VC1_SIMPLE_PROFILE,
1247 GEN7_VC1_MAIN_PROFILE,
1248 GEN7_VC1_RESERVED_PROFILE,
1249 GEN7_VC1_ADVANCED_PROFILE
1253 gen8_mfd_free_vc1_surface(void **data)
1255 struct gen7_vc1_surface *gen7_vc1_surface = *data;
1257 if (!gen7_vc1_surface)
1260 dri_bo_unreference(gen7_vc1_surface->dmv);
1261 free(gen7_vc1_surface);
1266 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1267 VAPictureParameterBufferVC1 *pic_param,
1268 struct object_surface *obj_surface)
1270 struct i965_driver_data *i965 = i965_driver_data(ctx);
1271 struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1272 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1273 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1275 obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1277 if (!gen7_vc1_surface) {
1278 gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1279 assert((obj_surface->size & 0x3f) == 0);
1280 obj_surface->private_data = gen7_vc1_surface;
1283 gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1285 if (gen7_vc1_surface->dmv == NULL) {
1286 gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1287 "direct mv w/r buffer",
1288 width_in_mbs * height_in_mbs * 64,
1294 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1295 struct decode_state *decode_state,
1296 struct gen7_mfd_context *gen7_mfd_context)
1298 VAPictureParameterBufferVC1 *pic_param;
1299 struct i965_driver_data *i965 = i965_driver_data(ctx);
1300 struct object_surface *obj_surface;
1305 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1306 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1307 width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1308 picture_type = pic_param->picture_fields.bits.picture_type;
1310 intel_update_vc1_frame_store_index(ctx,
1313 gen7_mfd_context->reference_surface);
1315 /* Current decoded picture */
1316 obj_surface = decode_state->render_object;
1317 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1318 gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1320 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1321 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1322 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1323 gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1325 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1326 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1327 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1328 gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1330 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1331 bo = dri_bo_alloc(i965->intel.bufmgr,
1336 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1337 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1339 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1340 bo = dri_bo_alloc(i965->intel.bufmgr,
1341 "deblocking filter row store",
1342 width_in_mbs * 7 * 64,
1345 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1346 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1348 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1349 bo = dri_bo_alloc(i965->intel.bufmgr,
1350 "bsd mpc row store",
1354 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1355 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1357 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1359 gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1360 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1362 if (gen7_mfd_context->bitplane_read_buffer.valid) {
1363 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1364 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1365 int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1367 uint8_t *src = NULL, *dst = NULL;
1369 assert(decode_state->bit_plane->buffer);
1370 src = decode_state->bit_plane->buffer;
1372 bo = dri_bo_alloc(i965->intel.bufmgr,
1374 bitplane_width * height_in_mbs,
1377 gen7_mfd_context->bitplane_read_buffer.bo = bo;
1379 dri_bo_map(bo, True);
1380 assert(bo->virtual);
1383 for (src_h = 0; src_h < height_in_mbs; src_h++) {
1384 for(src_w = 0; src_w < width_in_mbs; src_w++) {
1385 int src_index, dst_index;
1389 src_index = (src_h * width_in_mbs + src_w) / 2;
1390 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1391 src_value = ((src[src_index] >> src_shift) & 0xf);
1393 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1397 dst_index = src_w / 2;
1398 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1402 dst[src_w / 2] >>= 4;
1404 dst += bitplane_width;
1409 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1413 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1414 struct decode_state *decode_state,
1415 struct gen7_mfd_context *gen7_mfd_context)
1417 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1418 VAPictureParameterBufferVC1 *pic_param;
1419 struct object_surface *obj_surface;
1420 int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1421 int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1422 int unified_mv_mode;
1423 int ref_field_pic_polarity = 0;
1424 int scale_factor = 0;
1426 int dmv_surface_valid = 0;
1432 int interpolation_mode = 0;
1434 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1435 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1437 profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1438 dquant = pic_param->pic_quantizer_fields.bits.dquant;
1439 dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1440 dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1441 dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1442 dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1443 dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1444 alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1447 alt_pquant_config = 0;
1448 alt_pquant_edge_mask = 0;
1449 } else if (dquant == 2) {
1450 alt_pquant_config = 1;
1451 alt_pquant_edge_mask = 0xf;
1453 assert(dquant == 1);
1454 if (dquantfrm == 0) {
1455 alt_pquant_config = 0;
1456 alt_pquant_edge_mask = 0;
1459 assert(dquantfrm == 1);
1460 alt_pquant_config = 1;
1462 switch (dqprofile) {
1464 if (dqbilevel == 0) {
1465 alt_pquant_config = 2;
1466 alt_pquant_edge_mask = 0;
1468 assert(dqbilevel == 1);
1469 alt_pquant_config = 3;
1470 alt_pquant_edge_mask = 0;
1475 alt_pquant_edge_mask = 0xf;
1480 alt_pquant_edge_mask = 0x9;
1482 alt_pquant_edge_mask = (0x3 << dqdbedge);
1487 alt_pquant_edge_mask = (0x1 << dqsbedge);
1496 if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1497 assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1498 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1500 assert(pic_param->mv_fields.bits.mv_mode < 4);
1501 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1504 if (pic_param->sequence_fields.bits.interlace == 1 &&
1505 pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1506 /* FIXME: calculate reference field picture polarity */
1508 ref_field_pic_polarity = 0;
1511 if (pic_param->b_picture_fraction < 21)
1512 scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1514 picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1516 if (profile == GEN7_VC1_ADVANCED_PROFILE &&
1517 picture_type == GEN7_VC1_I_PICTURE)
1518 picture_type = GEN7_VC1_BI_PICTURE;
1520 if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1521 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1523 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1526 * 8.3.6.2.1 Transform Type Selection
1527 * If variable-sized transform coding is not enabled,
1528 * then the 8x8 transform shall be used for all blocks.
1529 * it is also MFX_VC1_PIC_STATE requirement.
1531 if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1532 pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
1533 pic_param->transform_fields.bits.frame_level_transform_type = 0;
1537 if (picture_type == GEN7_VC1_B_PICTURE) {
1538 struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1540 obj_surface = decode_state->reference_objects[1];
1543 gen7_vc1_surface = obj_surface->private_data;
1545 if (!gen7_vc1_surface ||
1546 (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1547 va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1548 dmv_surface_valid = 0;
1550 dmv_surface_valid = 1;
1553 assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1555 if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1556 fcm = pic_param->picture_fields.bits.frame_coding_mode;
1558 if (pic_param->picture_fields.bits.top_field_first)
1564 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1565 brfd = pic_param->reference_fields.bits.reference_distance;
1566 brfd = (scale_factor * brfd) >> 8;
1567 brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1574 if (profile != GEN7_VC1_ADVANCED_PROFILE){
1575 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1576 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1580 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1581 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1584 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1585 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
1586 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1588 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1589 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1595 assert(pic_param->conditional_overlap_flag < 3);
1596 assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1598 if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1599 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1600 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1601 interpolation_mode = 9; /* Half-pel bilinear */
1602 else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1603 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1604 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1605 interpolation_mode = 1; /* Half-pel bicubic */
1607 interpolation_mode = 0; /* Quarter-pel bicubic */
1609 BEGIN_BCS_BATCH(batch, 6);
1610 OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1611 OUT_BCS_BATCH(batch,
1612 (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1613 ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1614 OUT_BCS_BATCH(batch,
1615 ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1616 dmv_surface_valid << 15 |
1617 (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1618 pic_param->rounding_control << 13 |
1619 pic_param->sequence_fields.bits.syncmarker << 12 |
1620 interpolation_mode << 8 |
1621 0 << 7 | /* FIXME: scale up or down ??? */
1622 pic_param->range_reduction_frame << 6 |
1623 pic_param->entrypoint_fields.bits.loopfilter << 5 |
1625 !pic_param->picture_fields.bits.is_first_field << 3 |
1626 (pic_param->sequence_fields.bits.profile == 3) << 0);
1627 OUT_BCS_BATCH(batch,
1628 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1629 picture_type << 26 |
1632 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1634 OUT_BCS_BATCH(batch,
1635 unified_mv_mode << 28 |
1636 pic_param->mv_fields.bits.four_mv_switch << 27 |
1637 pic_param->fast_uvmc_flag << 26 |
1638 ref_field_pic_polarity << 25 |
1639 pic_param->reference_fields.bits.num_reference_pictures << 24 |
1640 pic_param->reference_fields.bits.reference_distance << 20 |
1641 pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1642 pic_param->mv_fields.bits.extended_dmv_range << 10 |
1643 pic_param->mv_fields.bits.extended_mv_range << 8 |
1644 alt_pquant_edge_mask << 4 |
1645 alt_pquant_config << 2 |
1646 pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1647 pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1648 OUT_BCS_BATCH(batch,
1649 !!pic_param->bitplane_present.value << 31 |
1650 !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1651 !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1652 !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1653 !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1654 !pic_param->bitplane_present.flags.bp_overflags << 26 |
1655 !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1656 !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1657 pic_param->mv_fields.bits.mv_table << 20 |
1658 pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1659 pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1660 pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1661 pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1662 pic_param->mb_mode_table << 8 |
1664 pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1665 pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1666 pic_param->cbp_table << 0);
1667 ADVANCE_BCS_BATCH(batch);
1671 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1672 struct decode_state *decode_state,
1673 struct gen7_mfd_context *gen7_mfd_context)
1675 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1676 VAPictureParameterBufferVC1 *pic_param;
1677 int intensitycomp_single;
1679 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1680 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1682 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1683 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1684 intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1686 BEGIN_BCS_BATCH(batch, 6);
1687 OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1688 OUT_BCS_BATCH(batch,
1689 0 << 14 | /* FIXME: double ??? */
1691 intensitycomp_single << 10 |
1692 intensitycomp_single << 8 |
1693 0 << 4 | /* FIXME: interlace mode */
1695 OUT_BCS_BATCH(batch,
1696 pic_param->luma_shift << 16 |
1697 pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1698 OUT_BCS_BATCH(batch, 0);
1699 OUT_BCS_BATCH(batch, 0);
1700 OUT_BCS_BATCH(batch, 0);
1701 ADVANCE_BCS_BATCH(batch);
1705 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1706 struct decode_state *decode_state,
1707 struct gen7_mfd_context *gen7_mfd_context)
1709 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1710 struct object_surface *obj_surface;
1711 dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1713 obj_surface = decode_state->render_object;
1715 if (obj_surface && obj_surface->private_data) {
1716 dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1719 obj_surface = decode_state->reference_objects[1];
1721 if (obj_surface && obj_surface->private_data) {
1722 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1725 BEGIN_BCS_BATCH(batch, 7);
1726 OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1728 if (dmv_write_buffer)
1729 OUT_BCS_RELOC(batch, dmv_write_buffer,
1730 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1733 OUT_BCS_BATCH(batch, 0);
1735 OUT_BCS_BATCH(batch, 0);
1736 OUT_BCS_BATCH(batch, 0);
1738 if (dmv_read_buffer)
1739 OUT_BCS_RELOC(batch, dmv_read_buffer,
1740 I915_GEM_DOMAIN_INSTRUCTION, 0,
1743 OUT_BCS_BATCH(batch, 0);
1745 OUT_BCS_BATCH(batch, 0);
1746 OUT_BCS_BATCH(batch, 0);
1748 ADVANCE_BCS_BATCH(batch);
/*
 * Translate a macroblock bit offset from the unescaped bitstream domain
 * into the escaped (as-transmitted) domain.
 *
 * For the Advanced profile (profile == 3) the slice header may contain
 * start-code emulation prevention bytes (0x00 0x00 0x03 followed by a
 * byte < 4, SMPTE 421M Annex E); each escape byte shifts the real bit
 * offset by one byte.  Other profiles are not escaped, so the offset is
 * returned unchanged.
 *
 * Fix: this chunk had lost the Advanced-profile gate and the
 * escape-byte skip (`i++, j += 2`), leaving the scan a no-op; both are
 * restored here.
 *
 * NOTE(review): the scan may read up to 3 bytes beyond the header;
 * callers map the whole slice bo, so those bytes are readable.
 */
static int
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int out_slice_data_bit_offset;
    int slice_header_size = in_slice_data_bit_offset / 8;
    int i, j;

    if (profile != 3)
        out_slice_data_bit_offset = in_slice_data_bit_offset;
    else {
        for (i = 0, j = 0; i < slice_header_size; i++, j++) {
            if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
                /* Skip the 0x03 emulation prevention byte. */
                i++, j += 2;
            }
        }

        out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
    }

    return out_slice_data_bit_offset;
}
1774 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1775 VAPictureParameterBufferVC1 *pic_param,
1776 VASliceParameterBufferVC1 *slice_param,
1777 VASliceParameterBufferVC1 *next_slice_param,
1778 dri_bo *slice_data_bo,
1779 struct gen7_mfd_context *gen7_mfd_context)
1781 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1782 int next_slice_start_vert_pos;
1783 int macroblock_offset;
1784 uint8_t *slice_data = NULL;
1786 dri_bo_map(slice_data_bo, 0);
1787 slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1788 macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1789 slice_param->macroblock_offset,
1790 pic_param->sequence_fields.bits.profile);
1791 dri_bo_unmap(slice_data_bo);
1793 if (next_slice_param)
1794 next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1796 next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1798 BEGIN_BCS_BATCH(batch, 5);
1799 OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1800 OUT_BCS_BATCH(batch,
1801 slice_param->slice_data_size - (macroblock_offset >> 3));
1802 OUT_BCS_BATCH(batch,
1803 slice_param->slice_data_offset + (macroblock_offset >> 3));
1804 OUT_BCS_BATCH(batch,
1805 slice_param->slice_vertical_position << 16 |
1806 next_slice_start_vert_pos << 0);
1807 OUT_BCS_BATCH(batch,
1808 (macroblock_offset & 0x7));
1809 ADVANCE_BCS_BATCH(batch);
1813 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1814 struct decode_state *decode_state,
1815 struct gen7_mfd_context *gen7_mfd_context)
1817 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1818 VAPictureParameterBufferVC1 *pic_param;
1819 VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1820 dri_bo *slice_data_bo;
1823 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1824 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1826 gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1827 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1828 intel_batchbuffer_emit_mi_flush(batch);
1829 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1830 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1831 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1832 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1833 gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1834 gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1835 gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1837 for (j = 0; j < decode_state->num_slice_params; j++) {
1838 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1839 slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1840 slice_data_bo = decode_state->slice_datas[j]->bo;
1841 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1843 if (j == decode_state->num_slice_params - 1)
1844 next_slice_group_param = NULL;
1846 next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1848 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1849 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1851 if (i < decode_state->slice_params[j]->num_elements - 1)
1852 next_slice_param = slice_param + 1;
1854 next_slice_param = next_slice_group_param;
1856 gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1861 intel_batchbuffer_end_atomic(batch);
1862 intel_batchbuffer_flush(batch);
1866 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1867 struct decode_state *decode_state,
1868 struct gen7_mfd_context *gen7_mfd_context)
1870 struct object_surface *obj_surface;
1871 VAPictureParameterBufferJPEGBaseline *pic_param;
1872 int subsampling = SUBSAMPLE_YUV420;
1873 int fourcc = VA_FOURCC_IMC3;
1875 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1877 if (pic_param->num_components == 1)
1878 subsampling = SUBSAMPLE_YUV400;
1879 else if (pic_param->num_components == 3) {
1880 int h1 = pic_param->components[0].h_sampling_factor;
1881 int h2 = pic_param->components[1].h_sampling_factor;
1882 int h3 = pic_param->components[2].h_sampling_factor;
1883 int v1 = pic_param->components[0].v_sampling_factor;
1884 int v2 = pic_param->components[1].v_sampling_factor;
1885 int v3 = pic_param->components[2].v_sampling_factor;
1887 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1888 v1 == 2 && v2 == 1 && v3 == 1) {
1889 subsampling = SUBSAMPLE_YUV420;
1890 fourcc = VA_FOURCC_IMC3;
1891 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1892 v1 == 1 && v2 == 1 && v3 == 1) {
1893 subsampling = SUBSAMPLE_YUV422H;
1894 fourcc = VA_FOURCC_422H;
1895 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1896 v1 == 1 && v2 == 1 && v3 == 1) {
1897 subsampling = SUBSAMPLE_YUV444;
1898 fourcc = VA_FOURCC_444P;
1899 } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1900 v1 == 1 && v2 == 1 && v3 == 1) {
1901 subsampling = SUBSAMPLE_YUV411;
1902 fourcc = VA_FOURCC_411P;
1903 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1904 v1 == 2 && v2 == 1 && v3 == 1) {
1905 subsampling = SUBSAMPLE_YUV422V;
1906 fourcc = VA_FOURCC_422V;
1907 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1908 v1 == 2 && v2 == 2 && v3 == 2) {
1909 subsampling = SUBSAMPLE_YUV422H;
1910 fourcc = VA_FOURCC_422H;
1911 } else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1912 v1 == 2 && v2 == 1 && v3 == 1) {
1913 subsampling = SUBSAMPLE_YUV422V;
1914 fourcc = VA_FOURCC_422V;
1922 /* Current decoded picture */
1923 obj_surface = decode_state->render_object;
1924 i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1926 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1927 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1928 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1929 gen7_mfd_context->pre_deblocking_output.valid = 1;
1931 gen7_mfd_context->post_deblocking_output.bo = NULL;
1932 gen7_mfd_context->post_deblocking_output.valid = 0;
1934 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1935 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1937 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1938 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1940 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1941 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1943 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1944 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1946 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1947 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Map VA rotation index (0/90/180/270 degrees) to the GEN7 MFX JPEG
 * rotation field encoding used in MFX_JPEG_PIC_STATE. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
1958 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1959 struct decode_state *decode_state,
1960 struct gen7_mfd_context *gen7_mfd_context)
1962 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1963 VAPictureParameterBufferJPEGBaseline *pic_param;
1964 int chroma_type = GEN7_YUV420;
1965 int frame_width_in_blks;
1966 int frame_height_in_blks;
1968 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1969 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1971 if (pic_param->num_components == 1)
1972 chroma_type = GEN7_YUV400;
1973 else if (pic_param->num_components == 3) {
1974 int h1 = pic_param->components[0].h_sampling_factor;
1975 int h2 = pic_param->components[1].h_sampling_factor;
1976 int h3 = pic_param->components[2].h_sampling_factor;
1977 int v1 = pic_param->components[0].v_sampling_factor;
1978 int v2 = pic_param->components[1].v_sampling_factor;
1979 int v3 = pic_param->components[2].v_sampling_factor;
1981 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1982 v1 == 2 && v2 == 1 && v3 == 1)
1983 chroma_type = GEN7_YUV420;
1984 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1985 v1 == 1 && v2 == 1 && v3 == 1)
1986 chroma_type = GEN7_YUV422H_2Y;
1987 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1988 v1 == 1 && v2 == 1 && v3 == 1)
1989 chroma_type = GEN7_YUV444;
1990 else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1991 v1 == 1 && v2 == 1 && v3 == 1)
1992 chroma_type = GEN7_YUV411;
1993 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1994 v1 == 2 && v2 == 1 && v3 == 1)
1995 chroma_type = GEN7_YUV422V_2Y;
1996 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1997 v1 == 2 && v2 == 2 && v3 == 2)
1998 chroma_type = GEN7_YUV422H_4Y;
1999 else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2000 v1 == 2 && v2 == 1 && v3 == 1)
2001 chroma_type = GEN7_YUV422V_4Y;
2006 if (chroma_type == GEN7_YUV400 ||
2007 chroma_type == GEN7_YUV444 ||
2008 chroma_type == GEN7_YUV422V_2Y) {
2009 frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2010 frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2011 } else if (chroma_type == GEN7_YUV411) {
2012 frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2013 frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2015 frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2016 frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2019 BEGIN_BCS_BATCH(batch, 3);
2020 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2021 OUT_BCS_BATCH(batch,
2022 (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
2023 (chroma_type << 0));
2024 OUT_BCS_BATCH(batch,
2025 ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
2026 ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
2027 ADVANCE_BCS_BATCH(batch);
2030 static const int va_to_gen7_jpeg_hufftable[2] = {
/*
 * Emit one MFX_JPEG_HUFF_TABLE_STATE (53 dwords) per Huffman table the
 * application asked to load. Bails out early when no Huffman table buffer
 * was supplied with this picture.
 */
gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct gen7_mfd_context *gen7_mfd_context,
    VAHuffmanTableBufferJPEGBaseline *huffman_table;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)

    huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;

    for (index = 0; index < num_tables; index++) {
        int id = va_to_gen7_jpeg_hufftable[index];
        /* Skip tables the app did not mark for (re)loading. */
        if (!huffman_table->load_huffman_table[index])
        BEGIN_BCS_BATCH(batch, 53);
        OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
        OUT_BCS_BATCH(batch, id);
        /* Raw table payload: 12 DC code counts, 12 DC values, 16 AC code
         * counts, then 162 AC values padded to 164 bytes for dword
         * alignment (VAHuffmanTableBufferJPEGBaseline has a 2-byte pad). */
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
        ADVANCE_BCS_BATCH(batch);
/* Map a 1-based JPEG component id (1=Y, 2=Cb, 3=Cr, 4=alpha) to the GEN7
 * MFX quantizer-matrix type; index 0 is unused. */
static const int va_to_gen7_jpeg_qm[5] = {
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
/*
 * Load the JPEG inverse-quantization matrices into the MFX hardware.
 * The VA tables are in zig-zag scan order; they are converted to raster
 * order before being handed to gen8_mfd_qm_state().
 */
gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
    VAPictureParameterBufferJPEGBaseline *pic_param;
    VAIQMatrixBufferJPEGBaseline *iq_matrix;

    if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)

    iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    assert(pic_param->num_components <= 3);

    for (index = 0; index < pic_param->num_components; index++) {
        /* Normalize component ids so the first component maps to 1 (Y). */
        int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
        unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
        unsigned char raster_qm[64];

        /* Ignore components whose id falls outside the 1..4 QM table range. */
        if (id > 4 || id < 1)

        if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])

        qm_type = va_to_gen7_jpeg_qm[id];

        /* De-zig-zag: VA supplies the matrix in scan order, MFX wants raster. */
        for (j = 0; j < 64; j++)
            raster_qm[zigzag_direct[j]] = qm[j];

        gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
/*
 * Emit one MFD_JPEG_BSD_OBJECT (6 dwords) describing a JPEG scan: where the
 * entropy-coded data lives in the slice data BO, which components the scan
 * covers, and how many MCUs to decode.
 */
gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
                         VAPictureParameterBufferJPEGBaseline *pic_param,
                         VASliceParameterBufferJPEGBaseline *slice_param,
                         VASliceParameterBufferJPEGBaseline *next_slice_param,
                         dri_bo *slice_data_bo,
                         struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int scan_component_mask = 0;

    assert(slice_param->num_components > 0);
    assert(slice_param->num_components < 4);
    assert(slice_param->num_components <= pic_param->num_components);

    /* Build a Y/Cb/Cr bitmask from the scan's component selectors,
     * normalized against the first component id of the picture. */
    for (i = 0; i < slice_param->num_components; i++) {
        switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
            scan_component_mask |= (1 << 0);
            scan_component_mask |= (1 << 1);
            scan_component_mask |= (1 << 2);

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size);
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  slice_param->slice_horizontal_position << 16 |
                  slice_param->slice_vertical_position << 0);
    OUT_BCS_BATCH(batch,
                  ((slice_param->num_components != 1) << 30) |  /* interleaved */
                  (scan_component_mask << 27) |                 /* scan components */
                  (0 << 26) |   /* disable interrupt allowed */
                  (slice_param->num_mcus << 0));                /* MCU count */
    OUT_BCS_BATCH(batch,
                  (slice_param->restart_interval << 0));  /* RestartInterval */
    ADVANCE_BCS_BATCH(batch);
/* Workaround for JPEG decoding on Ivybridge */
/* A tiny canned AVC clip (bitstream bytes + metadata) that is decoded
 * before every real JPEG picture to put the MFX engine into a known state. */
    unsigned char data[32];
    int data_bit_offset;
} gen7_jpeg_wa_clip = {
    /* raw AVC slice bitstream of the workaround clip */
    0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
    0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
/*
 * Allocate (or re-create) the scratch surface and slice-data BO used by the
 * Ivybridge JPEG workaround, and upload the canned AVC clip bitstream.
 */
gen8_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;

    /* Drop any previously created workaround surface before re-creating it. */
    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,

    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);

    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen7_mfd_context->jpeg_wa_surface_object = obj_surface;

    /* Upload the canned clip bitstream once; the BO is reused afterwards. */
    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
/*
 * Emit MFX_PIPE_MODE_SELECT configuring the engine for AVC VLD decode of
 * the workaround clip (pre-deblocking output, no stream-out).
 */
gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9) | /* Post Deblocking Output */
                  (1 << 8) | /* Pre Deblocking Output */
                  (0 << 5) | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4) | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4) | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3) | /* terminate if AVC mbdata error occurs */
                  (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE describing the NV12 workaround surface
 * (tiled Y-major, interleaved chroma) that the clip is decoded into.
 */
gen8_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2) | /* must be 0 */
                  (1 << 1) | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords) for the workaround decode.
 * Only the pre-deblocking output and a temporary intra row-store BO are
 * real buffers; every other address slot is zeroed. The temporary BO is
 * released at the end — the kernel keeps it alive while the batch runs.
 */
gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    intra_bo = dri_bo_alloc(i965->intel.bufmgr,

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    OUT_BCS_RELOC(batch,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0); /* post deblocking */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 13-15 is for intra row store scratch */
    OUT_BCS_RELOC(batch,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 16-18 is for deblocking filter */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* reference picture address slots — all unused for the canned clip */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW52-54 is for mb status address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* the DW56-60 is for ILDB & second ILDB address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* safe: the relocation above keeps the BO referenced by the batch */
    dri_bo_unreference(intra_bo);
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE for the workaround decode, backed by
 * two temporary row-store BOs (BSD/MPC and MPR). Both are unreferenced at
 * the end; the batch relocations keep them alive until execution finishes.
 */
gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          7680, /* 1.0 * 120 * 64 */

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    OUT_BCS_RELOC(batch,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_RELOC(batch,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* safe: the relocations above keep both BOs referenced by the batch */
    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
/* QM state stage of the JPEG workaround; intentionally a no-op on Gen8
 * (flat default matrices are used for the canned clip). */
gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
/*
 * Emit MFX_AVC_IMG_STATE for the workaround clip: a 1x1-macroblock,
 * non-MBAFF, CABAC, 4:2:0 image. All trailing dwords are zero.
 */
gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int mbaff_frame_flag = 0;
    unsigned int width_in_mbs = 1, height_in_mbs = 1;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  width_in_mbs * height_in_mbs);
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 12) | /* differ from GEN6 */
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (mbaff_frame_flag << 1) |
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 dwords) with every reference/POC slot
 * zeroed — the workaround clip is a single intra picture with no
 * direct-mode prediction.
 */
gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
                                  struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0); /* top */
        OUT_BCS_BATCH(batch, 0); /* bottom */

    OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field */
    OUT_BCS_BATCH(batch, 0); /* top */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC list entries for the reference frames — all zero */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE pointing the bitstream fetch at the
 * workaround clip's slice-data BO; the remaining base addresses are
 * irrelevant in VLD mode and left zero.
 */
gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch,
                  gen7_mfd_context->jpeg_wa_slice_data_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFD_AVC_BSD_OBJECT for the canned clip: the whole clip is one slice,
 * starting at the recorded bit offset, flagged as the last slice.
 */
gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) | /* byte part of start offset */
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7)); /* bit remainder of start offset */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_SLICE_STATE for the workaround clip: a single I-slice
 * covering the one-macroblock picture, with deblocking disabled and no
 * reference lists.
 */
gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
    OUT_BCS_BATCH(batch,
                  (1 << 27) | /* disable Deblocking */
                  (gen7_jpeg_wa_clip.qp << 16) |
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
/*
 * Run the complete Ivybridge JPEG workaround: decode the canned AVC clip
 * to reset the MFX engine state before the real JPEG decode. The call
 * order below mirrors the hardware's required state-programming sequence —
 * do not reorder.
 */
gen8_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen8_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
/*
 * Decode one JPEG Baseline picture. Two passes over the slice (scan)
 * parameters: the first finds the highest Huffman table selector used so
 * that the right number of tables is loaded; the second emits the actual
 * BSD objects that decode each scan.
 */
gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferJPEGBaseline *pic_param;
    VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j, max_selector = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    /* Currently only support Baseline DCT */
    gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    /* Ivybridge-class hardware needs a dummy AVC decode first; see
     * gen8_mfd_jpeg_wa(). */
    gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Pass 1: find the largest DC/AC table selector across all scans. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
                next_slice_param = next_slice_group_param;

            for (component = 0; component < slice_param->num_components; component++) {
                if (max_selector < slice_param->components[component].dc_table_selector)
                    max_selector = slice_param->components[component].dc_table_selector;

                if (max_selector < slice_param->components[component].ac_table_selector)
                    max_selector = slice_param->components[component].ac_table_selector;

    assert(max_selector < 2);
    gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);

    /* Pass 2: emit a BSD object for every scan. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
                next_slice_param = next_slice_group_param;

            gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
/* VP8 DC dequantization lookup table, indexed by the (clipped 0..127)
 * quantization index; values per the VP8 spec (RFC 6386, section 14.1). */
static const int vp8_dc_qlookup[128] =
    4,   5,   6,   7,   8,   9,   10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
    18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
    29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
    44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
    59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
    75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
    91,  93,  95,  96,  98,  100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
/* VP8 AC dequantization lookup table, indexed by the (clipped 0..127)
 * quantization index; values per the VP8 spec (RFC 6386, section 14.1). */
static const int vp8_ac_qlookup[128] =
    4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
    20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
    36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
    52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
    78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98,  100, 102, 104, 106, 108,
    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2720 static inline unsigned int vp8_clip_quantization_index(int index)
/*
 * Prepare the MFD pipeline for decoding one VP8 frame: update the reference
 * frame store, bind the render surface to the pre/post-deblocking outputs
 * (depending on whether the in-loop filter is enabled), and (re)allocate the
 * row-store scratch buffers sized from the frame width in macroblocks.
 */
gen8_mfd_vp8_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
    struct object_surface *obj_surface;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    int width_in_mbs = (pic_param->frame_width + 15) / 16;
    int height_in_mbs = (pic_param->frame_height + 15) / 16;

    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    intel_update_vp8_frame_store_index(ctx,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* Exactly one of the two outputs is valid: post-deblocking when the
     * loop filter runs, pre-deblocking when it is disabled. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;

    intel_ensure_vp8_segmentation_buffer(ctx,
                                         &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);

    /* The same as AVC */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      width_in_mbs * 64 * 2,
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* VP8 has no bitplane data */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
2808 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2809 struct decode_state *decode_state,
2810 struct gen7_mfd_context *gen7_mfd_context)
2812 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2813 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2814 VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2815 VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2816 dri_bo *probs_bo = decode_state->probability_data->bo;
2818 unsigned int quantization_value[4][6];
2820 /* There is no safe way to error out if the segmentation buffer
2821 could not be allocated. So, instead of aborting, simply decode
2822 something even if the result may look totally inacurate */
2823 const unsigned int enable_segmentation =
2824 pic_param->pic_fields.bits.segmentation_enabled &&
2825 gen7_mfd_context->segmentation_buffer.valid;
2827 log2num = (int)log2(slice_param->num_of_partitions - 1);
2829 BEGIN_BCS_BATCH(batch, 38);
2830 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2831 OUT_BCS_BATCH(batch,
2832 (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2833 (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2834 OUT_BCS_BATCH(batch,
2836 pic_param->pic_fields.bits.sharpness_level << 16 |
2837 pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2838 pic_param->pic_fields.bits.sign_bias_golden << 12 |
2839 pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2840 pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2841 pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2842 pic_param->pic_fields.bits.segmentation_enabled << 8 |
2843 (enable_segmentation &&
2844 !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2845 (enable_segmentation &&
2846 pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2847 (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2848 pic_param->pic_fields.bits.filter_type << 4 |
2849 (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2850 !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2852 OUT_BCS_BATCH(batch,
2853 pic_param->loop_filter_level[3] << 24 |
2854 pic_param->loop_filter_level[2] << 16 |
2855 pic_param->loop_filter_level[1] << 8 |
2856 pic_param->loop_filter_level[0] << 0);
2858 /* Quantizer Value for 4 segmetns, DW4-DW15 */
2859 for (i = 0; i < 4; i++) {
2860 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2861 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2862 quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
2863 /* 101581>>16 is equivalent to 155/100 */
2864 quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
2865 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2866 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2868 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2869 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2871 OUT_BCS_BATCH(batch,
2872 quantization_value[i][0] << 16 | /* Y1AC */
2873 quantization_value[i][1] << 0); /* Y1DC */
2874 OUT_BCS_BATCH(batch,
2875 quantization_value[i][5] << 16 | /* UVAC */
2876 quantization_value[i][4] << 0); /* UVDC */
2877 OUT_BCS_BATCH(batch,
2878 quantization_value[i][3] << 16 | /* Y2AC */
2879 quantization_value[i][2] << 0); /* Y2DC */
2882 /* CoeffProbability table for non-key frame, DW16-DW18 */
2884 OUT_BCS_RELOC(batch, probs_bo,
2885 0, I915_GEM_DOMAIN_INSTRUCTION,
2887 OUT_BCS_BATCH(batch, 0);
2888 OUT_BCS_BATCH(batch, 0);
2890 OUT_BCS_BATCH(batch, 0);
2891 OUT_BCS_BATCH(batch, 0);
2892 OUT_BCS_BATCH(batch, 0);
2895 OUT_BCS_BATCH(batch,
2896 pic_param->mb_segment_tree_probs[2] << 16 |
2897 pic_param->mb_segment_tree_probs[1] << 8 |
2898 pic_param->mb_segment_tree_probs[0] << 0);
2900 OUT_BCS_BATCH(batch,
2901 pic_param->prob_skip_false << 24 |
2902 pic_param->prob_intra << 16 |
2903 pic_param->prob_last << 8 |
2904 pic_param->prob_gf << 0);
2906 OUT_BCS_BATCH(batch,
2907 pic_param->y_mode_probs[3] << 24 |
2908 pic_param->y_mode_probs[2] << 16 |
2909 pic_param->y_mode_probs[1] << 8 |
2910 pic_param->y_mode_probs[0] << 0);
2912 OUT_BCS_BATCH(batch,
2913 pic_param->uv_mode_probs[2] << 16 |
2914 pic_param->uv_mode_probs[1] << 8 |
2915 pic_param->uv_mode_probs[0] << 0);
2917 /* MV update value, DW23-DW32 */
2918 for (i = 0; i < 2; i++) {
2919 for (j = 0; j < 20; j += 4) {
2920 OUT_BCS_BATCH(batch,
2921 (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2922 pic_param->mv_probs[i][j + 2] << 16 |
2923 pic_param->mv_probs[i][j + 1] << 8 |
2924 pic_param->mv_probs[i][j + 0] << 0);
2928 OUT_BCS_BATCH(batch,
2929 (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2930 (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2931 (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) << 8 |
2932 (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) << 0);
2934 OUT_BCS_BATCH(batch,
2935 (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2936 (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2937 (pic_param->loop_filter_deltas_mode[1] & 0x7f) << 8 |
2938 (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0);
2940 /* segmentation id stream base address, DW35-DW37 */
2941 if (enable_segmentation) {
2942 OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2943 0, I915_GEM_DOMAIN_INSTRUCTION,
2945 OUT_BCS_BATCH(batch, 0);
2946 OUT_BCS_BATCH(batch, 0);
2949 OUT_BCS_BATCH(batch, 0);
2950 OUT_BCS_BATCH(batch, 0);
2951 OUT_BCS_BATCH(batch, 0);
2953 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the MFD_VP8_BSD_OBJECT command (22 dwords) that describes the
 * layout of the VP8 frame bitstream to the bitstream decoder: the
 * boolean-decoder state and size of partition 0 (the header/mode
 * partition), followed by size/offset pairs for up to eight token
 * partitions.
 */
gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVP8 *pic_param,
                        VASliceParameterBufferVP8 *slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* Byte offset of the first token partition: slice data start plus
     * partition 0's header, rounded up from bits to whole bytes
     * (macroblock_offset is expressed in bits). */
    unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
    /* Bits of the current byte already consumed by the boolean decoder;
     * bool_coder_ctx.count is the number of bits still available. */
    unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
    unsigned int partition_size_0 = slice_param->partition_size[0];
    assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
    /* A fully consumed byte counts toward the bytes already read by the
     * boolean decoder, not toward the remaining partition-0 payload. */
    if (used_bits == 8) {
        partition_size_0 -= 1;
    /* VP8 carries 1 header partition plus 1, 2, 4 or 8 token
     * partitions, so num_of_partitions is between 2 and 9. */
    assert(slice_param->num_of_partitions >= 2);
    assert(slice_param->num_of_partitions <= 9);
    /* log2 of the token-partition count (partition 0 excluded). */
    log2num = (int)log2(slice_param->num_of_partitions - 1);
    BEGIN_BCS_BATCH(batch, 22);
    OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
    OUT_BCS_BATCH(batch,
                  used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
                  pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
    OUT_BCS_BATCH(batch, partition_size_0);
    OUT_BCS_BATCH(batch, offset);
    /* Partition sizes (3 bytes each) are stored in the stream right
     * after the first partition when there is more than one token
     * partition; skip over them to reach the first token partition. */
    offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
    /* Size/offset pair for each of the up-to-8 token partitions;
     * unused slots are zero-filled. */
    for (i = 1; i < 9; i++) {
        if (i < slice_param->num_of_partitions) {
            OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
            OUT_BCS_BATCH(batch, offset);
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        offset += slice_param->partition_size[i];
    OUT_BCS_BATCH(batch,
                  1 << 31 | /* concealment method */
    ADVANCE_BCS_BATCH(batch);
/*
 * Decode one complete VP8 frame.  Validates the incoming buffers
 * (VP8 is decoded one slice per frame), then emits the whole MFX
 * command sequence for the frame on the BCS ring and flushes it.
 */
gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVP8 *pic_param;
    VASliceParameterBufferVP8 *slice_param;
    dri_bo *slice_data_bo;
    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    /* One slice per frame: exactly one slice parameter buffer with one
     * element, one slice data bo, and the coefficient-probability data
     * must all be present; otherwise warn and bail out. */
    if (decode_state->num_slice_params != 1 ||
        (!decode_state->slice_params ||
         !decode_state->slice_params[0] ||
         (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
        (!decode_state->slice_datas ||
         !decode_state->slice_datas[0] ||
         !decode_state->slice_datas[0]->bo) ||
        !decode_state->probability_data) {
        WARN_ONCE("Wrong parameters for VP8 decoding\n");
    slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
    slice_data_bo = decode_state->slice_datas[0]->bo;
    gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    /* Fixed-function MFX pipeline setup, then the VP8-specific picture
     * state and the bitstream-decode object for this frame. */
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
/*
 * Top-level decode entry point (installed as hw_context::run).
 * Sanity-checks the input buffers and dispatches to the per-codec
 * decode routine for the requested profile.
 * NOTE(review): the 'profile' and 'vaStatus' identifiers used below
 * are declared in parts of this function not visible here — confirm
 * against the full source.
 */
gen8_mfd_decode_picture(VADriverContextP ctx,
                        union codec_state *codec_state,
                        struct hw_context *hw_context)
    struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
    struct decode_state *decode_state = &codec_state->decode;
    assert(gen7_mfd_context);
    /* Reject malformed input before touching the hardware. */
    vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
    if (vaStatus != VA_STATUS_SUCCESS)
    /* Reset the MPEG-2 slice-vertical-position workaround state for
     * each new frame. */
    gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
    case VAProfileMPEG2Simple:
    case VAProfileMPEG2Main:
        gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
    case VAProfileH264ConstrainedBaseline:
    case VAProfileH264Main:
    case VAProfileH264High:
        gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
    case VAProfileVC1Simple:
    case VAProfileVC1Main:
    case VAProfileVC1Advanced:
        gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
    case VAProfileJPEGBaseline:
        gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
    case VAProfileVP8Version0_3:
        gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
    vaStatus = VA_STATUS_SUCCESS;
/*
 * hw_context::destroy hook.  Drops the reference on every buffer
 * object owned by the decoder context, then frees the batchbuffer and
 * the context itself.
 */
gen8_mfd_context_destroy(void *hw_context)
    struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = NULL;
    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = NULL;
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
    gen7_mfd_context->bitplane_read_buffer.bo = NULL;
    dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
    gen7_mfd_context->segmentation_buffer.bo = NULL;
    /* Not NULLed like the fields above: the whole context is freed
     * immediately below, so the pointer cannot be reused. */
    dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
    intel_batchbuffer_free(gen7_mfd_context->base.batch);
    free(gen7_mfd_context);
/*
 * Per-codec context init for MPEG-2: reset all cached IQ-matrix load
 * flags.  NOTE(review): -1 presumably marks each quantiser matrix as
 * "not yet loaded" so the first picture forces a full matrix upload —
 * confirm in the MPEG-2 qm-state emission path.
 */
static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
                                        struct gen7_mfd_context *gen7_mfd_context)
    gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3161 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3163 struct intel_driver_data *intel = intel_driver_data(ctx);
3164 struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3167 gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3168 gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3169 gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3171 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3172 gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3173 gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3176 gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3177 gen7_mfd_context->segmentation_buffer.valid = 0;
3179 switch (obj_config->profile) {
3180 case VAProfileMPEG2Simple:
3181 case VAProfileMPEG2Main:
3182 gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3185 case VAProfileH264ConstrainedBaseline:
3186 case VAProfileH264Main:
3187 case VAProfileH264High:
3188 gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3193 return (struct hw_context *)gen7_mfd_context;